4532: Add `url` and `api_key` to ollama r=ManyTheFish a=dureuill

See [Usage page](https://meilisearch.notion.site/v1-8-AI-search-API-usage-135552d6e85a4a52bc7109be82aeca42#5c77ef49e78e43388c1d3d5429151357)

### Motivation

- Before this PR, the URL for ollama was only read from the environment. This is a needless restriction that will be troublesome in settings where passing an environment variable is complex or impossible (e.g., the Cloud).
- Before this PR, ollama did not support an api_key. While ollama does not natively support API keys, [a common practice](https://github.com/ollama/ollama/issues/849) is to put a publicly accessible ollama server behind a proxy to support authentication.

### Skip changelog

The ollama embedder was added in v1.8.

Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2024-03-28 12:35:19 +00:00 committed by GitHub
commit 781e2d7750
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
4 changed files with 102 additions and 53 deletions

View File

@ -1225,14 +1225,24 @@ pub fn validate_embedding_settings(
};
match inferred_source {
EmbedderSource::OpenAi => {
check_unset(&revision, "revision", inferred_source, name)?;
check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?;
check_unset(&url, "url", inferred_source, name)?;
check_unset(&query, "query", inferred_source, name)?;
check_unset(&input_field, "inputField", inferred_source, name)?;
check_unset(&path_to_embeddings, "pathToEmbeddings", inferred_source, name)?;
check_unset(&embedding_object, "embeddingObject", inferred_source, name)?;
check_unset(&input_type, "inputType", inferred_source, name)?;
check_unset(&url, EmbeddingSettings::URL, inferred_source, name)?;
check_unset(&query, EmbeddingSettings::QUERY, inferred_source, name)?;
check_unset(&input_field, EmbeddingSettings::INPUT_FIELD, inferred_source, name)?;
check_unset(
&path_to_embeddings,
EmbeddingSettings::PATH_TO_EMBEDDINGS,
inferred_source,
name,
)?;
check_unset(
&embedding_object,
EmbeddingSettings::EMBEDDING_OBJECT,
inferred_source,
name,
)?;
check_unset(&input_type, EmbeddingSettings::INPUT_TYPE, inferred_source, name)?;
if let Setting::Set(model) = &model {
let model = crate::vector::openai::EmbeddingModel::from_name(model.as_str())
@ -1266,47 +1276,80 @@ pub fn validate_embedding_settings(
}
EmbedderSource::Ollama => {
// Dimensions get inferred, only model name is required
check_unset(&dimensions, "dimensions", inferred_source, name)?;
check_set(&model, "model", inferred_source, name)?;
check_unset(&api_key, "apiKey", inferred_source, name)?;
check_unset(&revision, "revision", inferred_source, name)?;
check_unset(&dimensions, EmbeddingSettings::DIMENSIONS, inferred_source, name)?;
check_set(&model, EmbeddingSettings::MODEL, inferred_source, name)?;
check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?;
check_unset(&url, "url", inferred_source, name)?;
check_unset(&query, "query", inferred_source, name)?;
check_unset(&input_field, "inputField", inferred_source, name)?;
check_unset(&path_to_embeddings, "pathToEmbeddings", inferred_source, name)?;
check_unset(&embedding_object, "embeddingObject", inferred_source, name)?;
check_unset(&input_type, "inputType", inferred_source, name)?;
check_unset(&query, EmbeddingSettings::QUERY, inferred_source, name)?;
check_unset(&input_field, EmbeddingSettings::INPUT_FIELD, inferred_source, name)?;
check_unset(
&path_to_embeddings,
EmbeddingSettings::PATH_TO_EMBEDDINGS,
inferred_source,
name,
)?;
check_unset(
&embedding_object,
EmbeddingSettings::EMBEDDING_OBJECT,
inferred_source,
name,
)?;
check_unset(&input_type, EmbeddingSettings::INPUT_TYPE, inferred_source, name)?;
}
EmbedderSource::HuggingFace => {
check_unset(&api_key, "apiKey", inferred_source, name)?;
check_unset(&dimensions, "dimensions", inferred_source, name)?;
check_unset(&api_key, EmbeddingSettings::API_KEY, inferred_source, name)?;
check_unset(&dimensions, EmbeddingSettings::DIMENSIONS, inferred_source, name)?;
check_unset(&url, "url", inferred_source, name)?;
check_unset(&query, "query", inferred_source, name)?;
check_unset(&input_field, "inputField", inferred_source, name)?;
check_unset(&path_to_embeddings, "pathToEmbeddings", inferred_source, name)?;
check_unset(&embedding_object, "embeddingObject", inferred_source, name)?;
check_unset(&input_type, "inputType", inferred_source, name)?;
check_unset(&url, EmbeddingSettings::URL, inferred_source, name)?;
check_unset(&query, EmbeddingSettings::QUERY, inferred_source, name)?;
check_unset(&input_field, EmbeddingSettings::INPUT_FIELD, inferred_source, name)?;
check_unset(
&path_to_embeddings,
EmbeddingSettings::PATH_TO_EMBEDDINGS,
inferred_source,
name,
)?;
check_unset(
&embedding_object,
EmbeddingSettings::EMBEDDING_OBJECT,
inferred_source,
name,
)?;
check_unset(&input_type, EmbeddingSettings::INPUT_TYPE, inferred_source, name)?;
}
EmbedderSource::UserProvided => {
check_unset(&model, "model", inferred_source, name)?;
check_unset(&revision, "revision", inferred_source, name)?;
check_unset(&api_key, "apiKey", inferred_source, name)?;
check_unset(&document_template, "documentTemplate", inferred_source, name)?;
check_set(&dimensions, "dimensions", inferred_source, name)?;
check_unset(&model, EmbeddingSettings::MODEL, inferred_source, name)?;
check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?;
check_unset(&api_key, EmbeddingSettings::API_KEY, inferred_source, name)?;
check_unset(
&document_template,
EmbeddingSettings::DOCUMENT_TEMPLATE,
inferred_source,
name,
)?;
check_set(&dimensions, EmbeddingSettings::DIMENSIONS, inferred_source, name)?;
check_unset(&url, "url", inferred_source, name)?;
check_unset(&query, "query", inferred_source, name)?;
check_unset(&input_field, "inputField", inferred_source, name)?;
check_unset(&path_to_embeddings, "pathToEmbeddings", inferred_source, name)?;
check_unset(&embedding_object, "embeddingObject", inferred_source, name)?;
check_unset(&input_type, "inputType", inferred_source, name)?;
check_unset(&url, EmbeddingSettings::URL, inferred_source, name)?;
check_unset(&query, EmbeddingSettings::QUERY, inferred_source, name)?;
check_unset(&input_field, EmbeddingSettings::INPUT_FIELD, inferred_source, name)?;
check_unset(
&path_to_embeddings,
EmbeddingSettings::PATH_TO_EMBEDDINGS,
inferred_source,
name,
)?;
check_unset(
&embedding_object,
EmbeddingSettings::EMBEDDING_OBJECT,
inferred_source,
name,
)?;
check_unset(&input_type, EmbeddingSettings::INPUT_TYPE, inferred_source, name)?;
}
EmbedderSource::Rest => {
check_unset(&model, "model", inferred_source, name)?;
check_unset(&revision, "revision", inferred_source, name)?;
check_set(&url, "url", inferred_source, name)?;
check_unset(&model, EmbeddingSettings::MODEL, inferred_source, name)?;
check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?;
check_set(&url, EmbeddingSettings::URL, inferred_source, name)?;
}
}
Ok(Setting::Set(EmbeddingSettings {

View File

@ -201,8 +201,8 @@ impl EmbedderOptions {
Self::OpenAi(openai::EmbedderOptions::with_default_model(api_key))
}
pub fn ollama() -> Self {
Self::Ollama(ollama::EmbedderOptions::with_default_model())
pub fn ollama(api_key: Option<String>, url: Option<String>) -> Self {
Self::Ollama(ollama::EmbedderOptions::with_default_model(api_key, url))
}
}

View File

@ -12,15 +12,13 @@ pub struct Embedder {
#[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)]
pub struct EmbedderOptions {
pub embedding_model: String,
pub url: Option<String>,
pub api_key: Option<String>,
}
impl EmbedderOptions {
pub fn with_default_model() -> Self {
Self { embedding_model: "nomic-embed-text".into() }
}
pub fn with_embedding_model(embedding_model: String) -> Self {
Self { embedding_model }
pub fn with_default_model(api_key: Option<String>, url: Option<String>) -> Self {
Self { embedding_model: "nomic-embed-text".into(), api_key, url }
}
}
@ -28,10 +26,10 @@ impl Embedder {
pub fn new(options: EmbedderOptions) -> Result<Self, NewEmbedderError> {
let model = options.embedding_model.as_str();
let rest_embedder = match RestEmbedder::new(RestEmbedderOptions {
api_key: None,
api_key: options.api_key,
distribution: None,
dimensions: None,
url: get_ollama_path(),
url: options.url.unwrap_or_else(get_ollama_path),
query: serde_json::json!({
"model": model,
}),

View File

@ -114,7 +114,9 @@ impl EmbeddingSettings {
&[EmbedderSource::HuggingFace, EmbedderSource::OpenAi, EmbedderSource::Ollama]
}
Self::REVISION => &[EmbedderSource::HuggingFace],
Self::API_KEY => &[EmbedderSource::OpenAi, EmbedderSource::Rest],
Self::API_KEY => {
&[EmbedderSource::OpenAi, EmbedderSource::Ollama, EmbedderSource::Rest]
}
Self::DIMENSIONS => {
&[EmbedderSource::OpenAi, EmbedderSource::UserProvided, EmbedderSource::Rest]
}
@ -124,7 +126,7 @@ impl EmbeddingSettings {
EmbedderSource::Ollama,
EmbedderSource::Rest,
],
Self::URL => &[EmbedderSource::Rest],
Self::URL => &[EmbedderSource::Ollama, EmbedderSource::Rest],
Self::QUERY => &[EmbedderSource::Rest],
Self::INPUT_FIELD => &[EmbedderSource::Rest],
Self::PATH_TO_EMBEDDINGS => &[EmbedderSource::Rest],
@ -146,7 +148,9 @@ impl EmbeddingSettings {
EmbedderSource::HuggingFace => {
&[Self::SOURCE, Self::MODEL, Self::REVISION, Self::DOCUMENT_TEMPLATE]
}
EmbedderSource::Ollama => &[Self::SOURCE, Self::MODEL, Self::DOCUMENT_TEMPLATE],
EmbedderSource::Ollama => {
&[Self::SOURCE, Self::MODEL, Self::DOCUMENT_TEMPLATE, Self::URL, Self::API_KEY]
}
EmbedderSource::UserProvided => &[Self::SOURCE, Self::DIMENSIONS],
EmbedderSource::Rest => &[
Self::SOURCE,
@ -387,10 +391,14 @@ impl From<EmbeddingSettings> for EmbeddingConfig {
}
EmbedderSource::Ollama => {
let mut options: ollama::EmbedderOptions =
super::ollama::EmbedderOptions::with_default_model();
super::ollama::EmbedderOptions::with_default_model(
api_key.set(),
url.set(),
);
if let Some(model) = model.set() {
options.embedding_model = model;
}
this.embedder_options = super::EmbedderOptions::Ollama(options);
}
EmbedderSource::HuggingFace => {