diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index e902badc0..2b1be9453 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1225,14 +1225,24 @@ pub fn validate_embedding_settings( }; match inferred_source { EmbedderSource::OpenAi => { - check_unset(&revision, "revision", inferred_source, name)?; + check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?; - check_unset(&url, "url", inferred_source, name)?; - check_unset(&query, "query", inferred_source, name)?; - check_unset(&input_field, "inputField", inferred_source, name)?; - check_unset(&path_to_embeddings, "pathToEmbeddings", inferred_source, name)?; - check_unset(&embedding_object, "embeddingObject", inferred_source, name)?; - check_unset(&input_type, "inputType", inferred_source, name)?; + check_unset(&url, EmbeddingSettings::URL, inferred_source, name)?; + check_unset(&query, EmbeddingSettings::QUERY, inferred_source, name)?; + check_unset(&input_field, EmbeddingSettings::INPUT_FIELD, inferred_source, name)?; + check_unset( + &path_to_embeddings, + EmbeddingSettings::PATH_TO_EMBEDDINGS, + inferred_source, + name, + )?; + check_unset( + &embedding_object, + EmbeddingSettings::EMBEDDING_OBJECT, + inferred_source, + name, + )?; + check_unset(&input_type, EmbeddingSettings::INPUT_TYPE, inferred_source, name)?; if let Setting::Set(model) = &model { let model = crate::vector::openai::EmbeddingModel::from_name(model.as_str()) @@ -1266,47 +1276,80 @@ pub fn validate_embedding_settings( } EmbedderSource::Ollama => { // Dimensions get inferred, only model name is required - check_unset(&dimensions, "dimensions", inferred_source, name)?; - check_set(&model, "model", inferred_source, name)?; - check_unset(&api_key, "apiKey", inferred_source, name)?; - check_unset(&revision, "revision", inferred_source, name)?; + check_unset(&dimensions, EmbeddingSettings::DIMENSIONS, inferred_source, name)?; + check_set(&model, EmbeddingSettings::MODEL, inferred_source, name)?; + check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?; - check_unset(&url, "url", inferred_source, name)?; - check_unset(&query, "query", inferred_source, name)?; - check_unset(&input_field, "inputField", inferred_source, name)?; - check_unset(&path_to_embeddings, "pathToEmbeddings", inferred_source, name)?; - check_unset(&embedding_object, "embeddingObject", inferred_source, name)?; - check_unset(&input_type, "inputType", inferred_source, name)?; + check_unset(&query, EmbeddingSettings::QUERY, inferred_source, name)?; + check_unset(&input_field, EmbeddingSettings::INPUT_FIELD, inferred_source, name)?; + check_unset( + &path_to_embeddings, + EmbeddingSettings::PATH_TO_EMBEDDINGS, + inferred_source, + name, + )?; + check_unset( + &embedding_object, + EmbeddingSettings::EMBEDDING_OBJECT, + inferred_source, + name, + )?; + check_unset(&input_type, EmbeddingSettings::INPUT_TYPE, inferred_source, name)?; } EmbedderSource::HuggingFace => { - check_unset(&api_key, "apiKey", inferred_source, name)?; - check_unset(&dimensions, "dimensions", inferred_source, name)?; + check_unset(&api_key, EmbeddingSettings::API_KEY, inferred_source, name)?; + check_unset(&dimensions, EmbeddingSettings::DIMENSIONS, inferred_source, name)?; - check_unset(&url, "url", inferred_source, name)?; - check_unset(&query, "query", inferred_source, name)?; - check_unset(&input_field, "inputField", inferred_source, name)?; - check_unset(&path_to_embeddings, "pathToEmbeddings", inferred_source, name)?; - check_unset(&embedding_object, "embeddingObject", inferred_source, name)?; - check_unset(&input_type, "inputType", inferred_source, name)?; + check_unset(&url, EmbeddingSettings::URL, inferred_source, name)?; + check_unset(&query, EmbeddingSettings::QUERY, inferred_source, name)?; + check_unset(&input_field, EmbeddingSettings::INPUT_FIELD, inferred_source, name)?; + check_unset( + &path_to_embeddings, + EmbeddingSettings::PATH_TO_EMBEDDINGS, + inferred_source, + name, + )?; + check_unset( + &embedding_object, + EmbeddingSettings::EMBEDDING_OBJECT, + inferred_source, + name, + )?; + check_unset(&input_type, EmbeddingSettings::INPUT_TYPE, inferred_source, name)?; } EmbedderSource::UserProvided => { - check_unset(&model, "model", inferred_source, name)?; - check_unset(&revision, "revision", inferred_source, name)?; - check_unset(&api_key, "apiKey", inferred_source, name)?; - check_unset(&document_template, "documentTemplate", inferred_source, name)?; - check_set(&dimensions, "dimensions", inferred_source, name)?; + check_unset(&model, EmbeddingSettings::MODEL, inferred_source, name)?; + check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?; + check_unset(&api_key, EmbeddingSettings::API_KEY, inferred_source, name)?; + check_unset( + &document_template, + EmbeddingSettings::DOCUMENT_TEMPLATE, + inferred_source, + name, + )?; + check_set(&dimensions, EmbeddingSettings::DIMENSIONS, inferred_source, name)?; - check_unset(&url, "url", inferred_source, name)?; - check_unset(&query, "query", inferred_source, name)?; - check_unset(&input_field, "inputField", inferred_source, name)?; - check_unset(&path_to_embeddings, "pathToEmbeddings", inferred_source, name)?; - check_unset(&embedding_object, "embeddingObject", inferred_source, name)?; - check_unset(&input_type, "inputType", inferred_source, name)?; + check_unset(&url, EmbeddingSettings::URL, inferred_source, name)?; + check_unset(&query, EmbeddingSettings::QUERY, inferred_source, name)?; + check_unset(&input_field, EmbeddingSettings::INPUT_FIELD, inferred_source, name)?; + check_unset( + &path_to_embeddings, + EmbeddingSettings::PATH_TO_EMBEDDINGS, + inferred_source, + name, + )?; + check_unset( + &embedding_object, + EmbeddingSettings::EMBEDDING_OBJECT, + inferred_source, + name, + )?; + check_unset(&input_type, EmbeddingSettings::INPUT_TYPE, inferred_source, name)?; } EmbedderSource::Rest => { - check_unset(&model, "model", inferred_source, name)?; - check_unset(&revision, "revision", inferred_source, name)?; - check_set(&url, "url", inferred_source, name)?; + check_unset(&model, EmbeddingSettings::MODEL, inferred_source, name)?; + check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?; + check_set(&url, EmbeddingSettings::URL, inferred_source, name)?; } } Ok(Setting::Set(EmbeddingSettings { diff --git a/milli/src/vector/mod.rs b/milli/src/vector/mod.rs index 65654af4a..8b25de56d 100644 --- a/milli/src/vector/mod.rs +++ b/milli/src/vector/mod.rs @@ -201,8 +201,8 @@ impl EmbedderOptions { Self::OpenAi(openai::EmbedderOptions::with_default_model(api_key)) } - pub fn ollama() -> Self { - Self::Ollama(ollama::EmbedderOptions::with_default_model()) + pub fn ollama(api_key: Option, url: Option) -> Self { + Self::Ollama(ollama::EmbedderOptions::with_default_model(api_key, url)) } } diff --git a/milli/src/vector/ollama.rs b/milli/src/vector/ollama.rs index 9c44e8052..578b6c8e2 100644 --- a/milli/src/vector/ollama.rs +++ b/milli/src/vector/ollama.rs @@ -12,15 +12,13 @@ pub struct Embedder { #[derive(Debug, Clone, Hash, PartialEq, Eq, serde::Deserialize, serde::Serialize)] pub struct EmbedderOptions { pub embedding_model: String, + pub url: Option, + pub api_key: Option, } impl EmbedderOptions { - pub fn with_default_model() -> Self { - Self { embedding_model: "nomic-embed-text".into() } - } - - pub fn with_embedding_model(embedding_model: String) -> Self { - Self { embedding_model } + pub fn with_default_model(api_key: Option, url: Option) -> Self { + Self { embedding_model: "nomic-embed-text".into(), api_key, url } } } @@ -28,10 +26,10 @@ impl Embedder { pub fn new(options: EmbedderOptions) -> Result { let model = options.embedding_model.as_str(); let rest_embedder = match RestEmbedder::new(RestEmbedderOptions { - api_key: None, + api_key: options.api_key, distribution: None, dimensions: None, - url: get_ollama_path(), + url: options.url.unwrap_or_else(get_ollama_path), query: serde_json::json!({ "model": model, }), diff --git a/milli/src/vector/settings.rs b/milli/src/vector/settings.rs index c5b0d0326..c277dd0cf 100644 --- a/milli/src/vector/settings.rs +++ b/milli/src/vector/settings.rs @@ -114,7 +114,9 @@ impl EmbeddingSettings { &[EmbedderSource::HuggingFace, EmbedderSource::OpenAi, EmbedderSource::Ollama] } Self::REVISION => &[EmbedderSource::HuggingFace], - Self::API_KEY => &[EmbedderSource::OpenAi, EmbedderSource::Rest], + Self::API_KEY => { + &[EmbedderSource::OpenAi, EmbedderSource::Ollama, EmbedderSource::Rest] + } Self::DIMENSIONS => { &[EmbedderSource::OpenAi, EmbedderSource::UserProvided, EmbedderSource::Rest] } @@ -124,7 +126,7 @@ impl EmbeddingSettings { EmbedderSource::Ollama, EmbedderSource::Rest, ], - Self::URL => &[EmbedderSource::Rest], + Self::URL => &[EmbedderSource::Ollama, EmbedderSource::Rest], Self::QUERY => &[EmbedderSource::Rest], Self::INPUT_FIELD => &[EmbedderSource::Rest], Self::PATH_TO_EMBEDDINGS => &[EmbedderSource::Rest], @@ -146,7 +148,9 @@ impl EmbeddingSettings { EmbedderSource::HuggingFace => { &[Self::SOURCE, Self::MODEL, Self::REVISION, Self::DOCUMENT_TEMPLATE] } - EmbedderSource::Ollama => &[Self::SOURCE, Self::MODEL, Self::DOCUMENT_TEMPLATE], + EmbedderSource::Ollama => { + &[Self::SOURCE, Self::MODEL, Self::DOCUMENT_TEMPLATE, Self::URL, Self::API_KEY] + } EmbedderSource::UserProvided => &[Self::SOURCE, Self::DIMENSIONS], EmbedderSource::Rest => &[ Self::SOURCE, @@ -387,10 +391,14 @@ impl From for EmbeddingConfig { } EmbedderSource::Ollama => { let mut options: ollama::EmbedderOptions = - super::ollama::EmbedderOptions::with_default_model(); + super::ollama::EmbedderOptions::with_default_model( + api_key.set(), + url.set(), + ); if let Some(model) = model.set() { options.embedding_model = model; } + this.embedder_options = super::EmbedderOptions::Ollama(options); } EmbedderSource::HuggingFace => {