From 22ef2d877f6104af58a99721a78268fd53720c12 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 25 Jul 2024 12:00:18 +0200 Subject: [PATCH 1/9] Ensure test server has a single indexing thread --- meilisearch/tests/common/server.rs | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/meilisearch/tests/common/server.rs b/meilisearch/tests/common/server.rs index e2c25efc6..d5c6c20f9 100644 --- a/meilisearch/tests/common/server.rs +++ b/meilisearch/tests/common/server.rs @@ -1,6 +1,7 @@ #![allow(dead_code)] use std::path::Path; +use std::str::FromStr as _; use std::time::Duration; use actix_http::body::MessageBody; @@ -8,7 +9,7 @@ use actix_web::dev::ServiceResponse; use actix_web::http::StatusCode; use byte_unit::{Byte, Unit}; use clap::Parser; -use meilisearch::option::{IndexerOpts, MaxMemory, Opt}; +use meilisearch::option::{IndexerOpts, MaxMemory, MaxThreads, Opt}; use meilisearch::{analytics, create_app, setup_meilisearch, SubscriberForSecondLayer}; use once_cell::sync::Lazy; use tempfile::TempDir; @@ -239,7 +240,7 @@ pub fn default_settings(dir: impl AsRef) -> Opt { // memory has to be unlimited because several meilisearch are running in test context. max_indexing_memory: MaxMemory::unlimited(), skip_index_budget: true, - ..Parser::parse_from(None as Option<&str>) + max_indexing_threads: MaxThreads::from_str("1").unwrap(), }, experimental_enable_metrics: false, ..Parser::parse_from(None as Option<&str>) From 4654d51e056a483750cc87d08ab49fcdc2f8732a Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 22 Jul 2024 12:04:05 +0200 Subject: [PATCH 2/9] Add custom headers for REST embedder --- milli/src/update/index_documents/mod.rs | 1 + milli/src/update/settings.rs | 9 +++++++++ milli/src/vector/ollama.rs | 1 + milli/src/vector/openai.rs | 1 + milli/src/vector/rest.rs | 10 ++++++++- milli/src/vector/settings.rs | 27 +++++++++++++++++++++++++ 6 files changed, 48 insertions(+), 1 deletion(-) diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 1df31fff2..39919d94a 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -2744,6 +2744,7 @@ mod tests { request: Setting::NotSet, response: Setting::NotSet, distribution: Setting::NotSet, + headers: Setting::NotSet, }), ); settings.set_embedder_settings(embedders); diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index e423852f1..2836f4bc9 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1544,6 +1544,7 @@ fn validate_prompt( request, response, distribution, + headers, }) => { // validate let template = crate::prompt::Prompt::new(template) @@ -1561,6 +1562,7 @@ fn validate_prompt( request, response, distribution, + headers, })) } new => Ok(new), @@ -1584,6 +1586,7 @@ pub fn validate_embedding_settings( request, response, distribution, + headers, } = settings; if let Some(0) = dimensions.set() { @@ -1622,6 +1625,7 @@ pub fn validate_embedding_settings( request, response, distribution, + headers, })); }; match inferred_source { @@ -1630,6 +1634,7 @@ pub fn validate_embedding_settings( check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?; check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?; + check_unset(&headers, EmbeddingSettings::HEADERS, inferred_source, name)?; if let Setting::Set(model) = &model { let model = crate::vector::openai::EmbeddingModel::from_name(model.as_str()) @@ -1669,6 +1674,7 @@ pub fn validate_embedding_settings( check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?; check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?; + check_unset(&headers, EmbeddingSettings::HEADERS, inferred_source, name)?; } EmbedderSource::HuggingFace => { check_unset(&api_key, EmbeddingSettings::API_KEY, inferred_source, name)?; @@ -1677,6 +1683,7 @@ pub fn validate_embedding_settings( check_unset(&url, EmbeddingSettings::URL, inferred_source, name)?; check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?; check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?; + check_unset(&headers, EmbeddingSettings::HEADERS, inferred_source, name)?; } EmbedderSource::UserProvided => { check_unset(&model, EmbeddingSettings::MODEL, inferred_source, name)?; @@ -1693,6 +1700,7 @@ pub fn validate_embedding_settings( check_unset(&url, EmbeddingSettings::URL, inferred_source, name)?; check_unset(&request, EmbeddingSettings::REQUEST, inferred_source, name)?; check_unset(&response, EmbeddingSettings::RESPONSE, inferred_source, name)?; + check_unset(&headers, EmbeddingSettings::HEADERS, inferred_source, name)?; } EmbedderSource::Rest => { check_unset(&model, EmbeddingSettings::MODEL, inferred_source, name)?; @@ -1713,6 +1721,7 @@ pub fn validate_embedding_settings( request, response, distribution, + headers, })) } diff --git a/milli/src/vector/ollama.rs b/milli/src/vector/ollama.rs index 84baac1ba..d8b75342b 100644 --- a/milli/src/vector/ollama.rs +++ b/milli/src/vector/ollama.rs @@ -41,6 +41,7 @@ impl Embedder { response: serde_json::json!({ "embedding": super::rest::RESPONSE_PLACEHOLDER, }), + headers: Default::default(), }, super::rest::ConfigurationSource::Ollama, ) { diff --git a/milli/src/vector/openai.rs b/milli/src/vector/openai.rs index 514ad4a3b..ce63e69d7 100644 --- a/milli/src/vector/openai.rs +++ b/milli/src/vector/openai.rs @@ -195,6 +195,7 @@ impl Embedder { super::rest::REPEAT_PLACEHOLDER ] }), + headers: Default::default(), }, super::rest::ConfigurationSource::OpenAi, )?; diff --git a/milli/src/vector/rest.rs b/milli/src/vector/rest.rs index 35a7ebc41..593d2b509 100644 --- a/milli/src/vector/rest.rs +++ b/milli/src/vector/rest.rs @@ -1,3 +1,5 @@ +use std::collections::BTreeMap; + use deserr::Deserr; use rand::Rng; use rayon::iter::{IntoParallelIterator as _, ParallelIterator as _}; @@ -80,6 +82,7 @@ pub struct Embedder { struct EmbedderData { client: ureq::Agent, bearer: Option, + headers: BTreeMap, url: String, request: Request, response: Response, @@ -94,6 +97,7 @@ pub struct EmbedderOptions { pub url: String, pub request: serde_json::Value, pub response: serde_json::Value, + pub headers: BTreeMap, } impl std::hash::Hash for EmbedderOptions { @@ -138,6 +142,7 @@ impl Embedder { request, response, configuration_source, + headers: options.headers, }; let dimensions = if let Some(dimensions) = options.dimensions { @@ -223,7 +228,10 @@ where } else { request }; - let request = request.set("Content-Type", "application/json"); + let mut request = request.set("Content-Type", "application/json"); + for (header, value) in &data.headers { + request = request.set(header.as_str(), value.as_str()); + } let body = data.request.inject_texts(inputs); diff --git a/milli/src/vector/settings.rs b/milli/src/vector/settings.rs index e15999d4f..ef0c8f7ff 100644 --- a/milli/src/vector/settings.rs +++ b/milli/src/vector/settings.rs @@ -1,3 +1,5 @@ +use std::collections::BTreeMap; + use deserr::Deserr; use roaring::RoaringBitmap; use serde::{Deserialize, Serialize}; @@ -41,6 +43,9 @@ pub struct EmbeddingSettings { pub response: Setting, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default)] + pub headers: Setting>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default)] pub distribution: Setting, } @@ -105,6 +110,7 @@ impl SettingsDiff { mut request, mut response, mut distribution, + mut headers, } = old; let EmbeddingSettings { @@ -118,6 +124,7 @@ impl SettingsDiff { request: new_request, response: new_response, distribution: new_distribution, + headers: new_headers, } = new; let mut reindex_action = None; @@ -135,6 +142,7 @@ impl SettingsDiff { &mut request, &mut response, &mut document_template, + &mut headers, ) } if model.apply(new_model) { @@ -173,6 +181,7 @@ impl SettingsDiff { distribution.apply(new_distribution); api_key.apply(new_api_key); + headers.apply(new_headers); let updated_settings = EmbeddingSettings { source, @@ -185,6 +194,7 @@ impl SettingsDiff { request, response, distribution, + headers, }; match reindex_action { @@ -218,6 +228,7 @@ fn apply_default_for_source( request: &mut Setting, response: &mut Setting, document_template: &mut Setting, + headers: &mut Setting>, ) { match source { Setting::Set(EmbedderSource::HuggingFace) => { @@ -227,6 +238,7 @@ fn apply_default_for_source( *url = Setting::NotSet; *request = Setting::NotSet; *response = Setting::NotSet; + *headers = Setting::NotSet; } Setting::Set(EmbedderSource::Ollama) => { *model = Setting::Reset; @@ -235,6 +247,7 @@ fn apply_default_for_source( *url = Setting::NotSet; *request = Setting::NotSet; *response = Setting::NotSet; + *headers = Setting::NotSet; } Setting::Set(EmbedderSource::OpenAi) | Setting::Reset => { *model = Setting::Reset; @@ -243,6 +256,7 @@ fn apply_default_for_source( *url = Setting::Reset; *request = Setting::NotSet; *response = Setting::NotSet; + *headers = Setting::NotSet; } Setting::Set(EmbedderSource::Rest) => { *model = Setting::NotSet; @@ -251,6 +265,7 @@ fn apply_default_for_source( *url = Setting::Reset; *request = Setting::Reset; *response = Setting::Reset; + *headers = Setting::Reset; } Setting::Set(EmbedderSource::UserProvided) => { *model = Setting::NotSet; @@ -260,6 +275,7 @@ fn apply_default_for_source( *request = Setting::NotSet; *response = Setting::NotSet; *document_template = Setting::NotSet; + *headers = Setting::NotSet; } Setting::NotSet => {} } @@ -293,6 +309,7 @@ impl EmbeddingSettings { pub const URL: &'static str = "url"; pub const REQUEST: &'static str = "request"; pub const RESPONSE: &'static str = "response"; + pub const HEADERS: &'static str = "headers"; pub const DISTRIBUTION: &'static str = "distribution"; @@ -324,6 +341,7 @@ impl EmbeddingSettings { Self::URL => &[EmbedderSource::Ollama, EmbedderSource::Rest, EmbedderSource::OpenAi], Self::REQUEST => &[EmbedderSource::Rest], Self::RESPONSE => &[EmbedderSource::Rest], + Self::HEADERS => &[EmbedderSource::Rest], Self::DISTRIBUTION => &[ EmbedderSource::HuggingFace, EmbedderSource::Ollama, @@ -370,6 +388,7 @@ impl EmbeddingSettings { Self::URL, Self::REQUEST, Self::RESPONSE, + Self::HEADERS, Self::DISTRIBUTION, ], } @@ -440,6 +459,7 @@ impl From for EmbeddingSettings { url: Setting::NotSet, request: Setting::NotSet, response: Setting::NotSet, + headers: Setting::NotSet, distribution: distribution.map(Setting::Set).unwrap_or_default(), }, super::EmbedderOptions::OpenAi(super::openai::EmbedderOptions { @@ -458,6 +478,7 @@ impl From for EmbeddingSettings { url: url.map(Setting::Set).unwrap_or_default(), request: Setting::NotSet, response: Setting::NotSet, + headers: Setting::NotSet, distribution: distribution.map(Setting::Set).unwrap_or_default(), }, super::EmbedderOptions::Ollama(super::ollama::EmbedderOptions { @@ -475,6 +496,7 @@ impl From for EmbeddingSettings { url: url.map(Setting::Set).unwrap_or_default(), request: Setting::NotSet, response: Setting::NotSet, + headers: Setting::NotSet, distribution: distribution.map(Setting::Set).unwrap_or_default(), }, super::EmbedderOptions::UserProvided(super::manual::EmbedderOptions { @@ -490,6 +512,7 @@ impl From for EmbeddingSettings { url: Setting::NotSet, request: Setting::NotSet, response: Setting::NotSet, + headers: Setting::NotSet, distribution: distribution.map(Setting::Set).unwrap_or_default(), }, super::EmbedderOptions::Rest(super::rest::EmbedderOptions { @@ -499,6 +522,7 @@ impl From for EmbeddingSettings { request, response, distribution, + headers, }) => Self { source: Setting::Set(EmbedderSource::Rest), model: Setting::NotSet, @@ -510,6 +534,7 @@ impl From for EmbeddingSettings { request: Setting::Set(request), response: Setting::Set(response), distribution: distribution.map(Setting::Set).unwrap_or_default(), + headers: Setting::Set(headers), }, } } @@ -529,6 +554,7 @@ impl From for EmbeddingConfig { request, response, distribution, + headers, } = value; if let Some(source) = source.set() { @@ -598,6 +624,7 @@ impl From for EmbeddingConfig { request: request.set().unwrap(), response: response.set().unwrap(), distribution: distribution.set(), + headers: headers.set().unwrap_or_default(), }) } } From 8338df0dbe49fa2174fa613b090f57110250875a Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 22 Jul 2024 12:04:29 +0200 Subject: [PATCH 3/9] Fix tests --- .../snapshots/index_scheduler__tests__import_vectors-5.snap | 3 ++- .../snapshots/index_scheduler__tests__settings_update-5.snap | 3 ++- .../lib.rs/import_vectors/Intel to kefir succeeds.snap | 2 +- .../src/snapshots/lib.rs/import_vectors/Intel to kefir.snap | 2 +- .../snapshots/lib.rs/import_vectors/adding Intel succeeds.snap | 2 +- .../snapshots/lib.rs/import_vectors/after adding Intel.snap | 2 +- .../after_registering_settings_task_vectors.snap | 2 +- .../import_vectors/settings_update_processed_vectors.snap | 2 +- .../test_settings_update/after_registering_settings_task.snap | 2 +- .../lib.rs/test_settings_update/settings_update_processed.snap | 2 +- meilisearch/tests/settings/get_settings.rs | 3 ++- 11 files changed, 14 insertions(+), 11 deletions(-) diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-5.snap b/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-5.snap index 97a67ea0a..c08aa8116 100644 --- a/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-5.snap +++ b/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-5.snap @@ -9,6 +9,7 @@ expression: fakerest_config.embedder_options "dimensions": 384, "url": "http://localhost:7777", "request": "{{text}}", - "response": "{{embedding}}" + "response": "{{embedding}}", + "headers": {} } } diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__settings_update-5.snap b/index-scheduler/src/snapshots/index_scheduler__tests__settings_update-5.snap index a88f94df3..061de75a5 100644 --- a/index-scheduler/src/snapshots/index_scheduler__tests__settings_update-5.snap +++ b/index-scheduler/src/snapshots/index_scheduler__tests__settings_update-5.snap @@ -9,6 +9,7 @@ expression: config.embedder_options "dimensions": 4, "url": "http://localhost:7777", "request": "{{text}}", - "response": "{{embedding}}" + "response": "{{embedding}}", + "headers": {} } } diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap index a28e85204..add94c403 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} 2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap index 344134888..2c2e986a6 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} 2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap index fd8096d13..32cd22281 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap index 24098d658..adf7a06a6 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap index c11dfba62..30d71a7f5 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [0,] diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap index bc87d8212..163d23aac 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [] diff --git a/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap b/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap index e5fab9ad6..8bd4d7739 100644 --- a/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap +++ b/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [0,] diff --git a/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap b/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap index b49cee730..942e0b89f 100644 --- a/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap +++ b/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), headers: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [] diff --git a/meilisearch/tests/settings/get_settings.rs b/meilisearch/tests/settings/get_settings.rs index 2f51dfb44..58805d54f 100644 --- a/meilisearch/tests/settings/get_settings.rs +++ b/meilisearch/tests/settings/get_settings.rs @@ -192,7 +192,8 @@ async fn secrets_are_hidden_in_settings() { "documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}", "url": "https://localhost:7777", "request": "{{text}}", - "response": "{{embedding}}" + "response": "{{embedding}}", + "headers": {} } }, "searchCutoffMs": null, From 6c598fa06d5464fcb7e829b3f1dc6a5ebb1e999e Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 22 Jul 2024 16:05:39 +0200 Subject: [PATCH 4/9] test custom headers --- meilisearch/tests/vector/rest.rs | 175 +++++++++++++++++++++++++++++++ 1 file changed, 175 insertions(+) diff --git a/meilisearch/tests/vector/rest.rs b/meilisearch/tests/vector/rest.rs index dd22baad2..71d3c7cda 100644 --- a/meilisearch/tests/vector/rest.rs +++ b/meilisearch/tests/vector/rest.rs @@ -161,6 +161,55 @@ async fn create_mock_single_response_in_array() -> (MockServer, Value) { (mock_server, embedder_settings) } +async fn create_mock_raw_with_custom_header() -> (MockServer, Value) { + let mock_server = MockServer::start().await; + + let counter = AtomicUsize::new(0); + + Mock::given(method("POST")) + .and(path("/")) + .respond_with(move |req: &Request| { + match req.headers.get("my-nonstandard-auth") { + Some(x) if x == "bearer of the ring" => {} + Some(x) => { + return ResponseTemplate::new(401).set_body_json( + json!({"error": format!("thou shall not pass, {}", x.to_str().unwrap())}), + ) + } + None => { + return ResponseTemplate::new(401) + .set_body_json(json!({"error": "missing header 'my-nonstandard-auth'"})) + } + } + + let _req: String = match req.body_json() { + Ok(req) => req, + Err(error) => { + return ResponseTemplate::new(400).set_body_json(json!({ + "error": format!("Invalid request: {error}") + })); + } + }; + + let output = vec![counter.fetch_add(1, Ordering::Relaxed) as f32; 3]; + + ResponseTemplate::new(200).set_body_json(output) + }) + .mount(&mock_server) + .await; + let url = mock_server.uri(); + + let embedder_settings = json!({ + "source": "rest", + "url": url, + "request": "{{text}}", + "response": "{{embedding}}", + "headers": {"my-nonstandard-auth": "bearer of the ring"} + }); + + (mock_server, embedder_settings) +} + async fn create_mock_raw() -> (MockServer, Value) { let mock_server = MockServer::start().await; @@ -1732,3 +1781,129 @@ async fn server_raw() { } "###); } + +#[actix_rt::test] +async fn server_custom_header() { + let (mock, setting) = create_mock_raw_with_custom_header().await; + + let server = get_server_vector().await; + let index = server.index("doggo"); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), "request": "{{text}}", "response": "{{embedding}}" }), + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task, @r###" + { + "uid": 0, + "indexUid": "doggo", + "status": "failed", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "rest": { + "source": "rest", + "url": "[url]", + "request": "{{text}}", + "response": "{{embedding}}" + } + } + }, + "error": { + "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: could not authenticate against embedding server\n - server replied with `{\"error\":\"missing header 'my-nonstandard-auth'\"}`", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (response, code) = index +.update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), "request": "{{text}}", "response": "{{embedding}}", "headers": {"my-nonstandard-auth": "Balrog"} }), + }, +})) +.await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task, @r###" + { + "uid": 1, + "indexUid": "doggo", + "status": "failed", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "rest": { + "source": "rest", + "url": "[url]", + "request": "{{text}}", + "response": "{{embedding}}", + "headers": { + "my-nonstandard-auth": "Balrog" + } + } + } + }, + "error": { + "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: could not authenticate against embedding server\n - server replied with `{\"error\":\"thou shall not pass, Balrog\"}`", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": setting, + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task, @r###" + { + "uid": 2, + "indexUid": "doggo", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "rest": { + "source": "rest", + "url": "[url]", + "request": "{{text}}", + "response": "{{embedding}}", + "headers": { + "my-nonstandard-auth": "bearer of the ring" + } + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} From 7a347966daaacd9b427bb18928f0f984e0a9c001 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 22 Jul 2024 12:09:52 +0200 Subject: [PATCH 5/9] Allow explicit `dimensions` for ollama --- milli/src/update/settings.rs | 2 -- milli/src/vector/mod.rs | 16 ---------------- milli/src/vector/ollama.rs | 17 ++++++++++++++--- milli/src/vector/settings.rs | 27 ++++++++++++++++++++++----- 4 files changed, 36 insertions(+), 26 deletions(-) diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 2836f4bc9..e5e13a03c 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1667,8 +1667,6 @@ pub fn validate_embedding_settings( } } EmbedderSource::Ollama => { - // Dimensions get inferred, only model name is required - check_unset(&dimensions, EmbeddingSettings::DIMENSIONS, inferred_source, name)?; check_set(&model, EmbeddingSettings::MODEL, inferred_source, name)?; check_unset(&revision, EmbeddingSettings::REVISION, inferred_source, name)?; diff --git a/milli/src/vector/mod.rs b/milli/src/vector/mod.rs index a1c937d24..caccb404b 100644 --- a/milli/src/vector/mod.rs +++ b/milli/src/vector/mod.rs @@ -202,22 +202,6 @@ impl Default for EmbedderOptions { } } -impl EmbedderOptions { - /// Default options for the Hugging Face embedder - pub fn huggingface() -> Self { - Self::HuggingFace(hf::EmbedderOptions::new()) - } - - /// Default options for the OpenAI embedder - pub fn openai(api_key: Option) -> Self { - Self::OpenAi(openai::EmbedderOptions::with_default_model(api_key)) - } - - pub fn ollama(api_key: Option, url: Option) -> Self { - Self::Ollama(ollama::EmbedderOptions::with_default_model(api_key, url)) - } -} - impl Embedder { /// Spawns a new embedder built from its options. pub fn new(options: EmbedderOptions) -> std::result::Result { diff --git a/milli/src/vector/ollama.rs b/milli/src/vector/ollama.rs index d8b75342b..7d41ab4e9 100644 --- a/milli/src/vector/ollama.rs +++ b/milli/src/vector/ollama.rs @@ -17,11 +17,22 @@ pub struct EmbedderOptions { pub url: Option, pub api_key: Option, pub distribution: Option, + pub dimensions: Option, } impl EmbedderOptions { - pub fn with_default_model(api_key: Option, url: Option) -> Self { - Self { embedding_model: "nomic-embed-text".into(), api_key, url, distribution: None } + pub fn with_default_model( + api_key: Option, + url: Option, + dimensions: Option, + ) -> Self { + Self { + embedding_model: "nomic-embed-text".into(), + api_key, + url, + distribution: None, + dimensions, + } } } @@ -31,7 +42,7 @@ impl Embedder { let rest_embedder = match RestEmbedder::new( RestEmbedderOptions { api_key: options.api_key, - dimensions: None, + dimensions: options.dimensions, distribution: options.distribution, url: options.url.unwrap_or_else(get_ollama_path), request: serde_json::json!({ diff --git a/milli/src/vector/settings.rs b/milli/src/vector/settings.rs index ef0c8f7ff..2a6e18a77 100644 --- a/milli/src/vector/settings.rs +++ b/milli/src/vector/settings.rs @@ -152,7 +152,18 @@ impl SettingsDiff { ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); } if dimensions.apply(new_dimensions) { - ReindexAction::push_action(&mut reindex_action, ReindexAction::FullReindex); + match source { + // regenerate on dimensions change in OpenAI since truncation is supported + Setting::Set(EmbedderSource::OpenAi) | Setting::Reset => { + ReindexAction::push_action( + &mut reindex_action, + ReindexAction::FullReindex, + ); + } + // for all other embedders, the parameter is a hint that should not be able to change the result + // and so won't cause a reindex by itself. + _ => {} + } } if url.apply(new_url) { match source { @@ -329,9 +340,12 @@ impl EmbeddingSettings { Self::API_KEY => { &[EmbedderSource::OpenAi, EmbedderSource::Ollama, EmbedderSource::Rest] } - Self::DIMENSIONS => { - &[EmbedderSource::OpenAi, EmbedderSource::UserProvided, EmbedderSource::Rest] - } + Self::DIMENSIONS => &[ + EmbedderSource::OpenAi, + EmbedderSource::UserProvided, + EmbedderSource::Ollama, + EmbedderSource::Rest, + ], Self::DOCUMENT_TEMPLATE => &[ EmbedderSource::HuggingFace, EmbedderSource::OpenAi, @@ -377,6 +391,7 @@ impl EmbeddingSettings { Self::DOCUMENT_TEMPLATE, Self::URL, Self::API_KEY, + Self::DIMENSIONS, Self::DISTRIBUTION, ], EmbedderSource::UserProvided => &[Self::SOURCE, Self::DIMENSIONS, Self::DISTRIBUTION], @@ -486,12 +501,13 @@ impl From for EmbeddingSettings { url, api_key, distribution, + dimensions, }) => Self { source: Setting::Set(EmbedderSource::Ollama), model: Setting::Set(embedding_model), revision: Setting::NotSet, api_key: api_key.map(Setting::Set).unwrap_or_default(), - dimensions: Setting::NotSet, + dimensions: dimensions.map(Setting::Set).unwrap_or_default(), document_template: Setting::Set(prompt.template), url: url.map(Setting::Set).unwrap_or_default(), request: Setting::NotSet, @@ -583,6 +599,7 @@ impl From for EmbeddingConfig { super::ollama::EmbedderOptions::with_default_model( api_key.set(), url.set(), + dimensions.set(), ); if let Some(model) = model.set() { options.embedding_model = model; From 553440632e922cdacecee039983dec74baeadae9 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 22 Jul 2024 12:40:04 +0200 Subject: [PATCH 6/9] Introduce Setting::some_or_not_set --- milli/src/update/settings.rs | 7 +++++++ milli/src/vector/settings.rs | 28 ++++++++++++++-------------- 2 files changed, 21 insertions(+), 14 deletions(-) diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index e5e13a03c..9799fc6ec 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -67,6 +67,13 @@ impl Setting { } } + pub fn some_or_not_set(option: Option) -> Self { + match option { + Some(value) => Setting::Set(value), + None => Setting::NotSet, + } + } + pub const fn as_ref(&self) -> Setting<&T> { match *self { Self::Set(ref value) => Setting::Set(value), diff --git a/milli/src/vector/settings.rs b/milli/src/vector/settings.rs index 2a6e18a77..3cb90cbdb 100644 --- a/milli/src/vector/settings.rs +++ b/milli/src/vector/settings.rs @@ -467,7 +467,7 @@ impl From for EmbeddingSettings { }) => Self { source: Setting::Set(EmbedderSource::HuggingFace), model: Setting::Set(model), - revision: revision.map(Setting::Set).unwrap_or_default(), + revision: Setting::some_or_not_set(revision), api_key: Setting::NotSet, dimensions: Setting::NotSet, document_template: Setting::Set(prompt.template), @@ -475,7 +475,7 @@ impl From for EmbeddingSettings { request: Setting::NotSet, response: Setting::NotSet, headers: Setting::NotSet, - distribution: distribution.map(Setting::Set).unwrap_or_default(), + distribution: Setting::some_or_not_set(distribution), }, super::EmbedderOptions::OpenAi(super::openai::EmbedderOptions { url, @@ -487,14 +487,14 @@ impl From for EmbeddingSettings { source: Setting::Set(EmbedderSource::OpenAi), model: Setting::Set(embedding_model.name().to_owned()), revision: Setting::NotSet, - api_key: api_key.map(Setting::Set).unwrap_or_default(), - dimensions: dimensions.map(Setting::Set).unwrap_or_default(), + api_key: Setting::some_or_not_set(api_key), + dimensions: Setting::some_or_not_set(dimensions), document_template: Setting::Set(prompt.template), - url: url.map(Setting::Set).unwrap_or_default(), + url: Setting::some_or_not_set(url), request: Setting::NotSet, response: Setting::NotSet, headers: Setting::NotSet, - distribution: distribution.map(Setting::Set).unwrap_or_default(), + distribution: Setting::some_or_not_set(distribution), }, super::EmbedderOptions::Ollama(super::ollama::EmbedderOptions { embedding_model, @@ -506,14 +506,14 @@ impl From for EmbeddingSettings { source: Setting::Set(EmbedderSource::Ollama), model: Setting::Set(embedding_model), revision: Setting::NotSet, - api_key: api_key.map(Setting::Set).unwrap_or_default(), - dimensions: dimensions.map(Setting::Set).unwrap_or_default(), + api_key: Setting::some_or_not_set(api_key), + dimensions: Setting::some_or_not_set(dimensions), document_template: Setting::Set(prompt.template), - url: url.map(Setting::Set).unwrap_or_default(), + url: Setting::some_or_not_set(url), request: Setting::NotSet, response: Setting::NotSet, headers: Setting::NotSet, - distribution: distribution.map(Setting::Set).unwrap_or_default(), + distribution: Setting::some_or_not_set(distribution), }, super::EmbedderOptions::UserProvided(super::manual::EmbedderOptions { dimensions, @@ -529,7 +529,7 @@ impl From for EmbeddingSettings { request: Setting::NotSet, response: Setting::NotSet, headers: Setting::NotSet, - distribution: distribution.map(Setting::Set).unwrap_or_default(), + distribution: Setting::some_or_not_set(distribution), }, super::EmbedderOptions::Rest(super::rest::EmbedderOptions { api_key, @@ -543,13 +543,13 @@ impl From for EmbeddingSettings { source: Setting::Set(EmbedderSource::Rest), model: Setting::NotSet, revision: Setting::NotSet, - api_key: api_key.map(Setting::Set).unwrap_or_default(), - dimensions: dimensions.map(Setting::Set).unwrap_or_default(), + api_key: Setting::some_or_not_set(api_key), + dimensions: Setting::some_or_not_set(dimensions), document_template: Setting::Set(prompt.template), url: Setting::Set(url), request: Setting::Set(request), response: Setting::Set(response), - distribution: distribution.map(Setting::Set).unwrap_or_default(), + distribution: Setting::some_or_not_set(distribution), headers: Setting::Set(headers), }, } From 2413592bbf9ea5c18161fc686d839a7b17446cac Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 22 Jul 2024 15:41:12 +0200 Subject: [PATCH 7/9] Display docid when there are documents without manual embeddings for a manual embedder --- .../extract/extract_vector_points.rs | 124 +++++++++++++++++- .../src/update/index_documents/extract/mod.rs | 1 + 2 files changed, 123 insertions(+), 2 deletions(-) diff --git a/milli/src/update/index_documents/extract/extract_vector_points.rs b/milli/src/update/index_documents/extract/extract_vector_points.rs index b984c3020..a7515282b 100644 --- a/milli/src/update/index_documents/extract/extract_vector_points.rs +++ b/milli/src/update/index_documents/extract/extract_vector_points.rs @@ -95,6 +95,84 @@ enum ExtractionAction { DocumentOperation(DocumentOperation), } +struct ManualEmbedderErrors { + embedder_name: String, + docid: String, + other_docids: usize, +} + +impl ManualEmbedderErrors { + pub fn push_error( + errors: &mut Option, + embedder_name: &str, + document_id: impl Fn() -> Value, + ) { + match errors { + Some(errors) => { + if errors.embedder_name == embedder_name { + errors.other_docids = errors.other_docids.saturating_add(1) + } + } + None => { + *errors = Some(Self { + embedder_name: embedder_name.to_owned(), + docid: document_id().to_string(), + other_docids: 0, + }); + } + } + } + + pub fn to_result( + errors: Option, + possible_embedding_mistakes: &PossibleEmbeddingMistakes, + unused_vectors_distribution: &UnusedVectorsDistribution, + ) -> Result<()> { + match errors { + Some(errors) => { + let embedder_name = &errors.embedder_name; + let mut msg = format!( + r"While embedding documents for embedder `{embedder_name}`: no vectors provided for document {}{}", + errors.docid, + if errors.other_docids != 0 { + format!(" and at least {} other document(s)", errors.other_docids) + } else { + "".to_string() + } + ); + + msg += &format!("\n- Note: `{embedder_name}` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.{embedder_name}`."); + + let mut hint_count = 0; + + for (vector_misspelling, count) in + possible_embedding_mistakes.vector_mistakes().take(2) + { + msg += &format!("\n- Hint: try replacing `{vector_misspelling}` by `_vectors` in {count} document(s)."); + hint_count += 1; + } + + for (embedder_misspelling, count) in possible_embedding_mistakes + .embedder_mistakes(embedder_name, unused_vectors_distribution) + .take(2) + { + msg += &format!("\n- Hint: try replacing `_vectors.{embedder_misspelling}` by `_vectors.{embedder_name}` in {count} document(s)."); + hint_count += 1; + } + + if hint_count == 0 { + msg += &format!( + "\n- Hint: opt-out for a document with `_vectors.{embedder_name}: null`" + ); + } + + Err(crate::Error::UserError(crate::UserError::DocumentEmbeddingError(msg))) + } + None => Ok(()), + } + } +} + /// Extracts the embedding vector contained in each document under the `_vectors` field. /// /// Returns the generated grenad reader containing the docid as key associated to the Vec @@ -104,8 +182,10 @@ pub fn extract_vector_points( indexer: GrenadParameters, embedders_configs: &[IndexEmbeddingConfig], settings_diff: &InnerIndexSettingsDiff, + possible_embedding_mistakes: &PossibleEmbeddingMistakes, ) -> Result<(Vec, UnusedVectorsDistribution)> { let mut unused_vectors_distribution = UnusedVectorsDistribution::new(); + let mut manual_errors = None; let reindex_vectors = settings_diff.reindex_vectors(); let old_fields_ids_map = &settings_diff.old.fields_ids_map; @@ -246,7 +326,7 @@ pub fn extract_vector_points( for EmbedderVectorExtractor { embedder_name, - embedder: _, + embedder, prompt, prompts_writer, remove_vectors_writer, @@ -255,6 +335,8 @@ pub fn extract_vector_points( action, } in extractors.iter_mut() { + let embedder_is_manual = matches!(**embedder, Embedder::UserProvided(_)); + let (old, new) = parsed_vectors.remove(embedder_name); let delta = match action { ExtractionAction::SettingsFullReindex => match old { @@ -285,11 +367,29 @@ pub fn extract_vector_points( // this happens only when an existing embedder changed. We cannot regenerate userProvided vectors VectorState::Manual => VectorStateDelta::NoChange, // generated vectors must be regenerated - VectorState::Generated => regenerate_prompt(obkv, prompt, new_fields_ids_map)?, + VectorState::Generated => { + if embedder_is_manual { + ManualEmbedderErrors::push_error( + &mut manual_errors, + embedder_name.as_str(), + document_id, + ); + continue; + } + regenerate_prompt(obkv, prompt, new_fields_ids_map)? + } }, // prompt regeneration is only triggered for existing embedders ExtractionAction::SettingsRegeneratePrompts { old_prompt } => { if old.must_regenerate() { + if embedder_is_manual { + ManualEmbedderErrors::push_error( + &mut manual_errors, + embedder_name.as_str(), + document_id, + ); + continue; + } regenerate_if_prompt_changed( obkv, (old_prompt, prompt), @@ -311,6 +411,9 @@ pub fn extract_vector_points( (old, new), (old_fields_ids_map, new_fields_ids_map), document_id, + embedder_name, + embedder_is_manual, + &mut manual_errors, )?, }; // and we finally push the unique vectors into the writer @@ -326,6 +429,12 @@ pub fn extract_vector_points( unused_vectors_distribution.append(parsed_vectors); } + ManualEmbedderErrors::to_result( + manual_errors, + possible_embedding_mistakes, + &unused_vectors_distribution, + )?; + let mut results = Vec::new(); for EmbedderVectorExtractor { @@ -371,6 +480,9 @@ fn extract_vector_document_diff( (old, new): (VectorState, VectorState), (old_fields_ids_map, new_fields_ids_map): (&FieldsIdsMap, &FieldsIdsMap), document_id: impl Fn() -> Value, + embedder_name: &str, + embedder_is_manual: bool, + manual_errors: &mut Option, ) -> Result { match (old.must_regenerate(), new.must_regenerate()) { (true, true) | (false, false) => {} @@ -408,6 +520,10 @@ fn extract_vector_document_diff( .any(|deladd| deladd.get(DelAdd::Addition).is_some()); if document_is_kept { + if embedder_is_manual { + ManualEmbedderErrors::push_error(manual_errors, embedder_name, document_id); + return Ok(VectorStateDelta::NoChange); + } // Don't give up if the old prompt was failing let old_prompt = Some(&prompt).map(|p| { p.render(obkv, DelAdd::Deletion, old_fields_ids_map).unwrap_or_default() @@ -439,6 +555,10 @@ fn extract_vector_document_diff( .map(|(_, deladd)| KvReaderDelAdd::new(deladd)) .any(|deladd| deladd.get(DelAdd::Addition).is_some()); if document_is_kept { + if embedder_is_manual { + ManualEmbedderErrors::push_error(manual_errors, embedder_name, document_id); + return Ok(VectorStateDelta::NoChange); + } // becomes autogenerated VectorStateDelta::NowGenerated(prompt.render( obkv, diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs index 6c23a8da9..cab84400c 100644 --- a/milli/src/update/index_documents/extract/mod.rs +++ b/milli/src/update/index_documents/extract/mod.rs @@ -251,6 +251,7 @@ fn send_original_documents_data( indexer, &embedders_configs, &settings_diff, + &possible_embedding_mistakes, ) { Ok((extracted_vectors, unused_vectors_distribution)) => { for ExtractedVectorPoints { From 8532fe8afc52e0280e1e13ee2285e71ddcfd8f72 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 22 Jul 2024 15:42:04 +0200 Subject: [PATCH 8/9] Fix tests --- meilisearch/tests/vector/mod.rs | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/meilisearch/tests/vector/mod.rs b/meilisearch/tests/vector/mod.rs index 43c2ff606..4a142f86a 100644 --- a/meilisearch/tests/vector/mod.rs +++ b/meilisearch/tests/vector/mod.rs @@ -487,10 +487,11 @@ async fn user_provided_embeddings_error() { #[actix_rt::test] async fn user_provided_vectors_error() { let server = Server::new().await; + let index = generate_default_user_provided_documents(&server).await; // First case, we forget to specify `_vectors` - let documents = json!({"id": 42, "name": "kefir"}); + let documents = json!([{"id": 40, "name": "kefir"}, {"id": 41, "name": "intel"}, {"id": 42, "name": "max"}, {"id": 43, "name": "venus"}, {"id": 44, "name": "eva"}]); let (value, code) = index.add_documents(documents, None).await; snapshot!(code, @"202 Accepted"); let task = index.wait_task(value.uid()).await; @@ -502,11 +503,11 @@ async fn user_provided_vectors_error() { "type": "documentAdditionOrUpdate", "canceledBy": null, "details": { - "receivedDocuments": 1, + "receivedDocuments": 5, "indexedDocuments": 0 }, "error": { - "message": "While embedding documents for embedder `manual`: user error: attempt to embed the following text in a configuration where embeddings must be user provided:\n - ` id: 42\n name: kefir\n _vectors: \n _vectors.manual: \n _vectors.manual.regenerate: \n _vectors.manual.embeddings: \n`\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: opt-out for a document with `_vectors.manual: null`", + "message": "While embedding documents for embedder `manual`: no vectors provided for document \"40\" and at least 4 other document(s)\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: opt-out for a document with `_vectors.manual: null`", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" @@ -535,7 +536,7 @@ async fn user_provided_vectors_error() { "indexedDocuments": 0 }, "error": { - "message": "While embedding documents for embedder `manual`: user error: attempt to embed the following text in a configuration where embeddings must be user provided:\n - ` id: 42\n name: kefir\n _vectors: \n _vectors.manual: \n _vectors.manual.regenerate: \n _vectors.manual.embeddings: \n _vector: manaul000\n _vector.manaul: \n`\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vector` by `_vectors` in 1 document(s).", + "message": "While embedding documents for embedder `manual`: no vectors provided for document \"42\"\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vector` by `_vectors` in 1 document(s).", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" @@ -564,7 +565,7 @@ async fn user_provided_vectors_error() { "indexedDocuments": 0 }, "error": { - "message": "While embedding documents for embedder `manual`: user error: attempt to embed the following text in a configuration where embeddings must be user provided:\n - ` id: 42\n name: kefir\n _vectors: manaul000\n _vectors.manual: \n _vectors.manual.regenerate: \n _vectors.manual.embeddings: \n _vectors.manaul: \n`\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vectors.manaul` by `_vectors.manual` in 1 document(s).", + "message": "While embedding documents for embedder `manual`: no vectors provided for document \"42\"\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vectors.manaul` by `_vectors.manual` in 1 document(s).", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" From d4ea7cc2a94250d0bb749633acb4afcfcea5805f Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Mon, 22 Jul 2024 16:37:17 +0200 Subject: [PATCH 9/9] =?UTF-8?q?fix=20clippy=20=F0=9F=91=89=F0=9F=91=88?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .../src/update/index_documents/extract/extract_vector_points.rs | 1 + 1 file changed, 1 insertion(+) diff --git a/milli/src/update/index_documents/extract/extract_vector_points.rs b/milli/src/update/index_documents/extract/extract_vector_points.rs index a7515282b..f66c3fd46 100644 --- a/milli/src/update/index_documents/extract/extract_vector_points.rs +++ b/milli/src/update/index_documents/extract/extract_vector_points.rs @@ -472,6 +472,7 @@ pub fn extract_vector_points( Ok((results, unused_vectors_distribution)) } +#[allow(clippy::too_many_arguments)] // feel free to find efficient way to factor arguments fn extract_vector_document_diff( docid: DocumentId, obkv: obkv::KvReader<'_, FieldId>,