From 988552e1783786b0b065d25a0060529fdf81db07 Mon Sep 17 00:00:00 2001 From: Tamo Date: Wed, 12 Jun 2024 18:27:03 +0200 Subject: [PATCH] add tests on the rest embedder --- Cargo.lock | 82 +- index-scheduler/src/lib.rs | 4 + ...ex_scheduler__tests__import_vectors-2.snap | 4 +- ...ex_scheduler__tests__import_vectors-5.snap | 13 +- ...ndex_scheduler__tests__import_vectors.snap | 4 +- ...x_scheduler__tests__settings_update-2.snap | 4 +- ...x_scheduler__tests__settings_update-5.snap | 15 +- ...dex_scheduler__tests__settings_update.snap | 4 +- .../Intel to kefir succeeds.snap | 3 +- .../lib.rs/import_vectors/Intel to kefir.snap | 3 +- .../import_vectors/adding Intel succeeds.snap | 3 +- .../import_vectors/after adding Intel.snap | 3 +- ...ter_registering_settings_task_vectors.snap | 3 +- .../settings_update_processed_vectors.snap | 3 +- .../after_registering_settings_task.snap | 3 +- .../settings_update_processed.snap | 3 +- meilisearch/Cargo.toml | 1 + meilisearch/tests/common/mod.rs | 9 +- meilisearch/tests/search/hybrid.rs | 2 +- meilisearch/tests/settings/get_settings.rs | 19 +- meilisearch/tests/vector/mod.rs | 7 +- meilisearch/tests/vector/rest.rs | 1734 +++++++++++++++++ milli/src/update/index_documents/mod.rs | 7 +- 23 files changed, 1867 insertions(+), 66 deletions(-) create mode 100644 meilisearch/tests/vector/rest.rs diff --git a/Cargo.lock b/Cargo.lock index bf0415915..c72053be7 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -55,7 +55,7 @@ dependencies = [ "encoding_rs", "flate2", "futures-core", - "h2", + "h2 0.3.26", "http 0.2.11", "httparse", "httpdate", @@ -403,6 +403,16 @@ dependencies = [ "thiserror", ] +[[package]] +name = "assert-json-diff" +version = "2.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "47e4f2b81832e72834d7518d8487a0396a28cc408186a2e8854c0f98011faf12" +dependencies = [ + "serde", + "serde_json", +] + [[package]] name = "async-trait" version = "0.1.81" @@ -414,6 +424,12 @@ dependencies = [ "syn 2.0.60", ] +[[package]] +name = "atomic-waker" +version = "1.1.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0" + [[package]] name = "autocfg" version = "1.2.0" @@ -1377,6 +1393,24 @@ dependencies = [ "syn 2.0.60", ] +[[package]] +name = "deadpool" +version = "0.10.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fb84100978c1c7b37f09ed3ce3e5f843af02c2a2c431bae5b19230dad2c1b490" +dependencies = [ + "async-trait", + "deadpool-runtime", + "num_cpus", + "tokio", +] + +[[package]] +name = "deadpool-runtime" +version = "0.1.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "092966b41edc516079bdf31ec78a2e0588d1d0c08f78b91d8307215928642b2b" + [[package]] name = "debugid" version = "0.8.0" @@ -2231,6 +2265,25 @@ dependencies = [ "tracing", ] +[[package]] +name = "h2" +version = "0.4.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "fa82e28a107a8cc405f0839610bdc9b15f1e25ec7d696aa5cf173edbcb1486ab" +dependencies = [ + "atomic-waker", + "bytes", + "fnv", + "futures-core", + "futures-sink", + "http 1.1.0", + "indexmap", + "slab", + "tokio", + "tokio-util", + "tracing", +] + [[package]] name = "half" version = "1.8.2" @@ -2441,9 +2494,11 @@ dependencies = [ "bytes", "futures-channel", "futures-util", + "h2 0.4.5", "http 1.1.0", "http-body", "httparse", + "httpdate", "itoa", "pin-project-lite", "smallvec", @@ -3423,6 +3478,7 @@ dependencies = [ "url", "urlencoding", "uuid", + "wiremock", "yaup", "zip 2.1.3", ] @@ -6281,6 +6337,30 @@ dependencies = [ "windows-sys 0.48.0", ] +[[package]] +name = "wiremock" +version = "0.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ec874e1eef0df2dcac546057fe5e29186f09c378181cd7b635b4b7bcc98e9d81" +dependencies = [ + "assert-json-diff", + "async-trait", + "base64 0.21.7", + "deadpool", + "futures", + "http 1.1.0", + "http-body-util", + "hyper", + "hyper-util", + "log", + "once_cell", + "regex", + "serde", + "serde_json", + "tokio", + "url", +] + [[package]] name = "wyz" version = "0.5.1" diff --git a/index-scheduler/src/lib.rs b/index-scheduler/src/lib.rs index ca9bca820..c9ea70bb8 100644 --- a/index-scheduler/src/lib.rs +++ b/index-scheduler/src/lib.rs @@ -3047,6 +3047,8 @@ mod tests { api_key: Setting::Set(S("My super secret")), url: Setting::Set(S("http://localhost:7777")), dimensions: Setting::Set(4), + request: Setting::Set(serde_json::json!("{{text}}")), + response: Setting::Set(serde_json::json!("{{embedding}}")), ..Default::default() }; embedders.insert(S("default"), Setting::Set(embedding_settings)); @@ -5006,6 +5008,8 @@ mod tests { api_key: Setting::Set(S("My super secret")), url: Setting::Set(S("http://localhost:7777")), dimensions: Setting::Set(384), + request: Setting::Set(serde_json::json!("{{text}}")), + response: Setting::Set(serde_json::json!("{{embedding}}")), ..Default::default() }; embedders.insert(S("A_fakerest"), Setting::Set(embedding_settings)); diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-2.snap b/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-2.snap index bc16fc8be..629ea87dc 100644 --- a/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-2.snap +++ b/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-2.snap @@ -8,7 +8,9 @@ expression: task.details "source": "rest", "apiKey": "MyXXXX...", "dimensions": 384, - "url": "http://localhost:7777" + "url": "http://localhost:7777", + "request": "{{text}}", + "response": "{{embedding}}" }, "B_small_hf": { "source": "huggingFace", diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-5.snap b/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-5.snap index 013115a58..97a67ea0a 100644 --- a/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-5.snap +++ b/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors-5.snap @@ -8,16 +8,7 @@ expression: fakerest_config.embedder_options "distribution": null, "dimensions": 384, "url": "http://localhost:7777", - "query": null, - "input_field": [ - "input" - ], - "path_to_embeddings": [ - "data" - ], - "embedding_object": [ - "embedding" - ], - "input_type": "text" + "request": "{{text}}", + "response": "{{embedding}}" } } diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors.snap b/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors.snap index bc16fc8be..629ea87dc 100644 --- a/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors.snap +++ b/index-scheduler/src/snapshots/index_scheduler__tests__import_vectors.snap @@ -8,7 +8,9 @@ expression: task.details "source": "rest", "apiKey": "MyXXXX...", "dimensions": 384, - "url": "http://localhost:7777" + "url": "http://localhost:7777", + "request": "{{text}}", + "response": "{{embedding}}" }, "B_small_hf": { "source": "huggingFace", diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__settings_update-2.snap b/index-scheduler/src/snapshots/index_scheduler__tests__settings_update-2.snap index 72a25f915..2b76f46a6 100644 --- a/index-scheduler/src/snapshots/index_scheduler__tests__settings_update-2.snap +++ b/index-scheduler/src/snapshots/index_scheduler__tests__settings_update-2.snap @@ -8,7 +8,9 @@ expression: task.details "source": "rest", "apiKey": "MyXXXX...", "dimensions": 4, - "url": "http://localhost:7777" + "url": "http://localhost:7777", + "request": "{{text}}", + "response": "{{embedding}}" } } } diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__settings_update-5.snap b/index-scheduler/src/snapshots/index_scheduler__tests__settings_update-5.snap index f7ae1c00a..a88f94df3 100644 --- a/index-scheduler/src/snapshots/index_scheduler__tests__settings_update-5.snap +++ b/index-scheduler/src/snapshots/index_scheduler__tests__settings_update-5.snap @@ -1,6 +1,6 @@ --- source: index-scheduler/src/lib.rs -expression: embedding_config.embedder_options +expression: config.embedder_options --- { "Rest": { @@ -8,16 +8,7 @@ expression: embedding_config.embedder_options "distribution": null, "dimensions": 4, "url": "http://localhost:7777", - "query": null, - "input_field": [ - "input" - ], - "path_to_embeddings": [ - "data" - ], - "embedding_object": [ - "embedding" - ], - "input_type": "text" + "request": "{{text}}", + "response": "{{embedding}}" } } diff --git a/index-scheduler/src/snapshots/index_scheduler__tests__settings_update.snap b/index-scheduler/src/snapshots/index_scheduler__tests__settings_update.snap index 72a25f915..2b76f46a6 100644 --- a/index-scheduler/src/snapshots/index_scheduler__tests__settings_update.snap +++ b/index-scheduler/src/snapshots/index_scheduler__tests__settings_update.snap @@ -8,7 +8,9 @@ expression: task.details "source": "rest", "apiKey": "MyXXXX...", "dimensions": 4, - "url": "http://localhost:7777" + "url": "http://localhost:7777", + "request": "{{text}}", + "response": "{{embedding}}" } } } diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap index 6b285ba56..6f2da1f17 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} 2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- @@ -46,4 +46,3 @@ doggos: { number_of_documents: 1, field_distribution: {"_vectors": 1, "breed": 1 ### File Store: ---------------------------------------------------------------------- - diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap index 6f23d96fd..569556a19 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} 2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- @@ -45,4 +45,3 @@ doggos: { number_of_documents: 1, field_distribution: {"_vectors": 1, "breed": 1 00000000-0000-0000-0000-000000000001 ---------------------------------------------------------------------- - diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap index 5dcb5a4f7..b626d8bc5 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: @@ -42,4 +42,3 @@ doggos: { number_of_documents: 1, field_distribution: {"_vectors": 1, "breed": 1 ### File Store: ---------------------------------------------------------------------- - diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap index 80521df42..65f758f32 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: @@ -41,4 +41,3 @@ doggos: { number_of_documents: 0, field_distribution: {} } 00000000-0000-0000-0000-000000000000 ---------------------------------------------------------------------- - diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap index 97b669f44..9c628461d 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [0,] @@ -33,4 +33,3 @@ doggos [0,] ### File Store: ---------------------------------------------------------------------- - diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap index f3ce4b104..eddf6d7e8 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [] @@ -37,4 +37,3 @@ doggos: { number_of_documents: 0, field_distribution: {} } ### File Store: ---------------------------------------------------------------------- - diff --git a/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap b/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap index dad082667..7873fb6cf 100644 --- a/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap +++ b/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [0,] @@ -33,4 +33,3 @@ doggos [0,] ### File Store: ---------------------------------------------------------------------- - diff --git a/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap b/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap index 271db8765..8a4838094 100644 --- a/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap +++ b/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), query: NotSet, input_field: NotSet, path_to_embeddings: NotSet, embedding_object: NotSet, input_type: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [] @@ -37,4 +37,3 @@ doggos: { number_of_documents: 0, field_distribution: {} } ### File Store: ---------------------------------------------------------------------- - diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index 7db47f7ad..e614ecc6a 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -114,6 +114,7 @@ maplit = "1.0.2" meili-snap = { path = "../meili-snap" } temp-env = "0.3.6" urlencoding = "2.1.3" +wiremock = "0.6.0" yaup = "0.3.1" [build-dependencies] diff --git a/meilisearch/tests/common/mod.rs b/meilisearch/tests/common/mod.rs index adcef9fe6..ee1d8aa6e 100644 --- a/meilisearch/tests/common/mod.rs +++ b/meilisearch/tests/common/mod.rs @@ -80,7 +80,14 @@ impl Display for Value { write!( f, "{}", - json_string!(self, { ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", ".duration" => "[duration]", ".processingTimeMs" => "[duration]" }) + json_string!(self, { + ".enqueuedAt" => "[date]", + ".startedAt" => "[date]", + ".finishedAt" => "[date]", + ".duration" => "[duration]", + ".processingTimeMs" => "[duration]", + ".details.embedders.*.url" => "[url]" + }) ) } } diff --git a/meilisearch/tests/search/hybrid.rs b/meilisearch/tests/search/hybrid.rs index 95d3bb933..ee4181694 100644 --- a/meilisearch/tests/search/hybrid.rs +++ b/meilisearch/tests/search/hybrid.rs @@ -497,7 +497,7 @@ async fn query_combination() { snapshot!(code, @"400 Bad Request"); snapshot!(response, @r###" { - "message": "Error while generating embeddings: user error: attempt to embed the following text in a configuration where embeddings must be user provided: \"Captain\"", + "message": "Error while generating embeddings: user error: attempt to embed the following text in a configuration where embeddings must be user provided:\n - `Captain`", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" diff --git a/meilisearch/tests/settings/get_settings.rs b/meilisearch/tests/settings/get_settings.rs index 5571a1e03..239151197 100644 --- a/meilisearch/tests/settings/get_settings.rs +++ b/meilisearch/tests/settings/get_settings.rs @@ -116,6 +116,8 @@ async fn secrets_are_hidden_in_settings() { "url": "https://localhost:7777", "apiKey": "My super secret value you will never guess", "dimensions": 4, + "request": "{{text}}", + "response": "{{embedding}}" } } })) @@ -189,17 +191,8 @@ async fn secrets_are_hidden_in_settings() { "dimensions": 4, "documentTemplate": "{% for field in fields %} {{ field.name }}: {{ field.value }}\n{% endfor %}", "url": "https://localhost:7777", - "query": null, - "inputField": [ - "input" - ], - "pathToEmbeddings": [ - "data" - ], - "embeddingObject": [ - "embedding" - ], - "inputType": "text" + "request": "{{text}}", + "response": "{{embedding}}" } }, "searchCutoffMs": null @@ -215,7 +208,9 @@ async fn secrets_are_hidden_in_settings() { "source": "rest", "apiKey": "My suXXXXXX...", "dimensions": 4, - "url": "https://localhost:7777" + "url": "https://localhost:7777", + "request": "{{text}}", + "response": "{{embedding}}" } } } diff --git a/meilisearch/tests/vector/mod.rs b/meilisearch/tests/vector/mod.rs index 0419f4533..43c2ff606 100644 --- a/meilisearch/tests/vector/mod.rs +++ b/meilisearch/tests/vector/mod.rs @@ -1,3 +1,4 @@ +mod rest; mod settings; use meili_snap::{json_string, snapshot}; @@ -505,7 +506,7 @@ async fn user_provided_vectors_error() { "indexedDocuments": 0 }, "error": { - "message": "While embedding documents for embedder `manual`: user error: attempt to embed the following text in a configuration where embeddings must be user provided: \" id: 42\\n name: kefir\\n _vectors: \\n _vectors.manual: \\n _vectors.manual.regenerate: \\n _vectors.manual.embeddings: \\n\"\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: opt-out for a document with `_vectors.manual: null`", + "message": "While embedding documents for embedder `manual`: user error: attempt to embed the following text in a configuration where embeddings must be user provided:\n - ` id: 42\n name: kefir\n _vectors: \n _vectors.manual: \n _vectors.manual.regenerate: \n _vectors.manual.embeddings: \n`\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: opt-out for a document with `_vectors.manual: null`", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" @@ -534,7 +535,7 @@ async fn user_provided_vectors_error() { "indexedDocuments": 0 }, "error": { - "message": "While embedding documents for embedder `manual`: user error: attempt to embed the following text in a configuration where embeddings must be user provided: \" id: 42\\n name: kefir\\n _vectors: \\n _vectors.manual: \\n _vectors.manual.regenerate: \\n _vectors.manual.embeddings: \\n _vector: manaul000\\n _vector.manaul: \\n\"\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vector` by `_vectors` in 1 document(s).", + "message": "While embedding documents for embedder `manual`: user error: attempt to embed the following text in a configuration where embeddings must be user provided:\n - ` id: 42\n name: kefir\n _vectors: \n _vectors.manual: \n _vectors.manual.regenerate: \n _vectors.manual.embeddings: \n _vector: manaul000\n _vector.manaul: \n`\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vector` by `_vectors` in 1 document(s).", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" @@ -563,7 +564,7 @@ async fn user_provided_vectors_error() { "indexedDocuments": 0 }, "error": { - "message": "While embedding documents for embedder `manual`: user error: attempt to embed the following text in a configuration where embeddings must be user provided: \" id: 42\\n name: kefir\\n _vectors: manaul000\\n _vectors.manual: \\n _vectors.manual.regenerate: \\n _vectors.manual.embeddings: \\n _vectors.manaul: \\n\"\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vectors.manaul` by `_vectors.manual` in 1 document(s).", + "message": "While embedding documents for embedder `manual`: user error: attempt to embed the following text in a configuration where embeddings must be user provided:\n - ` id: 42\n name: kefir\n _vectors: manaul000\n _vectors.manual: \n _vectors.manual.regenerate: \n _vectors.manual.embeddings: \n _vectors.manaul: \n`\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vectors.manaul` by `_vectors.manual` in 1 document(s).", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" diff --git a/meilisearch/tests/vector/rest.rs b/meilisearch/tests/vector/rest.rs new file mode 100644 index 000000000..dd22baad2 --- /dev/null +++ b/meilisearch/tests/vector/rest.rs @@ -0,0 +1,1734 @@ +use std::sync::atomic::{AtomicUsize, Ordering}; + +use meili_snap::{json_string, snapshot}; +use reqwest::IntoUrl; +use wiremock::matchers::{method, path}; +use wiremock::{Mock, MockServer, Request, ResponseTemplate}; + +use crate::common::{Server, Value}; +use crate::json; +use crate::vector::GetAllDocumentsOptions; + +async fn create_mock() -> (MockServer, Value) { + let mock_server = MockServer::start().await; + + let counter = AtomicUsize::new(0); + + Mock::given(method("POST")) + .and(path("/")) + .respond_with(move |_req: &Request| { + let counter = counter.fetch_add(1, Ordering::Relaxed); + ResponseTemplate::new(200).set_body_json(json!({ "data": vec![counter; 3] })) + }) + .mount(&mock_server) + .await; + let url = mock_server.uri(); + + let embedder_settings = json!({ + "source": "rest", + "url": url, + "dimensions": 3, + "request": "{{text}}", + "response": { + "data": "{{embedding}}" + } + }); + + (mock_server, embedder_settings) +} + +#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)] +struct MultipleRequest { + input: Vec, +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +struct MultipleResponse { + output: Vec, +} + +#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)] +struct SingleResponse { + text: String, + embedding: Vec, +} + +async fn create_mock_multiple() -> (MockServer, Value) { + let mock_server = MockServer::start().await; + + let counter = AtomicUsize::new(0); + + Mock::given(method("POST")) + .and(path("/")) + .respond_with(move |req: &Request| { + let req: MultipleRequest = match req.body_json() { + Ok(req) => req, + Err(error) => { + return ResponseTemplate::new(400).set_body_json(json!({ + "error": format!("Invalid request: {error}") + })); + } + }; + + let output = req + .input + .into_iter() + .map(|text| SingleResponse { + text, + embedding: vec![counter.fetch_add(1, Ordering::Relaxed) as f32; 3], + }) + .collect(); + + let response = MultipleResponse { output }; + + ResponseTemplate::new(200).set_body_json(response) + }) + .mount(&mock_server) + .await; + let url = mock_server.uri(); + + let embedder_settings = json!({ + "source": "rest", + "url": url, + "dimensions": 3, + "request": { + "input": ["{{text}}", "{{..}}"] + }, + "response": { + "output": [ + { + "embedding": "{{embedding}}" + }, + "{{..}}" + ] + } + }); + + (mock_server, embedder_settings) +} + +#[derive(Debug, Clone, serde::Deserialize, serde::Serialize)] +struct SingleRequest { + input: String, +} + +async fn create_mock_single_response_in_array() -> (MockServer, Value) { + let mock_server = MockServer::start().await; + + let counter = AtomicUsize::new(0); + + Mock::given(method("POST")) + .and(path("/")) + .respond_with(move |req: &Request| { + let req: SingleRequest = match req.body_json() { + Ok(req) => req, + Err(error) => { + return ResponseTemplate::new(400).set_body_json(json!({ + "error": format!("Invalid request: {error}") + })); + } + }; + + let output = vec![SingleResponse { + text: req.input, + embedding: vec![counter.fetch_add(1, Ordering::Relaxed) as f32; 3], + }]; + + let response = MultipleResponse { output }; + + ResponseTemplate::new(200).set_body_json(response) + }) + .mount(&mock_server) + .await; + let url = mock_server.uri(); + + let embedder_settings = json!({ + "source": "rest", + "url": url, + "dimensions": 3, + "request": { + "input": "{{text}}" + }, + "response": { + "output": [ + { + "embedding": "{{embedding}}" + } + ] + } + }); + + (mock_server, embedder_settings) +} + +async fn create_mock_raw() -> (MockServer, Value) { + let mock_server = MockServer::start().await; + + let counter = AtomicUsize::new(0); + + Mock::given(method("POST")) + .and(path("/")) + .respond_with(move |req: &Request| { + let _req: String = match req.body_json() { + Ok(req) => req, + Err(error) => { + return ResponseTemplate::new(400).set_body_json(json!({ + "error": format!("Invalid request: {error}") + })); + } + }; + + let output = vec![counter.fetch_add(1, Ordering::Relaxed) as f32; 3]; + + ResponseTemplate::new(200).set_body_json(output) + }) + .mount(&mock_server) + .await; + let url = mock_server.uri(); + + let embedder_settings = json!({ + "source": "rest", + "url": url, + "dimensions": 3, + "request": "{{text}}", + "response": "{{embedding}}" + }); + + (mock_server, embedder_settings) +} + +pub async fn post(url: T) -> reqwest::Result { + reqwest::Client::builder().build()?.post(url).send().await +} + +#[actix_rt::test] +async fn dummy_testing_the_mock() { + let (mock, _setting) = create_mock().await; + let body = post(&mock.uri()).await.unwrap().text().await.unwrap(); + snapshot!(body, @r###"{"data":[0,0,0]}"###); + let body = post(&mock.uri()).await.unwrap().text().await.unwrap(); + snapshot!(body, @r###"{"data":[1,1,1]}"###); + let body = post(&mock.uri()).await.unwrap().text().await.unwrap(); + snapshot!(body, @r###"{"data":[2,2,2]}"###); + let body = post(&mock.uri()).await.unwrap().text().await.unwrap(); + snapshot!(body, @r###"{"data":[3,3,3]}"###); + let body = post(&mock.uri()).await.unwrap().text().await.unwrap(); + snapshot!(body, @r###"{"data":[4,4,4]}"###); +} + +async fn get_server_vector() -> Server { + let server = Server::new().await; + let (value, code) = server.set_features(json!({"vectorStore": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(value, @r###" + { + "vectorStore": true, + "metrics": false, + "logsRoute": false, + "editDocumentsByFunction": false, + "containsFilter": false + } + "###); + server +} + +#[actix_rt::test] +async fn bad_request() { + let (mock, _setting) = create_mock().await; + + let server = get_server_vector().await; + let index = server.index("doggo"); + + // No placeholder string appear in the template + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), "request": "54", "response": "{{embedding}}" }), + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Error while generating embeddings: user error: in `request`: \"{{text}}\" not found", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + } + "###); + + // A repeat string appears inside a repeated value + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), "request": { + "input": [ + { + "input": [ + "{{text}}", + "{{..}}" + ] + }, + "{{..}}" + ] + }, "response": "{{embedding}}" }), + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Error while generating embeddings: user error: in `request.input.input`: \"{{..}}\" appears nested inside of a value that is itself repeated", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + } + "###); + + // A repeat string appears outside of an array + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), "request": { + "input": { + "input": "{{text}}", + "repeat": "{{..}}" + } + }, "response": "{{embedding}}" }), + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Error while generating embeddings: user error: in `request.input.repeat`: \"{{..}}\" appears outside of an array", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + } + "###); + + // A repeat string appears in an array, but not in the second position + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), "request": { + "input": [ + "{{..}}", + "{{text}}" + ] + }, "response": "{{embedding}}" }), + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Error while generating embeddings: user error: in `request.input`: \"{{..}}\" expected at position #1, but found at position #0", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + } + "###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), "request": { + "input": [ + "{{text}}", + "42", + "{{..}}", + ] + }, "response": "{{embedding}}" }), + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Error while generating embeddings: user error: in `request.input`: \"{{..}}\" expected at position #1, but found at position #2", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + } + "###); + + // A repeated value lacks a placeholder + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), "request": { + "input": [ + "42", + "{{..}}", + ] + }, "response": "{{embedding}}" }), + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Error while generating embeddings: user error: in `request.input[0]`: Expected \"{{text}}\" inside of the repeated value", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + } + "###); + + // Multiple repeat strings appear in the template + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), "request": { + "input": [ + "{{text}}", + "{{..}}", + ], + "data": [ + "42", + "{{..}}", + ], + }, "response": "{{embedding}}" }), + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Error while generating embeddings: user error: in `request.data`: Found \"{{..}}\", but it was already present in `request.input`", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + } + "###); + + // Multiple placeholder strings appear in the template + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), "request": { + "input": "{{text}}", + "data": "{{text}}", + }, "response": "{{embedding}}" }), + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Error while generating embeddings: user error: in `request.data`: Found \"{{text}}\", but it was already present in `request.input`", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + } + "###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), "request": + {"repeated": [{ + "input": "{{text}}", + "data": [42, "{{text}}"], + }, "{{..}}"]}, "response": "{{embedding}}" }), + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Error while generating embeddings: user error: in `request.repeated.data[1]`: Found \"{{text}}\", but it was already present in `request.repeated.input`", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + } + "###); + + // A placeholder appears both inside a repeated value and outside of it + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), "request": { + "input": ["{{text}}", "{{..}}"], + "data": "{{text}}", + }, "response": "{{embedding}}" }), + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Error while generating embeddings: user error: in `request.data`: Found \"{{text}}\", but it was already present in `request.input[0]` (repeated)", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + } + "###); +} + +#[actix_rt::test] +async fn bad_response() { + let (mock, _setting) = create_mock().await; + + let server = get_server_vector().await; + let index = server.index("doggo"); + + // No placeholder string appear in the template + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), "request": "{{text}}", "response": "42" }), + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Error while generating embeddings: user error: in `response`: \"{{embedding}}\" not found", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + } + "###); + + // A repeat string appears inside a repeated value + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), "response": { + "output": [ + { + "output": [ + "{{embedding}}", + "{{..}}" + ] + }, + "{{..}}" + ] + }, "request": "{{text}}" }), + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Error while generating embeddings: user error: in `response.output.output`: \"{{..}}\" appears nested inside of a value that is itself repeated", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + } + "###); + + // A repeat string appears outside of an array + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), "response": { + "output": { + "output": "{{embedding}}", + "repeat": "{{..}}" + } + }, "request": "{{text}}" }), + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Error while generating embeddings: user error: in `response.output.repeat`: \"{{..}}\" appears outside of an array", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + } + "###); + + // A repeat string appears in an array, but not in the second position + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), "response": { + "output": [ + "{{..}}", + "{{embedding}}" + ] + }, "request": "{{text}}" }), + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Error while generating embeddings: user error: in `response.output`: \"{{..}}\" expected at position #1, but found at position #0", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + } + "###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), "response": { + "output": [ + "{{embedding}}", + "42", + "{{..}}", + ] + }, "request": "{{text}}" }), + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Error while generating embeddings: user error: in `response.output`: \"{{..}}\" expected at position #1, but found at position #2", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + } + "###); + + // A repeated value lacks a placeholder + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), "response": { + "output": [ + "42", + "{{..}}", + ] + }, "request": "{{text}}" }), + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Error while generating embeddings: user error: in `response.output[0]`: Expected \"{{embedding}}\" inside of the repeated value", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + } + "###); + + // Multiple repeat strings appear in the template + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), "response": { + "output": [ + "{{embedding}}", + "{{..}}", + ], + "data": [ + "42", + "{{..}}", + ], + }, "request": "{{text}}" }), + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Error while generating embeddings: user error: in `response.data`: Found \"{{..}}\", but it was already present in `response.output`", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + } + "###); + + // Multiple placeholder strings appear in the template + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), "response": { + "output": [{"type": "data", "data": "{{embedding}}"}], + "data": "{{embedding}}", + }, "request": "{{text}}" }), + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Error while generating embeddings: user error: in `response.data`: Found \"{{embedding}}\", but it was already present in `response.output[0].data`", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + } + "###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), "response": + {"repeated": [{ + "output": "{{embedding}}", + "data": [42, "{{embedding}}"], + }, "{{..}}"]}, "request": "{{text}}" }), + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Error while generating embeddings: user error: in `response.repeated.data[1]`: Found \"{{embedding}}\", but it was already present in `response.repeated.output`", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + } + "###); + + // A placeholder appears both inside a repeated value and outside of it + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), "response": { + "output": ["{{embedding}}", "{{..}}"], + "data": "{{embedding}}", + }, "request": "{{text}}" }), + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Error while generating embeddings: user error: in `response.data`: Found \"{{embedding}}\", but it was already present in `response.output[0]` (repeated)", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + } + "###); + + // request sends a single text but response expects multiple embeddings + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), "response": { + "data": ["{{embedding}}", "{{..}}"], + }, "request": "{{text}}" }), + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Error while generating embeddings: user error: in `response`: `response` has multiple embeddings, but `request` has only one text to embed", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + } + "###); + + // request sends multiple texts but response expects a single embedding + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), "response": { + "data": "{{embedding}}", + }, "request": {"data": ["{{text}}", "{{..}}"]} }), + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Error while generating embeddings: user error: in `response`: `response` has a single embedding, but `request` has multiple texts to embed", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + } + "###); +} + +#[actix_rt::test] +async fn bad_settings() { + let (mock, _setting) = create_mock().await; + + let server = get_server_vector().await; + let index = server.index("doggo"); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "request": 42, "response": 42 }), + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Error while generating embeddings: user error: in `request`: \"{{text}}\" not found", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + } + "###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": "kefir", "request": 42, "response": 42 }), + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "`.embedders.rest.url`: could not parse `kefir`: relative URL without a base", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), "response": "{{text}}" }), + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "`.embedders.rest`: Missing field `request` (note: this field is mandatory for source rest)", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), "request": "{{text}}" }), + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "`.embedders.rest`: Missing field `response` (note: this field is mandatory for source rest)", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), "request": "{{text}}", "response": 42 }), + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Error while generating embeddings: user error: in `response`: \"{{embedding}}\" not found", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + } + "###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), "request": "{{text}}", "response": "{{embedding}}" }), + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task, @r###" + { + "uid": 0, + "indexUid": "doggo", + "status": "failed", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "rest": { + "source": "rest", + "url": "[url]", + "request": "{{text}}", + "response": "{{embedding}}" + } + } + }, + "error": { + "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response`, while extracting a single \"{{embedding}}\", expected `response` to be an array of numbers, but failed to parse server response:\n - invalid type: map, expected a sequence", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + // Validate an embedder with a bad dimension of 2 instead of 3 + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), "request": "{{text}}", "response": { "data": "{{embedding}}" }, "dimensions": 2 }), + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task["status"], @r###""succeeded""###); + + let (response, code) = index.add_documents(json!( { "id": 1, "name": "kefir" }), None).await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task, @r###" + { + "uid": 2, + "indexUid": "doggo", + "status": "failed", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 1, + "indexedDocuments": 0 + }, + "error": { + "message": "While embedding documents for embedder `rest`: runtime error: was expecting embeddings of dimension `2`, got embeddings of dimensions `3`", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} + +#[actix_rt::test] +async fn add_vector_and_user_provided() { + let (_mock, setting) = create_mock().await; + let server = get_server_vector().await; + let index = server.index("doggo"); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": setting, + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task["status"], @r###""succeeded""###); + let documents = json!([ + {"id": 0, "name": "kefir"}, + {"id": 1, "name": "echo", "_vectors": { "rest": [1, 1, 1] }}, + {"id": 2, "name": "intel"}, + ]); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + let task = index.wait_task(value.uid()).await; + snapshot!(task, @r###" + { + "uid": 1, + "indexUid": "doggo", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 3, + "indexedDocuments": 3 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (documents, _code) = index + .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) + .await; + snapshot!(json_string!(documents), @r###" + { + "results": [ + { + "id": 0, + "name": "kefir", + "_vectors": { + "rest": { + "embeddings": [ + [ + 0.0, + 0.0, + 0.0 + ] + ], + "regenerate": true + } + } + }, + { + "id": 1, + "name": "echo", + "_vectors": { + "rest": { + "embeddings": [ + [ + 1.0, + 1.0, + 1.0 + ] + ], + "regenerate": false + } + } + }, + { + "id": 2, + "name": "intel", + "_vectors": { + "rest": { + "embeddings": [ + [ + 1.0, + 1.0, + 1.0 + ] + ], + "regenerate": true + } + } + } + ], + "offset": 0, + "limit": 20, + "total": 3 + } + "###); +} + +#[actix_rt::test] +async fn server_returns_bad_request() { + let (mock, _setting) = create_mock_multiple().await; + let server = get_server_vector().await; + let index = server.index("doggo"); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), "request": "{{text}}", "response": "{{embedding}}" }), + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task, @r###" + { + "uid": 0, + "indexUid": "doggo", + "status": "failed", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "rest": { + "source": "rest", + "url": "[url]", + "request": "{{text}}", + "response": "{{embedding}}" + } + } + }, + "error": { + "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: sent a bad request to embedding server\n - Hint: check that the `request` in the embedder configuration matches the remote server's API\n - server replied with `{\"error\":\"Invalid request: invalid type: string \\\"test\\\", expected struct MultipleRequest at line 1 column 6\"}`", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), "request": "{{text}}", "response": "{{embedding}}", "dimensions": 3 }), + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task, @r###" + { + "uid": 1, + "indexUid": "doggo", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "rest": { + "source": "rest", + "dimensions": 3, + "url": "[url]", + "request": "{{text}}", + "response": "{{embedding}}" + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (response, code) = index.add_documents(json!( { "id": 1, "name": "kefir" }), None).await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task, @r###" + { + "uid": 2, + "indexUid": "doggo", + "status": "failed", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 1, + "indexedDocuments": 0 + }, + "error": { + "message": "While embedding documents for embedder `rest`: user error: sent a bad request to embedding server\n - Hint: check that the `request` in the embedder configuration matches the remote server's API\n - server replied with `{\"error\":\"Invalid request: invalid type: string \\\" id: 1\\\\n name: kefir\\\\n\\\", expected struct MultipleRequest at line 1 column 24\"}`", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} + +#[actix_rt::test] +async fn server_returns_bad_response() { + let (mock, _setting) = create_mock_multiple().await; + let server = get_server_vector().await; + let index = server.index("doggo"); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), + "request": { + "input": ["{{text}}", "{{..}}"] + }, + "response": ["{{embedding}}", "{{..}}"] }), + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task, @r###" + { + "uid": 0, + "indexUid": "doggo", + "status": "failed", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "rest": { + "source": "rest", + "url": "[url]", + "request": { + "input": [ + "{{text}}", + "{{..}}" + ] + }, + "response": [ + "{{embedding}}", + "{{..}}" + ] + } + } + }, + "error": { + "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response`, while extracting the array of \"{{embedding}}\"s, configuration expects `response` to be an array with at least 1 item(s) but server sent an object with 1 field(s)", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), + "request": { + "input": ["{{text}}", "{{..}}"] + }, + "response": { + "output": ["{{embedding}}", "{{..}}"] + } }), + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task, @r###" + { + "uid": 1, + "indexUid": "doggo", + "status": "failed", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "rest": { + "source": "rest", + "url": "[url]", + "request": { + "input": [ + "{{text}}", + "{{..}}" + ] + }, + "response": { + "output": [ + "{{embedding}}", + "{{..}}" + ] + } + } + } + }, + "error": { + "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response`, while extracting item #0 from the array of \"{{embedding}}\"s, expected `response` to be an array of numbers, but failed to parse server response:\n - invalid type: map, expected a sequence", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), + "request": { + "input": ["{{text}}"] + }, + "response": { + "output": "{{embedding}}" + } }), + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task, @r###" + { + "uid": 2, + "indexUid": "doggo", + "status": "failed", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "rest": { + "source": "rest", + "url": "[url]", + "request": { + "input": [ + "{{text}}" + ] + }, + "response": { + "output": "{{embedding}}" + } + } + } + }, + "error": { + "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response.output`, while extracting a single \"{{embedding}}\", expected `output` to be an array of numbers, but failed to parse server response:\n - invalid type: map, expected f32", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), + "request": { + "input": ["{{text}}", "{{..}}"] + }, + "response": { + "output": [{ "embedding": + { + "data": "{{embedding}}" + } + }, "{{..}}"] + } }), + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task, @r###" + { + "uid": 3, + "indexUid": "doggo", + "status": "failed", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "rest": { + "source": "rest", + "url": "[url]", + "request": { + "input": [ + "{{text}}", + "{{..}}" + ] + }, + "response": { + "output": [ + { + "embedding": { + "data": "{{embedding}}" + } + }, + "{{..}}" + ] + } + } + } + }, + "error": { + "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response.embedding`, while extracting item #0 from the array of \"{{embedding}}\"s, configuration expects `embedding` to be an object with key `data` but server sent an array of size 3", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": json!({ "source": "rest", "url": mock.uri(), + "request": { + "input": ["{{text}}"] + }, + "response": { + "output": [ + { "embeddings": + { + "data": "{{embedding}}" + } + } + ] + } }), + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task, @r###" + { + "uid": 4, + "indexUid": "doggo", + "status": "failed", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "rest": { + "source": "rest", + "url": "[url]", + "request": { + "input": [ + "{{text}}" + ] + }, + "response": { + "output": [ + { + "embeddings": { + "data": "{{embedding}}" + } + } + ] + } + } + } + }, + "error": { + "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with runtime error: error extracting embeddings from the response:\n - in `response.output[0]`, while extracting a single \"{{embedding}}\", configuration expects key \"embeddings\", which is missing in response\n - Hint: item #0 inside `output` has key `embedding`, did you mean `response.output[0].embedding` in embedder configuration?", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); +} + +#[actix_rt::test] +async fn server_returns_multiple() { + let (_mock, setting) = create_mock_multiple().await; + let server = get_server_vector().await; + let index = server.index("doggo"); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": setting, + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task["status"], @r###""succeeded""###); + let documents = json!([ + {"id": 0, "name": "kefir"}, + {"id": 1, "name": "echo", "_vectors": { "rest": [1, 1, 1] }}, + {"id": 2, "name": "intel"}, + ]); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + let task = index.wait_task(value.uid()).await; + snapshot!(task, @r###" + { + "uid": 1, + "indexUid": "doggo", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 3, + "indexedDocuments": 3 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (documents, _code) = index + .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) + .await; + snapshot!(json_string!(documents), @r###" + { + "results": [ + { + "id": 0, + "name": "kefir", + "_vectors": { + "rest": { + "embeddings": [ + [ + 0.0, + 0.0, + 0.0 + ] + ], + "regenerate": true + } + } + }, + { + "id": 1, + "name": "echo", + "_vectors": { + "rest": { + "embeddings": [ + [ + 1.0, + 1.0, + 1.0 + ] + ], + "regenerate": false + } + } + }, + { + "id": 2, + "name": "intel", + "_vectors": { + "rest": { + "embeddings": [ + [ + 1.0, + 1.0, + 1.0 + ] + ], + "regenerate": true + } + } + } + ], + "offset": 0, + "limit": 20, + "total": 3 + } + "###); +} + +#[actix_rt::test] +async fn server_single_input_returns_in_array() { + let (_mock, setting) = create_mock_single_response_in_array().await; + let server = get_server_vector().await; + let index = server.index("doggo"); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": setting, + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task["status"], @r###""succeeded""###); + let documents = json!([ + {"id": 0, "name": "kefir"}, + {"id": 1, "name": "echo", "_vectors": { "rest": [1, 1, 1] }}, + {"id": 2, "name": "intel"}, + ]); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + let task = index.wait_task(value.uid()).await; + snapshot!(task, @r###" + { + "uid": 1, + "indexUid": "doggo", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 3, + "indexedDocuments": 3 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (documents, _code) = index + .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) + .await; + snapshot!(json_string!(documents), @r###" + { + "results": [ + { + "id": 0, + "name": "kefir", + "_vectors": { + "rest": { + "embeddings": [ + [ + 0.0, + 0.0, + 0.0 + ] + ], + "regenerate": true + } + } + }, + { + "id": 1, + "name": "echo", + "_vectors": { + "rest": { + "embeddings": [ + [ + 1.0, + 1.0, + 1.0 + ] + ], + "regenerate": false + } + } + }, + { + "id": 2, + "name": "intel", + "_vectors": { + "rest": { + "embeddings": [ + [ + 1.0, + 1.0, + 1.0 + ] + ], + "regenerate": true + } + } + } + ], + "offset": 0, + "limit": 20, + "total": 3 + } + "###); +} + +#[actix_rt::test] +async fn server_raw() { + let (_mock, setting) = create_mock_raw().await; + let server = get_server_vector().await; + let index = server.index("doggo"); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "rest": setting, + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task["status"], @r###""succeeded""###); + let documents = json!([ + {"id": 0, "name": "kefir"}, + {"id": 1, "name": "echo", "_vectors": { "rest": [1, 1, 1] }}, + {"id": 2, "name": "intel"}, + ]); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + let task = index.wait_task(value.uid()).await; + snapshot!(task, @r###" + { + "uid": 1, + "indexUid": "doggo", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 3, + "indexedDocuments": 3 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (documents, _code) = index + .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) + .await; + snapshot!(json_string!(documents), @r###" + { + "results": [ + { + "id": 0, + "name": "kefir", + "_vectors": { + "rest": { + "embeddings": [ + [ + 0.0, + 0.0, + 0.0 + ] + ], + "regenerate": true + } + } + }, + { + "id": 1, + "name": "echo", + "_vectors": { + "rest": { + "embeddings": [ + [ + 1.0, + 1.0, + 1.0 + ] + ], + "regenerate": false + } + } + }, + { + "id": 2, + "name": "intel", + "_vectors": { + "rest": { + "embeddings": [ + [ + 1.0, + 1.0, + 1.0 + ] + ], + "regenerate": true + } + } + } + ], + "offset": 0, + "limit": 20, + "total": 3 + } + "###); +} diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 7f07dafed..2521b778f 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -2741,11 +2741,8 @@ mod tests { dimensions: Setting::Set(3), document_template: Setting::NotSet, url: Setting::NotSet, - query: Setting::NotSet, - input_field: Setting::NotSet, - path_to_embeddings: Setting::NotSet, - embedding_object: Setting::NotSet, - input_type: Setting::NotSet, + request: Setting::NotSet, + response: Setting::NotSet, distribution: Setting::NotSet, }), );