diff --git a/meilisearch/tests/common/mod.rs b/meilisearch/tests/common/mod.rs index ee1d8aa6e..0c4e8e25c 100644 --- a/meilisearch/tests/common/mod.rs +++ b/meilisearch/tests/common/mod.rs @@ -81,6 +81,7 @@ impl Display for Value { f, "{}", json_string!(self, { + ".uid" => "[uid]", ".enqueuedAt" => "[date]", ".startedAt" => "[date]", ".finishedAt" => "[date]", diff --git a/meilisearch/tests/documents/add_documents.rs b/meilisearch/tests/documents/add_documents.rs index 7c5f3efd3..819b2ddc2 100644 --- a/meilisearch/tests/documents/add_documents.rs +++ b/meilisearch/tests/documents/add_documents.rs @@ -1110,7 +1110,7 @@ async fn document_addition_with_huge_int_primary_key() { snapshot!(response, @r###" { - "uid": 0, + "uid": "[uid]", "indexUid": "test", "status": "succeeded", "type": "documentAdditionOrUpdate", @@ -1402,7 +1402,7 @@ async fn error_document_field_limit_reached_over_multiple_documents() { snapshot!(response, @r###" { - "uid": 1, + "uid": "[uid]", "indexUid": "test", "status": "succeeded", "type": "documentAdditionOrUpdate", @@ -1436,7 +1436,7 @@ async fn error_document_field_limit_reached_over_multiple_documents() { snapshot!(response, @r###" { - "uid": 2, + "uid": "[uid]", "indexUid": "test", "status": "failed", "type": "documentAdditionOrUpdate", @@ -1485,7 +1485,7 @@ async fn error_document_field_limit_reached_in_one_nested_document() { snapshot!(response, @r###" { - "uid": 1, + "uid": "[uid]", "indexUid": "test", "status": "succeeded", "type": "documentAdditionOrUpdate", @@ -1528,7 +1528,7 @@ async fn error_document_field_limit_reached_over_multiple_documents_with_nested_ snapshot!(response, @r###" { - "uid": 1, + "uid": "[uid]", "indexUid": "test", "status": "succeeded", "type": "documentAdditionOrUpdate", @@ -1563,7 +1563,7 @@ async fn error_document_field_limit_reached_over_multiple_documents_with_nested_ snapshot!(response, @r###" { - "uid": 2, + "uid": "[uid]", "indexUid": "test", "status": "succeeded", "type": "documentAdditionOrUpdate", @@ 
-2209,7 +2209,7 @@ async fn add_invalid_geo_and_then_settings() { let ret = index.wait_task(ret.uid()).await; snapshot!(ret, @r###" { - "uid": 1, + "uid": "[uid]", "indexUid": "test", "status": "succeeded", "type": "documentAdditionOrUpdate", @@ -2231,7 +2231,7 @@ async fn add_invalid_geo_and_then_settings() { let ret = index.wait_task(ret.uid()).await; snapshot!(ret, @r###" { - "uid": 2, + "uid": "[uid]", "indexUid": "test", "status": "failed", "type": "settingsUpdate", diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/generate_and_import_dump_containing_vectors/1.snap b/meilisearch/tests/dumps/snapshots/mod.rs/generate_and_import_dump_containing_vectors/1.snap index 4b05d417a..2004eb036 100644 --- a/meilisearch/tests/dumps/snapshots/mod.rs/generate_and_import_dump_containing_vectors/1.snap +++ b/meilisearch/tests/dumps/snapshots/mod.rs/generate_and_import_dump_containing_vectors/1.snap @@ -2,7 +2,7 @@ source: meilisearch/tests/dumps/mod.rs --- { - "uid": 0, + "uid": "[uid]", "indexUid": "pets", "status": "succeeded", "type": "settingsUpdate", diff --git a/meilisearch/tests/dumps/snapshots/mod.rs/generate_and_import_dump_containing_vectors/2.snap b/meilisearch/tests/dumps/snapshots/mod.rs/generate_and_import_dump_containing_vectors/2.snap index 43971924b..8405a0461 100644 --- a/meilisearch/tests/dumps/snapshots/mod.rs/generate_and_import_dump_containing_vectors/2.snap +++ b/meilisearch/tests/dumps/snapshots/mod.rs/generate_and_import_dump_containing_vectors/2.snap @@ -2,7 +2,7 @@ source: meilisearch/tests/dumps/mod.rs --- { - "uid": 1, + "uid": "[uid]", "indexUid": "pets", "status": "succeeded", "type": "documentAdditionOrUpdate", diff --git a/meilisearch/tests/search/snapshots/distinct.rs/distinct_at_search_time/succeed.snap b/meilisearch/tests/search/snapshots/distinct.rs/distinct_at_search_time/succeed.snap index 1b8190c42..64e48b6a5 100644 --- a/meilisearch/tests/search/snapshots/distinct.rs/distinct_at_search_time/succeed.snap +++ 
b/meilisearch/tests/search/snapshots/distinct.rs/distinct_at_search_time/succeed.snap @@ -2,7 +2,7 @@ source: meilisearch/tests/search/distinct.rs --- { - "uid": 1, + "uid": "[uid]", "indexUid": "tamo", "status": "succeeded", "type": "settingsUpdate", diff --git a/meilisearch/tests/search/snapshots/errors.rs/distinct_at_search_time/task-succeed.snap b/meilisearch/tests/search/snapshots/errors.rs/distinct_at_search_time/task-succeed.snap index 903e96ffb..18532cba4 100644 --- a/meilisearch/tests/search/snapshots/errors.rs/distinct_at_search_time/task-succeed.snap +++ b/meilisearch/tests/search/snapshots/errors.rs/distinct_at_search_time/task-succeed.snap @@ -2,7 +2,7 @@ source: meilisearch/tests/search/errors.rs --- { - "uid": 0, + "uid": "[uid]", "indexUid": "tamo", "status": "succeeded", "type": "indexCreation", diff --git a/meilisearch/tests/tasks/mod.rs b/meilisearch/tests/tasks/mod.rs index f2ed76b6a..23ba669ca 100644 --- a/meilisearch/tests/tasks/mod.rs +++ b/meilisearch/tests/tasks/mod.rs @@ -744,7 +744,7 @@ async fn test_summarized_index_deletion() { snapshot!(task, @r###" { - "uid": 0, + "uid": "[uid]", "indexUid": "test", "status": "failed", "type": "indexDeletion", @@ -774,7 +774,7 @@ async fn test_summarized_index_deletion() { snapshot!(task, @r###" { - "uid": 1, + "uid": "[uid]", "indexUid": "test", "status": "succeeded", "type": "documentAdditionOrUpdate", @@ -796,7 +796,7 @@ async fn test_summarized_index_deletion() { snapshot!(task, @r###" { - "uid": 2, + "uid": "[uid]", "indexUid": "test", "status": "succeeded", "type": "indexDeletion", @@ -818,7 +818,7 @@ async fn test_summarized_index_deletion() { snapshot!(task, @r###" { - "uid": 3, + "uid": "[uid]", "indexUid": "test", "status": "failed", "type": "indexDeletion", diff --git a/meilisearch/tests/vector/intel_gen.txt.gz b/meilisearch/tests/vector/intel_gen.txt.gz new file mode 100644 index 000000000..115eafea5 Binary files /dev/null and b/meilisearch/tests/vector/intel_gen.txt.gz differ diff --git 
a/meilisearch/tests/vector/mod.rs b/meilisearch/tests/vector/mod.rs index 4a142f86a..0837fb1b8 100644 --- a/meilisearch/tests/vector/mod.rs +++ b/meilisearch/tests/vector/mod.rs @@ -1,3 +1,4 @@ +mod openai; mod rest; mod settings; @@ -7,6 +8,22 @@ use crate::common::index::Index; use crate::common::{GetAllDocumentsOptions, Server}; use crate::json; +async fn get_server_vector() -> Server { + let server = Server::new().await; + let (value, code) = server.set_features(json!({"vectorStore": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(value, @r###" + { + "vectorStore": true, + "metrics": false, + "logsRoute": false, + "editDocumentsByFunction": false, + "containsFilter": false + } + "###); + server +} + #[actix_rt::test] async fn add_remove_user_provided() { let server = Server::new().await; @@ -218,7 +235,7 @@ async fn user_provided_embeddings_error() { let task = index.wait_task(value.uid()).await; snapshot!(task, @r###" { - "uid": 2, + "uid": "[uid]", "indexUid": "doggo", "status": "failed", "type": "documentAdditionOrUpdate", @@ -247,7 +264,7 @@ async fn user_provided_embeddings_error() { let task = index.wait_task(value.uid()).await; snapshot!(task, @r###" { - "uid": 3, + "uid": "[uid]", "indexUid": "doggo", "status": "failed", "type": "documentAdditionOrUpdate", @@ -277,7 +294,7 @@ async fn user_provided_embeddings_error() { let task = index.wait_task(value.uid()).await; snapshot!(task, @r###" { - "uid": 4, + "uid": "[uid]", "indexUid": "doggo", "status": "failed", "type": "documentAdditionOrUpdate", @@ -306,7 +323,7 @@ async fn user_provided_embeddings_error() { let task = index.wait_task(value.uid()).await; snapshot!(task, @r###" { - "uid": 5, + "uid": "[uid]", "indexUid": "doggo", "status": "failed", "type": "documentAdditionOrUpdate", @@ -335,7 +352,7 @@ async fn user_provided_embeddings_error() { let task = index.wait_task(value.uid()).await; snapshot!(task, @r###" { - "uid": 6, + "uid": "[uid]", "indexUid": "doggo", "status": "failed", "type": 
"documentAdditionOrUpdate", @@ -364,7 +381,7 @@ async fn user_provided_embeddings_error() { let task = index.wait_task(value.uid()).await; snapshot!(task, @r###" { - "uid": 7, + "uid": "[uid]", "indexUid": "doggo", "status": "failed", "type": "documentAdditionOrUpdate", @@ -405,7 +422,7 @@ async fn user_provided_embeddings_error() { let task = index.wait_task(value.uid()).await; snapshot!(task, @r###" { - "uid": 10, + "uid": "[uid]", "indexUid": "doggo", "status": "failed", "type": "documentAdditionOrUpdate", @@ -433,7 +450,7 @@ async fn user_provided_embeddings_error() { let task = index.wait_task(value.uid()).await; snapshot!(task, @r###" { - "uid": 11, + "uid": "[uid]", "indexUid": "doggo", "status": "failed", "type": "documentAdditionOrUpdate", @@ -461,7 +478,7 @@ async fn user_provided_embeddings_error() { let task = index.wait_task(value.uid()).await; snapshot!(task, @r###" { - "uid": 12, + "uid": "[uid]", "indexUid": "doggo", "status": "failed", "type": "documentAdditionOrUpdate", @@ -497,7 +514,7 @@ async fn user_provided_vectors_error() { let task = index.wait_task(value.uid()).await; snapshot!(task, @r###" { - "uid": 2, + "uid": "[uid]", "indexUid": "doggo", "status": "failed", "type": "documentAdditionOrUpdate", @@ -526,7 +543,7 @@ async fn user_provided_vectors_error() { let task = index.wait_task(value.uid()).await; snapshot!(task, @r###" { - "uid": 3, + "uid": "[uid]", "indexUid": "doggo", "status": "failed", "type": "documentAdditionOrUpdate", @@ -555,7 +572,7 @@ async fn user_provided_vectors_error() { let task = index.wait_task(value.uid()).await; snapshot!(task, @r###" { - "uid": 4, + "uid": "[uid]", "indexUid": "doggo", "status": "failed", "type": "documentAdditionOrUpdate", diff --git a/meilisearch/tests/vector/openai.rs b/meilisearch/tests/vector/openai.rs new file mode 100644 index 000000000..f350abbe1 --- /dev/null +++ b/meilisearch/tests/vector/openai.rs @@ -0,0 +1,1873 @@ +use std::collections::BTreeMap; +use std::io::Write; +use 
std::sync::atomic::{AtomicU32, Ordering}; +use std::sync::OnceLock; + +use meili_snap::{json_string, snapshot}; +use wiremock::matchers::{method, path}; +use wiremock::{Mock, MockServer, Request, ResponseTemplate}; + +use crate::common::{GetAllDocumentsOptions, Value}; +use crate::json; +use crate::vector::get_server_vector; + +#[derive(serde::Deserialize)] +struct OpenAiResponses(BTreeMap<String, OpenAiResponse>); + +#[derive(serde::Deserialize)] +struct OpenAiResponse { + large: Option<Vec<f32>>, + small: Option<Vec<f32>>, + ada: Option<Vec<f32>>, + large_512: Option<Vec<f32>>, +} + +#[derive(serde::Deserialize)] +struct OpenAiTokenizedResponses { + tokens: Vec<u64>, + embedding: Vec<f32>, +} + +impl OpenAiResponses { + fn get(&self, text: &str, model_dimensions: ModelDimensions) -> Option<&[f32]> { + let entry = self.0.get(text)?; + match model_dimensions { + ModelDimensions::Large => entry.large.as_deref(), + ModelDimensions::Small => entry.small.as_deref(), + ModelDimensions::Ada => entry.ada.as_deref(), + ModelDimensions::Large512 => entry.large_512.as_deref(), + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum ModelDimensions { + Large, + Small, + Ada, + Large512, +} + +impl ModelDimensions { + fn add_to_settings(&self, settings: &mut Value) { + settings["model"] = serde_json::json!(self.model()); + if let ModelDimensions::Large512 = self { + settings["dimensions"] = serde_json::json!(512); + } + } + + fn model(&self) -> &'static str { + match self { + ModelDimensions::Large | ModelDimensions::Large512 => "text-embedding-3-large", + ModelDimensions::Small => "text-embedding-3-small", + ModelDimensions::Ada => "text-embedding-ada-002", + } + } + + fn from_request(request: &serde_json::Value) -> Self { + let has_dimensions_512 = if let Some(dimensions) = request.get("dimensions") { + if dimensions != 512 { + panic!("unsupported dimensions values") + } + true + } else { + false + }; + let serde_json::Value::String(model) = &request["model"] else { + panic!("unsupported non string model") + }; + match 
(model.as_str(), has_dimensions_512) { + ("text-embedding-3-large", true) => Self::Large512, + (_, true) => panic!("unsupported dimensions with non-large model"), + ("text-embedding-3-large", false) => Self::Large, + ("text-embedding-3-small", false) => Self::Small, + ("text-embedding-ada-002", false) => Self::Ada, + (_, false) => panic!("unsupported model"), + } + } +} + +fn openai_responses() -> &'static OpenAiResponses { + static OPENAI_RESPONSES: OnceLock<OpenAiResponses> = OnceLock::new(); + OPENAI_RESPONSES.get_or_init(|| { + // json file that was compressed with gzip + // decompress with `gzip --keep -d openai_responses.json.gz` + // recompress with `gzip --keep -c openai_responses.json > openai_responses.json.gz` + let compressed_responses = include_bytes!("openai_responses.json.gz"); + let mut responses = Vec::new(); + let mut decoder = flate2::write::GzDecoder::new(&mut responses); + + decoder.write_all(compressed_responses).unwrap(); + drop(decoder); + serde_json::from_slice(&responses).unwrap() + }) +} + +fn openai_tokenized_responses() -> &'static OpenAiTokenizedResponses { + static OPENAI_TOKENIZED_RESPONSES: OnceLock<OpenAiTokenizedResponses> = OnceLock::new(); + OPENAI_TOKENIZED_RESPONSES.get_or_init(|| { + // json file that was compressed with gzip + // decompress with `gzip --keep -d openai_tokenized_responses.json.gz` + // recompress with `gzip --keep -c openai_tokenized_responses.json > openai_tokenized_responses.json.gz` + let compressed_responses = include_bytes!("openai_tokenized_responses.json.gz"); + let mut responses = Vec::new(); + let mut decoder = flate2::write::GzDecoder::new(&mut responses); + + decoder.write_all(compressed_responses).unwrap(); + drop(decoder); + serde_json::from_slice(&responses).unwrap() + }) +} + +fn long_text() -> &'static str { + static LONG_TEXT: OnceLock<String> = OnceLock::new(); + LONG_TEXT.get_or_init(|| { + // decompress with `gzip --keep -d intel_gen.txt.gz` + // recompress with `gzip --keep -c intel_gen.txt > intel_gen.txt.gz` + let compressed_long_text = 
include_bytes!("intel_gen.txt.gz"); + let mut long_text = Vec::new(); + let mut decoder = flate2::write::GzDecoder::new(&mut long_text); + + decoder.write_all(compressed_long_text).unwrap(); + drop(decoder); + let long_text = std::str::from_utf8(&long_text).unwrap(); + + long_text.repeat(3) + }) +} + +async fn create_mock_tokenized() -> (MockServer, Value) { + create_mock_with_template("{{doc.text}}", ModelDimensions::Large, false).await +} + +async fn create_mock_with_template( + document_template: &str, + model_dimensions: ModelDimensions, + fallible: bool, +) -> (MockServer, Value) { + let mock_server = MockServer::start().await; + const API_KEY: &str = "my-api-key"; + const API_KEY_BEARER: &str = "Bearer my-api-key"; + + let attempt = AtomicU32::new(0); + + Mock::given(method("POST")) + .and(path("/")) + .respond_with(move |req: &Request| { + // 0. maybe return 503 + if fallible { + let attempt = attempt.fetch_add(1, Ordering::Relaxed); + let failed = matches!(attempt % 4, 0 | 1 | 3); + if failed { + return ResponseTemplate::new(503).set_body_json(json!({ + "error": { + "message": "come back later", + "type": "come_back_later" + } + })) + } + } + // 1. check API key + match req.headers.get("Authorization") { + Some(api_key) if api_key == API_KEY_BEARER => { + {} + } + Some(api_key) => { + let api_key = api_key.to_str().unwrap(); + return ResponseTemplate::new(401).set_body_json( + json!( + { + "error": { + "message": format!("Incorrect API key provided: {api_key}. You can find your API key at https://platform.openai.com/account/api-keys."), + "type": "invalid_request_error", + "param": serde_json::Value::Null, + "code": "invalid_api_key" + } + } + ), + ) + } + None => { + return ResponseTemplate::new(401).set_body_json( + json!( + { + "error": { + "message": "You didn't provide an API key. You need to provide your API key in an Authorization header using Bearer auth (i.e. 
Authorization: Bearer YOUR_KEY), or as the password field (with blank username) if you're accessing the API from your browser and are prompted for a username and password. You can obtain an API key from https://platform.openai.com/account/api-keys.", + "type": "invalid_request_error", + "param": serde_json::Value::Null, + "code": serde_json::Value::Null + } + } + ), + ) + } + } + // 2. parse text inputs + let query: serde_json::Value = match req.body_json() { + Ok(query) => query, + Err(_error) => return ResponseTemplate::new(400).set_body_json( + json!( + { + "error": { + "message": "We could not parse the JSON body of your request. (HINT: This likely means you aren't using your HTTP library correctly. The OpenAI API expects a JSON payload, but what was sent was not valid JSON. If you have trouble figuring out how to fix this, please contact us through our help center at help.openai.com.)", + "type": "invalid_request_error", + "param": serde_json::Value::Null, + "code": serde_json::Value::Null + } + } + ) + ) + }; + let query_model_dimensions = ModelDimensions::from_request(&query); + if query_model_dimensions != model_dimensions { + panic!("Expected {model_dimensions:?}, got {query_model_dimensions:?}") + } + + // 3. 
for each text, find embedding in responses + let serde_json::Value::Array(inputs) = &query["input"] else { + panic!("Unexpected `input` value") + }; + + let openai_tokenized_responses = openai_tokenized_responses(); + let embeddings = if inputs == openai_tokenized_responses.tokens.as_slice() { + vec![openai_tokenized_responses.embedding.clone()] + } else { + let mut embeddings = Vec::new(); + for input in inputs { + let serde_json::Value::String(input) = input else { + return ResponseTemplate::new(400).set_body_json(json!({ + "error": { + "message": "Unexpected `input` value", + "type": "test_response", + "query": query + } + })) + }; + + if input == long_text() { + return ResponseTemplate::new(400).set_body_json(json!( + { + "error": { + "message": "This model's maximum context length is 8192 tokens, however you requested 10554 tokens (10554 in your prompt; 0 for the completion). Please reduce your prompt; or completion length.", + "type": "invalid_request_error", + "param": null, + "code": null, + } + } + )); + } + + let Some(embedding) = openai_responses().get(input, model_dimensions) else { + return ResponseTemplate::new(404).set_body_json(json!( + { + "error": { + "message": "Could not find embedding for text", + "text": input, + "model_dimensions": format!("{model_dimensions:?}"), + "type": "add_to_openai_responses_json_please", + "query": query, + } + } + )) + }; + + embeddings.push(embedding.to_vec()); + } + embeddings + }; + + + let data : Vec<_> = embeddings.into_iter().enumerate().map(|(index, embedding)| json!({ + "object": "embedding", + "index": index, + "embedding": embedding, + })).collect(); + + // 4. 
produce output from embeddings + ResponseTemplate::new(200).set_body_json(json!({ + "object": "list", + "data": data, + "model": model_dimensions.model(), + "usage": { + "prompt_tokens": "[prompt_tokens]", + "total_tokens": "[total_tokens]" + } + })) + }) + .mount(&mock_server) + .await; + let url = mock_server.uri(); + + let mut embedder_settings = json!({ + "source": "openAi", + "url": url, + "apiKey": API_KEY, + "documentTemplate": document_template + }); + + model_dimensions.add_to_settings(&mut embedder_settings); + + (mock_server, embedder_settings) +} + +const DOGGO_TEMPLATE: &str = r#"{%- if doc.gender == "F" -%}Une chienne nommée {{doc.name}}, née en {{doc.birthyear}} + {%- else -%} + Un chien nommé {{doc.name}}, né en {{doc.birthyear}} + {%- endif %}, de race {{doc.breed}}."#; + +async fn create_mock() -> (MockServer, Value) { + create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large, false).await +} + +async fn create_mock_dimensions() -> (MockServer, Value) { + create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large512, false).await +} + +async fn create_mock_small_embedding_model() -> (MockServer, Value) { + create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Small, false).await +} + +async fn create_mock_legacy_embedding_model() -> (MockServer, Value) { + create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Ada, false).await +} + +async fn create_fallible_mock() -> (MockServer, Value) { + create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large, true).await +} + +// basic test "it works" +#[actix_rt::test] +async fn it_works() { + let (_mock, setting) = create_mock().await; + let server = get_server_vector().await; + let index = server.index("doggo"); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "default": setting, + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task["status"], @r###""succeeded""###); + let 
documents = json!([ + {"id": 0, "name": "kefir", "gender": "M", "birthyear": 2023, "breed": "Patou"}, + {"id": 1, "name": "Intel", "gender": "M", "birthyear": 2011, "breed": "Beagle"}, + {"id": 2, "name": "Vénus", "gender": "F", "birthyear": 2003, "breed": "Jack Russel Terrier"}, + {"id": 3, "name": "Max", "gender": "M", "birthyear": 1995, "breed": "Labrador Retriever"}, + ]); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + let task = index.wait_task(value.uid()).await; + snapshot!(task, @r###" + { + "uid": "[uid]", + "indexUid": "doggo", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 4, + "indexedDocuments": 4 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (documents, _code) = index + .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) + .await; + snapshot!(json_string!(documents, {".results.*._vectors.default.embeddings" => "[vector]"}), @r###" + { + "results": [ + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + }, + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + }, + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + }, + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + } + ], + "offset": 0, + "limit": 20, + "total": 4 + } + "###); + + let (response, 
code) = index + .search_post(json!({ + "q": "chien de chasse", + "hybrid": {"semanticRatio": 1.0} + })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle" + }, + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou" + }, + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever" + }, + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier" + } + ] + "###); + + let (response, code) = index + .search_post(json!({ + "q": "petit chien", + "hybrid": {"semanticRatio": 1.0} + })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle" + }, + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier" + }, + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou" + }, + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever" + } + ] + "###); + + let (response, code) = index + .search_post(json!({ + "q": "grand chien de berger des montagnes", + "hybrid": {"semanticRatio": 1.0} + })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou" + }, + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle" + }, + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever" + }, + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier" + } + ] + "###); +} + +// tokenize long text + +// the mock rejects the raw long text with a context-length error and only embeds the pre-tokenized input +#[actix_rt::test] +async 
fn tokenize_long_text() { + let (_mock, setting) = create_mock_tokenized().await; + let server = get_server_vector().await; + let index = server.index("doggo"); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "default": setting, + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task["status"], @r###""succeeded""###); + let documents = json!([ + {"id": 0, "text": long_text()} + ]); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + let task = index.wait_task(value.uid()).await; + snapshot!(task, @r###" + { + "uid": "[uid]", + "indexUid": "doggo", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 1, + "indexedDocuments": 1 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (response, code) = index + .search_post(json!({ + "q": "grand chien de berger des montagnes", + "showRankingScore": true, + "attributesToRetrieve": ["id"], + "hybrid": {"semanticRatio": 1.0} + })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 0, + "_rankingScore": 0.07944583892822266 + } + ] + "###); +} + +// "wrong parameters" + +#[actix_rt::test] +async fn bad_api_key() { + let (_mock, mut setting) = create_mock().await; + let server = get_server_vector().await; + let index = server.index("doggo"); + + let documents = json!([ + {"id": 0, "name": "kefir", "gender": "M", "birthyear": 2023, "breed": "Patou"}, + {"id": 1, "name": "Intel", "gender": "M", "birthyear": 2011, "breed": "Beagle"}, + {"id": 2, "name": "Vénus", "gender": "F", "birthyear": 2003, "breed": "Jack Russel Terrier"}, + {"id": 3, "name": "Max", "gender": "M", "birthyear": 1995, "breed": "Labrador Retriever"}, + ]); + let (value, code) = 
index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + let task = index.wait_task(value.uid()).await; + + snapshot!(task, @r###" + { + "uid": "[uid]", + "indexUid": "doggo", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 4, + "indexedDocuments": 4 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + // wrong API key + setting["apiKey"] = "doggo".into(); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "default": setting, + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + + snapshot!(task, @r###" + { + "uid": "[uid]", + "indexUid": "doggo", + "status": "failed", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "default": { + "source": "openAi", + "model": "text-embedding-3-large", + "apiKey": "XXX...", + "documentTemplate": "{%- if doc.gender == \"F\" -%}Une chienne nommée {{doc.name}}, née en {{doc.birthyear}}\n {%- else -%}\n Un chien nommé {{doc.name}}, né en {{doc.birthyear}}\n {%- endif %}, de race {{doc.breed}}.", + "url": "[url]" + } + } + }, + "error": { + "message": "While embedding documents for embedder `default`: user error: could not authenticate against OpenAI server\n - server replied with `{\"error\":{\"message\":\"Incorrect API key provided: Bearer doggo. 
You can find your API key at https://platform.openai.com/account/api-keys.\",\"type\":\"invalid_request_error\",\"param\":null,\"code\":\"invalid_api_key\"}}`\n - Hint: Check the `apiKey` parameter in the embedder configuration, and the `MEILI_OPENAI_API_KEY` and `OPENAI_API_KEY` environment variables", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + // no API key + setting.as_object_mut().unwrap().remove("apiKey"); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "default": setting, + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task, @r###" + { + "uid": "[uid]", + "indexUid": "doggo", + "status": "failed", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "default": { + "source": "openAi", + "model": "text-embedding-3-large", + "documentTemplate": "{%- if doc.gender == \"F\" -%}Une chienne nommée {{doc.name}}, née en {{doc.birthyear}}\n {%- else -%}\n Un chien nommé {{doc.name}}, né en {{doc.birthyear}}\n {%- endif %}, de race {{doc.breed}}.", + "url": "[url]" + } + } + }, + "error": { + "message": "While embedding documents for embedder `default`: user error: could not authenticate against OpenAI server\n - server replied with `{\"error\":{\"message\":\"You didn't provide an API key. You need to provide your API key in an Authorization header using Bearer auth (i.e. Authorization: Bearer YOUR_KEY), or as the password field (with blank username) if you're accessing the API from your browser and are prompted for a username and password. 
You can obtain an API key from https://platform.openai.com/account/api-keys.\",\"type\":\"invalid_request_error\",\"param\":null,\"code\":null}}`\n - Hint: Check the `apiKey` parameter in the embedder configuration, and the `MEILI_OPENAI_API_KEY` and `OPENAI_API_KEY` environment variables", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + // not a string API key + setting["apiKey"] = 42.into(); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "default": setting, + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Invalid value type at `.embedders.default.apiKey`: expected a string, but found a positive integer: `42`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); +} + +// one test with wrong model +#[actix_rt::test] +async fn bad_model() { + let (_mock, mut setting) = create_mock().await; + let server = get_server_vector().await; + let index = server.index("doggo"); + + let documents = json!([ + {"id": 0, "name": "kefir", "gender": "M", "birthyear": 2023, "breed": "Patou"}, + {"id": 1, "name": "Intel", "gender": "M", "birthyear": 2011, "breed": "Beagle"}, + {"id": 2, "name": "Vénus", "gender": "F", "birthyear": 2003, "breed": "Jack Russel Terrier"}, + {"id": 3, "name": "Max", "gender": "M", "birthyear": 1995, "breed": "Labrador Retriever"}, + ]); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + let task = index.wait_task(value.uid()).await; + + snapshot!(task, @r###" + { + "uid": "[uid]", + "indexUid": "doggo", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { 
+ "receivedDocuments": 4, + "indexedDocuments": 4 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + // wrong model + setting["model"] = "doggo".into(); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "default": setting, + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + + snapshot!(response, @r###" + { + "message": "`.embedders.default.model`: Invalid model `doggo` for OpenAI. Supported models: [\"text-embedding-ada-002\", \"text-embedding-3-small\", \"text-embedding-3-large\"]", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + + // not a string model + setting["model"] = 42.into(); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "default": setting, + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Invalid value type at `.embedders.default.model`: expected a string, but found a positive integer: `42`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); +} + +#[actix_rt::test] +async fn bad_dimensions() { + let (_mock, mut setting) = create_mock().await; + let server = get_server_vector().await; + let index = server.index("doggo"); + + let documents = json!([ + {"id": 0, "name": "kefir", "gender": "M", "birthyear": 2023, "breed": "Patou"}, + {"id": 1, "name": "Intel", "gender": "M", "birthyear": 2011, "breed": "Beagle"}, + {"id": 2, "name": "Vénus", "gender": "F", "birthyear": 2003, "breed": "Jack Russel Terrier"}, + {"id": 3, "name": "Max", "gender": "M", "birthyear": 1995, "breed": "Labrador Retriever"}, + ]); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + let task = 
index.wait_task(value.uid()).await; + + snapshot!(task, @r###" + { + "uid": "[uid]", + "indexUid": "doggo", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 4, + "indexedDocuments": 4 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + // null dimensions + setting["dimensions"] = 0.into(); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "default": setting, + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + + snapshot!(response, @r###" + { + "message": "`.embedders.default.dimensions`: `dimensions` cannot be zero", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + + // negative dimensions + setting["dimensions"] = (-42).into(); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "default": setting, + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Invalid value type at `.embedders.default.dimensions`: expected a positive integer, but found a negative integer: `-42`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + + // huge dimensions + setting["dimensions"] = (42_000_000).into(); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "default": setting, + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "`.embedders.default.dimensions`: Model `text-embedding-3-large` does not support overriding its dimensions to a value higher than 3072. 
Found 42000000", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); +} + +// one test with changed dimensions +#[actix_rt::test] +async fn smaller_dimensions() { + let (_mock, setting) = create_mock_dimensions().await; + let server = get_server_vector().await; + let index = server.index("doggo"); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "default": setting, + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task["status"], @r###""succeeded""###); + let documents = json!([ + {"id": 0, "name": "kefir", "gender": "M", "birthyear": 2023, "breed": "Patou"}, + {"id": 1, "name": "Intel", "gender": "M", "birthyear": 2011, "breed": "Beagle"}, + {"id": 2, "name": "Vénus", "gender": "F", "birthyear": 2003, "breed": "Jack Russel Terrier"}, + {"id": 3, "name": "Max", "gender": "M", "birthyear": 1995, "breed": "Labrador Retriever"}, + ]); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + let task = index.wait_task(value.uid()).await; + snapshot!(task, @r###" + { + "uid": "[uid]", + "indexUid": "doggo", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 4, + "indexedDocuments": 4 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (documents, _code) = index + .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) + .await; + snapshot!(json_string!(documents, {".results.*._vectors.default.embeddings" => "[vector]"}), @r###" + { + "results": [ + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + 
} + }, + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + }, + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + }, + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + } + ], + "offset": 0, + "limit": 20, + "total": 4 + } + "###); + + let (response, code) = index + .search_post(json!({ + "q": "chien de chasse", + "hybrid": {"semanticRatio": 1.0} + })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle" + }, + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou" + }, + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier" + }, + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever" + } + ] + "###); + + let (response, code) = index + .search_post(json!({ + "q": "petit chien", + "hybrid": {"semanticRatio": 1.0} + })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier" + }, + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou" + }, + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle" + }, + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever" + } + ] + "###); + + let (response, code) = index + .search_post(json!({ + "q": "grand chien 
de berger des montagnes", + "hybrid": {"semanticRatio": 1.0} + })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou" + }, + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever" + }, + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle" + }, + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier" + } + ] + "###); +} + +// one test with different models +#[actix_rt::test] +async fn small_embedding_model() { + let (_mock, setting) = create_mock_small_embedding_model().await; + let server = get_server_vector().await; + let index = server.index("doggo"); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "default": setting, + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task["status"], @r###""succeeded""###); + let documents = json!([ + {"id": 0, "name": "kefir", "gender": "M", "birthyear": 2023, "breed": "Patou"}, + {"id": 1, "name": "Intel", "gender": "M", "birthyear": 2011, "breed": "Beagle"}, + {"id": 2, "name": "Vénus", "gender": "F", "birthyear": 2003, "breed": "Jack Russel Terrier"}, + {"id": 3, "name": "Max", "gender": "M", "birthyear": 1995, "breed": "Labrador Retriever"}, + ]); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + let task = index.wait_task(value.uid()).await; + snapshot!(task, @r###" + { + "uid": "[uid]", + "indexUid": "doggo", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 4, + "indexedDocuments": 4 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let 
(documents, _code) = index + .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) + .await; + snapshot!(json_string!(documents, {".results.*._vectors.default.embeddings" => "[vector]"}), @r###" + { + "results": [ + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + }, + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + }, + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + }, + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + } + ], + "offset": 0, + "limit": 20, + "total": 4 + } + "###); + + let (response, code) = index + .search_post(json!({ + "q": "chien de chasse", + "hybrid": {"semanticRatio": 1.0} + })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever" + }, + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle" + }, + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier" + }, + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou" + } + ] + "###); + + let (response, code) = index + .search_post(json!({ + "q": "petit chien", + "hybrid": {"semanticRatio": 1.0} + })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 
2023, + "breed": "Patou" + }, + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle" + }, + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever" + }, + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier" + } + ] + "###); + + let (response, code) = index + .search_post(json!({ + "q": "grand chien de berger des montagnes", + "hybrid": {"semanticRatio": 1.0} + })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever" + }, + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou" + }, + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle" + }, + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier" + } + ] + "###); +} + +#[actix_rt::test] +async fn legacy_embedding_model() { + let (_mock, setting) = create_mock_legacy_embedding_model().await; + let server = get_server_vector().await; + let index = server.index("doggo"); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "default": setting, + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task["status"], @r###""succeeded""###); + let documents = json!([ + {"id": 0, "name": "kefir", "gender": "M", "birthyear": 2023, "breed": "Patou"}, + {"id": 1, "name": "Intel", "gender": "M", "birthyear": 2011, "breed": "Beagle"}, + {"id": 2, "name": "Vénus", "gender": "F", "birthyear": 2003, "breed": "Jack Russel Terrier"}, + {"id": 3, "name": "Max", "gender": "M", "birthyear": 1995, "breed": "Labrador Retriever"}, + ]); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + let task = 
index.wait_task(value.uid()).await; + snapshot!(task, @r###" + { + "uid": "[uid]", + "indexUid": "doggo", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 4, + "indexedDocuments": 4 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (documents, _code) = index + .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) + .await; + snapshot!(json_string!(documents, {".results.*._vectors.default.embeddings" => "[vector]"}), @r###" + { + "results": [ + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + }, + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + }, + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + }, + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + } + ], + "offset": 0, + "limit": 20, + "total": 4 + } + "###); + + let (response, code) = index + .search_post(json!({ + "q": "chien de chasse", + "hybrid": {"semanticRatio": 1.0} + })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier" + }, + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever" + }, + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": 
"Beagle" + }, + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou" + } + ] + "###); + + let (response, code) = index + .search_post(json!({ + "q": "petit chien", + "hybrid": {"semanticRatio": 1.0} + })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier" + }, + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever" + }, + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle" + }, + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou" + } + ] + "###); + + let (response, code) = index + .search_post(json!({ + "q": "grand chien de berger des montagnes", + "hybrid": {"semanticRatio": 1.0} + })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever" + }, + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier" + }, + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle" + }, + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou" + } + ] + "###); +} + +// test with a server that responds 500 on 3 out of 4 calls +#[actix_rt::test] +async fn it_still_works() { + let (_mock, setting) = create_fallible_mock().await; + let server = get_server_vector().await; + let index = server.index("doggo"); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "default": setting, + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task["status"], @r###""succeeded""###); + let documents = json!([ + {"id": 0, "name": "kefir", "gender": "M", 
"birthyear": 2023, "breed": "Patou"}, + {"id": 1, "name": "Intel", "gender": "M", "birthyear": 2011, "breed": "Beagle"}, + {"id": 2, "name": "Vénus", "gender": "F", "birthyear": 2003, "breed": "Jack Russel Terrier"}, + {"id": 3, "name": "Max", "gender": "M", "birthyear": 1995, "breed": "Labrador Retriever"}, + ]); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + let task = index.wait_task(value.uid()).await; + snapshot!(task, @r###" + { + "uid": "[uid]", + "indexUid": "doggo", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 4, + "indexedDocuments": 4 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (documents, _code) = index + .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) + .await; + snapshot!(json_string!(documents, {".results.*._vectors.default.embeddings" => "[vector]"}), @r###" + { + "results": [ + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + }, + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + }, + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + }, + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + } + ], + "offset": 0, + "limit": 20, + "total": 4 + } + "###); + + let (response, code) = index + .search_post(json!({ + "q": "chien de chasse", + 
"hybrid": {"semanticRatio": 1.0} + })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle" + }, + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou" + }, + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever" + }, + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier" + } + ] + "###); + + let (response, code) = index + .search_post(json!({ + "q": "petit chien", + "hybrid": {"semanticRatio": 1.0} + })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle" + }, + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier" + }, + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou" + }, + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever" + } + ] + "###); + + let (response, code) = index + .search_post(json!({ + "q": "grand chien de berger des montagnes", + "hybrid": {"semanticRatio": 1.0} + })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou" + }, + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle" + }, + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever" + }, + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier" + } + ] + "###); +} +// test with a server that wrongly responds 400 diff --git a/meilisearch/tests/vector/openai_responses.json.gz 
b/meilisearch/tests/vector/openai_responses.json.gz new file mode 100644 index 000000000..2d27822fe Binary files /dev/null and b/meilisearch/tests/vector/openai_responses.json.gz differ diff --git a/meilisearch/tests/vector/openai_tokenized_responses.json.gz b/meilisearch/tests/vector/openai_tokenized_responses.json.gz new file mode 100644 index 000000000..0c708448c Binary files /dev/null and b/meilisearch/tests/vector/openai_tokenized_responses.json.gz differ diff --git a/meilisearch/tests/vector/rest.rs b/meilisearch/tests/vector/rest.rs index 71d3c7cda..1a64eeb78 100644 --- a/meilisearch/tests/vector/rest.rs +++ b/meilisearch/tests/vector/rest.rs @@ -5,9 +5,9 @@ use reqwest::IntoUrl; use wiremock::matchers::{method, path}; use wiremock::{Mock, MockServer, Request, ResponseTemplate}; -use crate::common::{Server, Value}; +use crate::common::Value; use crate::json; -use crate::vector::GetAllDocumentsOptions; +use crate::vector::{get_server_vector, GetAllDocumentsOptions}; async fn create_mock() -> (MockServer, Value) { let mock_server = MockServer::start().await; @@ -265,22 +265,6 @@ async fn dummy_testing_the_mock() { snapshot!(body, @r###"{"data":[4,4,4]}"###); } -async fn get_server_vector() -> Server { - let server = Server::new().await; - let (value, code) = server.set_features(json!({"vectorStore": true})).await; - snapshot!(code, @"200 OK"); - snapshot!(value, @r###" - { - "vectorStore": true, - "metrics": false, - "logsRoute": false, - "editDocumentsByFunction": false, - "containsFilter": false - } - "###); - server -} - #[actix_rt::test] async fn bad_request() { let (mock, _setting) = create_mock().await; @@ -896,7 +880,7 @@ async fn bad_settings() { let task = server.wait_task(response.uid()).await; snapshot!(task, @r###" { - "uid": 0, + "uid": "[uid]", "indexUid": "doggo", "status": "failed", "type": "settingsUpdate", @@ -941,7 +925,7 @@ async fn bad_settings() { let task = server.wait_task(response.uid()).await; snapshot!(task, @r###" { - "uid": 2, + 
"uid": "[uid]", "indexUid": "doggo", "status": "failed", "type": "documentAdditionOrUpdate", @@ -990,7 +974,7 @@ async fn add_vector_and_user_provided() { let task = index.wait_task(value.uid()).await; snapshot!(task, @r###" { - "uid": 1, + "uid": "[uid]", "indexUid": "doggo", "status": "succeeded", "type": "documentAdditionOrUpdate", @@ -1086,7 +1070,7 @@ async fn server_returns_bad_request() { let task = server.wait_task(response.uid()).await; snapshot!(task, @r###" { - "uid": 0, + "uid": "[uid]", "indexUid": "doggo", "status": "failed", "type": "settingsUpdate", @@ -1125,7 +1109,7 @@ async fn server_returns_bad_request() { let task = server.wait_task(response.uid()).await; snapshot!(task, @r###" { - "uid": 1, + "uid": "[uid]", "indexUid": "doggo", "status": "succeeded", "type": "settingsUpdate", @@ -1154,7 +1138,7 @@ async fn server_returns_bad_request() { let task = server.wait_task(response.uid()).await; snapshot!(task, @r###" { - "uid": 2, + "uid": "[uid]", "indexUid": "doggo", "status": "failed", "type": "documentAdditionOrUpdate", @@ -1198,7 +1182,7 @@ async fn server_returns_bad_response() { let task = server.wait_task(response.uid()).await; snapshot!(task, @r###" { - "uid": 0, + "uid": "[uid]", "indexUid": "doggo", "status": "failed", "type": "settingsUpdate", @@ -1251,7 +1235,7 @@ async fn server_returns_bad_response() { let task = server.wait_task(response.uid()).await; snapshot!(task, @r###" { - "uid": 1, + "uid": "[uid]", "indexUid": "doggo", "status": "failed", "type": "settingsUpdate", @@ -1306,7 +1290,7 @@ async fn server_returns_bad_response() { let task = server.wait_task(response.uid()).await; snapshot!(task, @r###" { - "uid": 2, + "uid": "[uid]", "indexUid": "doggo", "status": "failed", "type": "settingsUpdate", @@ -1361,7 +1345,7 @@ async fn server_returns_bad_response() { let task = server.wait_task(response.uid()).await; snapshot!(task, @r###" { - "uid": 3, + "uid": "[uid]", "indexUid": "doggo", "status": "failed", "type": "settingsUpdate", 
@@ -1426,7 +1410,7 @@ async fn server_returns_bad_response() { let task = server.wait_task(response.uid()).await; snapshot!(task, @r###" { - "uid": 4, + "uid": "[uid]", "indexUid": "doggo", "status": "failed", "type": "settingsUpdate", @@ -1493,7 +1477,7 @@ async fn server_returns_multiple() { let task = index.wait_task(value.uid()).await; snapshot!(task, @r###" { - "uid": 1, + "uid": "[uid]", "indexUid": "doggo", "status": "succeeded", "type": "documentAdditionOrUpdate", @@ -1598,7 +1582,7 @@ async fn server_single_input_returns_in_array() { let task = index.wait_task(value.uid()).await; snapshot!(task, @r###" { - "uid": 1, + "uid": "[uid]", "indexUid": "doggo", "status": "succeeded", "type": "documentAdditionOrUpdate", @@ -1703,7 +1687,7 @@ async fn server_raw() { let task = index.wait_task(value.uid()).await; snapshot!(task, @r###" { - "uid": 1, + "uid": "[uid]", "indexUid": "doggo", "status": "succeeded", "type": "documentAdditionOrUpdate", @@ -1800,7 +1784,7 @@ async fn server_custom_header() { let task = server.wait_task(response.uid()).await; snapshot!(task, @r###" { - "uid": 0, + "uid": "[uid]", "indexUid": "doggo", "status": "failed", "type": "settingsUpdate", @@ -1816,7 +1800,7 @@ async fn server_custom_header() { } }, "error": { - "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: could not authenticate against embedding server\n - server replied with `{\"error\":\"missing header 'my-nonstandard-auth'\"}`", + "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: could not authenticate against embedding server\n - server replied with `{\"error\":\"missing header 'my-nonstandard-auth'\"}`\n - Hint: Check the `apiKey` parameter in the embedder configuration", "code": "vector_embedding_error", "type": "invalid_request", "link": 
"https://docs.meilisearch.com/errors#vector_embedding_error" @@ -1839,7 +1823,7 @@ async fn server_custom_header() { let task = server.wait_task(response.uid()).await; snapshot!(task, @r###" { - "uid": 1, + "uid": "[uid]", "indexUid": "doggo", "status": "failed", "type": "settingsUpdate", @@ -1858,7 +1842,7 @@ async fn server_custom_header() { } }, "error": { - "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: could not authenticate against embedding server\n - server replied with `{\"error\":\"thou shall not pass, Balrog\"}`", + "message": "Error while generating embeddings: runtime error: could not determine model dimensions:\n - test embedding failed with user error: could not authenticate against embedding server\n - server replied with `{\"error\":\"thou shall not pass, Balrog\"}`\n - Hint: Check the `apiKey` parameter in the embedder configuration", "code": "vector_embedding_error", "type": "invalid_request", "link": "https://docs.meilisearch.com/errors#vector_embedding_error" @@ -1881,7 +1865,7 @@ async fn server_custom_header() { let task = server.wait_task(response.uid()).await; snapshot!(task, @r###" { - "uid": 2, + "uid": "[uid]", "indexUid": "doggo", "status": "succeeded", "type": "settingsUpdate", diff --git a/meilisearch/tests/vector/settings.rs b/meilisearch/tests/vector/settings.rs index 8fdb858c3..0714a22ca 100644 --- a/meilisearch/tests/vector/settings.rs +++ b/meilisearch/tests/vector/settings.rs @@ -43,7 +43,7 @@ async fn update_embedder() { let ret = server.wait_task(response.uid()).await; snapshot!(ret, @r###" { - "uid": 1, + "uid": "[uid]", "indexUid": "doggo", "status": "succeeded", "type": "settingsUpdate", diff --git a/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/document-added.snap b/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/document-added.snap index 52d9ad38d..c4f1c0b25 100644 --- 
a/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/document-added.snap +++ b/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/document-added.snap @@ -2,7 +2,7 @@ source: meilisearch/tests/vector/mod.rs --- { - "uid": 1, + "uid": "[uid]", "indexUid": "doggo", "status": "succeeded", "type": "documentAdditionOrUpdate", diff --git a/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/document-deleted.snap b/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/document-deleted.snap index de02d0b1d..c4f1c0b25 100644 --- a/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/document-deleted.snap +++ b/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/document-deleted.snap @@ -2,7 +2,7 @@ source: meilisearch/tests/vector/mod.rs --- { - "uid": 2, + "uid": "[uid]", "indexUid": "doggo", "status": "succeeded", "type": "documentAdditionOrUpdate", diff --git a/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/settings-processed.snap b/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/settings-processed.snap index 316305fa8..08dbe3ee0 100644 --- a/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/settings-processed.snap +++ b/meilisearch/tests/vector/snapshots/mod.rs/add_remove_one_vector_4588/settings-processed.snap @@ -2,7 +2,7 @@ source: meilisearch/tests/vector/mod.rs --- { - "uid": 0, + "uid": "[uid]", "indexUid": "doggo", "status": "succeeded", "type": "settingsUpdate", diff --git a/milli/src/vector/error.rs b/milli/src/vector/error.rs index 7e1cb8752..3c8cb4b06 100644 --- a/milli/src/vector/error.rs +++ b/milli/src/vector/error.rs @@ -62,8 +62,18 @@ pub enum EmbedErrorKind { RestResponseDeserialization(std::io::Error), #[error("expected a response containing {0} embeddings, got only {1}")] RestResponseEmbeddingCount(usize, usize), - #[error("could not authenticate against embedding server{}", 
option_info(.0.as_deref(), "server replied with "))] - RestUnauthorized(Option), + #[error("could not authenticate against {embedding} server{server_reply}{hint}", embedding=match *.1 { + ConfigurationSource::User => "embedding", + ConfigurationSource::OpenAi => "OpenAI", + ConfigurationSource::Ollama => "ollama" + }, + server_reply=option_info(.0.as_deref(), "server replied with "), + hint=match *.1 { + ConfigurationSource::User => "\n - Hint: Check the `apiKey` parameter in the embedder configuration", + ConfigurationSource::OpenAi => "\n - Hint: Check the `apiKey` parameter in the embedder configuration, and the `MEILI_OPENAI_API_KEY` and `OPENAI_API_KEY` environment variables", + ConfigurationSource::Ollama => "\n - Hint: Check the `apiKey` parameter in the embedder configuration" + })] + RestUnauthorized(Option, ConfigurationSource), #[error("sent too many requests to embedding server{}", option_info(.0.as_deref(), "server replied with "))] RestTooManyRequests(Option), #[error("sent a bad request to embedding server{}{}", @@ -136,8 +146,14 @@ impl EmbedError { } } - pub(crate) fn rest_unauthorized(error_response: Option) -> EmbedError { - Self { kind: EmbedErrorKind::RestUnauthorized(error_response), fault: FaultSource::User } + pub(crate) fn rest_unauthorized( + error_response: Option, + configuration_source: ConfigurationSource, + ) -> EmbedError { + Self { + kind: EmbedErrorKind::RestUnauthorized(error_response, configuration_source), + fault: FaultSource::User, + } } pub(crate) fn rest_too_many_requests(error_response: Option) -> EmbedError { diff --git a/milli/src/vector/openai.rs b/milli/src/vector/openai.rs index ce63e69d7..cef45f90e 100644 --- a/milli/src/vector/openai.rs +++ b/milli/src/vector/openai.rs @@ -183,7 +183,7 @@ impl Embedder { let rest_embedder = RestEmbedder::new( RestEmbedderOptions { - api_key: Some(api_key.clone()), + api_key: (!api_key.is_empty()).then(|| api_key.clone()), distribution: None, dimensions: Some(options.dimensions()), 
url, diff --git a/milli/src/vector/rest.rs b/milli/src/vector/rest.rs index 593d2b509..2538f2fff 100644 --- a/milli/src/vector/rest.rs +++ b/milli/src/vector/rest.rs @@ -275,7 +275,10 @@ fn check_response( Err(ureq::Error::Status(code, response)) => { let error_response: Option = response.into_string().ok(); Err(match code { - 401 => Retry::give_up(EmbedError::rest_unauthorized(error_response)), + 401 => Retry::give_up(EmbedError::rest_unauthorized( + error_response, + configuration_source, + )), 429 => Retry::rate_limited(EmbedError::rest_too_many_requests(error_response)), 400 => Retry::give_up(EmbedError::rest_bad_request( error_response,