From abdb337fd633967be7e431ce5dae5968d714f843 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 30 Jul 2024 15:43:40 +0200 Subject: [PATCH] Add openai tests --- meilisearch/tests/vector/mod.rs | 17 + meilisearch/tests/vector/openai.rs | 1761 ++++++++++++++++++++++++++++ meilisearch/tests/vector/rest.rs | 20 +- 3 files changed, 1780 insertions(+), 18 deletions(-) create mode 100644 meilisearch/tests/vector/openai.rs diff --git a/meilisearch/tests/vector/mod.rs b/meilisearch/tests/vector/mod.rs index 4a142f86a..9935c6330 100644 --- a/meilisearch/tests/vector/mod.rs +++ b/meilisearch/tests/vector/mod.rs @@ -1,3 +1,4 @@ +mod openai; mod rest; mod settings; @@ -7,6 +8,22 @@ use crate::common::index::Index; use crate::common::{GetAllDocumentsOptions, Server}; use crate::json; +async fn get_server_vector() -> Server { + let server = Server::new().await; + let (value, code) = server.set_features(json!({"vectorStore": true})).await; + snapshot!(code, @"200 OK"); + snapshot!(value, @r###" + { + "vectorStore": true, + "metrics": false, + "logsRoute": false, + "editDocumentsByFunction": false, + "containsFilter": false + } + "###); + server +} + #[actix_rt::test] async fn add_remove_user_provided() { let server = Server::new().await; diff --git a/meilisearch/tests/vector/openai.rs b/meilisearch/tests/vector/openai.rs new file mode 100644 index 000000000..744f239bc --- /dev/null +++ b/meilisearch/tests/vector/openai.rs @@ -0,0 +1,1761 @@ +use std::collections::BTreeMap; +use std::io::Write; +use std::sync::atomic::{AtomicU32, Ordering}; + +use meili_snap::{json_string, snapshot}; +use wiremock::matchers::{method, path}; +use wiremock::{Mock, MockServer, Request, ResponseTemplate}; + +use crate::common::{GetAllDocumentsOptions, Value}; +use crate::json; +use crate::vector::get_server_vector; + +#[derive(serde::Deserialize)] +struct OpenAiResponses(BTreeMap); + +#[derive(serde::Deserialize)] +struct OpenAiResponse { + large: Option>, + small: Option>, + ada: Option>, + large_512: Option>, +} + +impl OpenAiResponses { + fn get(&self, text: &str, model_dimensions: ModelDimensions) -> Option<&[f32]> { + let entry = self.0.get(text)?; + match model_dimensions { + ModelDimensions::Large => entry.large.as_deref(), + ModelDimensions::Small => entry.small.as_deref(), + ModelDimensions::Ada => entry.ada.as_deref(), + ModelDimensions::Large512 => entry.large_512.as_deref(), + } + } +} + +#[derive(Debug, Clone, Copy, PartialEq, Eq)] +enum ModelDimensions { + Large, + Small, + Ada, + Large512, +} + +impl ModelDimensions { + fn add_to_settings(&self, settings: &mut Value) { + settings["model"] = serde_json::json!(self.model()); + if let ModelDimensions::Large512 = self { + settings["dimensions"] = serde_json::json!(512); + } + } + + fn model(&self) -> &'static str { + match self { + ModelDimensions::Large | ModelDimensions::Large512 => "text-embedding-3-large", + ModelDimensions::Small => "text-embedding-3-small", + ModelDimensions::Ada => "text-embedding-ada-002", + } + } + + fn from_request(request: &serde_json::Value) -> Self { + let has_dimensions_512 = if let Some(dimensions) = request.get("dimensions") { + if dimensions != 512 { + panic!("unsupported dimensions values") + } + true + } else { + false + }; + let serde_json::Value::String(model) = &request["model"] else { + panic!("unsupported non string model") + }; + match (model.as_str(), has_dimensions_512) { + ("text-embedding-3-large", true) => Self::Large512, + (_, true) => panic!("unsupported dimensions with non-large model"), + ("text-embedding-3-large", false) => Self::Large, + ("text-embedding-3-small", false) => Self::Small, + ("text-embedding-ada-002", false) => Self::Ada, + (_, false) => panic!("unsupported model"), + } + } +} + +fn openai_responses() -> &'static OpenAiResponses { + static OPENAI_RESPONSES: std::sync::OnceLock = std::sync::OnceLock::new(); + OPENAI_RESPONSES.get_or_init(|| { + // json file that was compressed with gzip + // decompress with `gzip --keep -d openai_responses.json.gz` + // recompress with `gzip --keep -c openai_responses.json > openai_responses.json.gz` + let compressed_responses = include_bytes!("openai_responses.json.gz"); + let mut responses = Vec::new(); + let mut decoder = flate2::write::GzDecoder::new(&mut responses); + + decoder.write_all(compressed_responses).unwrap(); + drop(decoder); + serde_json::from_slice(&responses).unwrap() + }) +} + +async fn create_mock_with_template( + document_template: &str, + model_dimensions: ModelDimensions, + fallible: bool, +) -> (MockServer, Value) { + let mock_server = MockServer::start().await; + const API_KEY: &str = "my-api-key"; + const API_KEY_BEARER: &str = "Bearer my-api-key"; + + let attempt = AtomicU32::new(0); + + Mock::given(method("POST")) + .and(path("/")) + .respond_with(move |req: &Request| { + // 0. maybe return 500 + if fallible { + let attempt = attempt.fetch_add(1, Ordering::Relaxed); + let failed = matches!(attempt % 4, 0 | 1 | 3); + if failed { + return ResponseTemplate::new(503).set_body_json(json!({ + "error": { + "message": "come back later", + "type": "come_back_later" + } + })) + } + } + // 1. check API key + match req.headers.get("Authorization") { + Some(api_key) if api_key == API_KEY_BEARER => { + {} + } + Some(api_key) => { + let api_key = api_key.to_str().unwrap(); + return ResponseTemplate::new(401).set_body_json( + json!( + { + "error": { + "message": format!("Incorrect API key provided: {api_key}. You can find your API key at https://platform.openai.com/account/api-keys."), + "type": "invalid_request_error", + "param": serde_json::Value::Null, + "code": "invalid_api_key" + } + } + ), + ) + } + None => { + return ResponseTemplate::new(401).set_body_json( + json!( + { + "error": { + "message": "You didn't provide an API key. You need to provide your API key in an Authorization header using Bearer auth (i.e. Authorization: Bearer YOUR_KEY), or as the password field (with blank username) if you're accessing the API from your browser and are prompted for a username and password. You can obtain an API key from https://platform.openai.com/account/api-keys.", + "type": "invalid_request_error", + "param": serde_json::Value::Null, + "code": serde_json::Value::Null + } + } + ), + ) + } + } + // 2. parse text inputs + let query: serde_json::Value = match req.body_json() { + Ok(query) => query, + Err(_error) => return ResponseTemplate::new(400).set_body_json( + json!( + { + "error": { + "message": "We could not parse the JSON body of your request. (HINT: This likely means you aren't using your HTTP library correctly. The OpenAI API expects a JSON payload, but what was sent was not valid JSON. If you have trouble figuring out how to fix this, please contact us through our help center at help.openai.com.)", + "type": "invalid_request_error", + "param": serde_json::Value::Null, + "code": serde_json::Value::Null + } + } + ) + ) + }; + let query_model_dimensions = ModelDimensions::from_request(&query); + if query_model_dimensions != model_dimensions { + return ResponseTemplate::new(400).set_body_json(json!({ + "error": { + "message": format!("Expected {model_dimensions:?}, got {query_model_dimensions:?}"), + "type": "invalid_model_dimensions", + "query": query, + } + })) + } + + // 3. for each text, find embedding in responses + let serde_json::Value::Array(inputs) = &query["input"] else { + return ResponseTemplate::new(400).set_body_json(json!({ + "error": { + "message": "Unexpected `input` value", + "type": "test_response", + "query": query + } + })) + }; + + let mut embeddings = Vec::new(); + + for input in inputs { + let serde_json::Value::String(input) = input else { + return ResponseTemplate::new(400).set_body_json(json!({ + "error": { + "message": "Unexpected `input` value", + "type": "test_response", + "query": query + } + })) + }; + + let Some(embedding) = openai_responses().get(input, model_dimensions) else { + return ResponseTemplate::new(400).set_body_json(json!( + { + "error": { + "message": "Could not find embedding for text", + "text": input, + "model_dimensions": format!("{model_dimensions:?}"), + "type": "add_to_openai_responses_json_please", + "query": query, + } + } + )) + }; + + embeddings.push(embedding.to_vec()); + } + + let data : Vec<_> = embeddings.into_iter().enumerate().map(|(index, embedding)| json!({ + "object": "embedding", + "index": index, + "embedding": embedding, + })).collect(); + + // 4. produce output from embeddings + ResponseTemplate::new(200).set_body_json(json!({ + "object": "list", + "data": data, + "model": model_dimensions.model(), + "usage": { + "prompt_tokens": "[prompt_tokens]", + "total_tokens": "[total_tokens]" + } + })) + }) + .mount(&mock_server) + .await; + let url = mock_server.uri(); + + let mut embedder_settings = json!({ + "source": "openAi", + "url": url, + "apiKey": API_KEY, + "documentTemplate": document_template + }); + + model_dimensions.add_to_settings(&mut embedder_settings); + + (mock_server, embedder_settings) +} + +const DOGGO_TEMPLATE: &str = r#"{%- if doc.gender == "F" -%}Une chienne nommée {{doc.name}}, née en {{doc.birthyear}} + {%- else -%} + Un chien nommé {{doc.name}}, né en {{doc.birthyear}} + {%- endif %}, de race {{doc.breed}}."#; + +async fn create_mock() -> (MockServer, Value) { + create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large, false).await +} + +async fn create_mock_dimensions() -> (MockServer, Value) { + create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large512, false).await +} + +async fn create_mock_small_embedding_model() -> (MockServer, Value) { + create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Small, false).await +} + +async fn create_mock_legacy_embedding_model() -> (MockServer, Value) { + create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Ada, false).await +} + +async fn create_fallible_mock() -> (MockServer, Value) { + create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large, true).await +} + +// basic test "it works" +#[actix_rt::test] +async fn it_works() { + let (_mock, setting) = create_mock().await; + let server = get_server_vector().await; + let index = server.index("doggo"); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "default": setting, + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task["status"], @r###""succeeded""###); + let documents = json!([ + {"id": 0, "name": "kefir", "gender": "M", "birthyear": 2023, "breed": "Patou"}, + {"id": 1, "name": "Intel", "gender": "M", "birthyear": 2011, "breed": "Beagle"}, + {"id": 2, "name": "Vénus", "gender": "F", "birthyear": 2003, "breed": "Jack Russel Terrier"}, + {"id": 3, "name": "Max", "gender": "M", "birthyear": 1995, "breed": "Labrador Retriever"}, + ]); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + let task = index.wait_task(value.uid()).await; + snapshot!(task, @r###" + { + "uid": 1, + "indexUid": "doggo", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 4, + "indexedDocuments": 4 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (documents, _code) = index + .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) + .await; + snapshot!(json_string!(documents, {".results.*._vectors.default.embeddings" => "[vector]"}), @r###" + { + "results": [ + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + }, + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + }, + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + }, + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + } + ], + "offset": 0, + "limit": 20, + "total": 4 + } + "###); + + let (response, code) = index + .search_post(json!({ + "q": "chien de chasse", + "hybrid": {"semanticRatio": 1.0} + })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle" + }, + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou" + }, + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever" + }, + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier" + } + ] + "###); + + let (response, code) = index + .search_post(json!({ + "q": "petit chien", + "hybrid": {"semanticRatio": 1.0} + })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle" + }, + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier" + }, + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou" + }, + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever" + } + ] + "###); + + let (response, code) = index + .search_post(json!({ + "q": "grand chien de berger des montagnes", + "hybrid": {"semanticRatio": 1.0} + })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou" + }, + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle" + }, + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever" + }, + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier" + } + ] + "###); +} + +// tokenize long text + +// "wrong parameters" + +#[actix_rt::test] +async fn bad_api_key() { + let (_mock, mut setting) = create_mock().await; + let server = get_server_vector().await; + let index = server.index("doggo"); + + let documents = json!([ + {"id": 0, "name": "kefir", "gender": "M", "birthyear": 2023, "breed": "Patou"}, + {"id": 1, "name": "Intel", "gender": "M", "birthyear": 2011, "breed": "Beagle"}, + {"id": 2, "name": "Vénus", "gender": "F", "birthyear": 2003, "breed": "Jack Russel Terrier"}, + {"id": 3, "name": "Max", "gender": "M", "birthyear": 1995, "breed": "Labrador Retriever"}, + ]); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + let task = index.wait_task(value.uid()).await; + + snapshot!(task, @r###" + { + "uid": 0, + "indexUid": "doggo", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 4, + "indexedDocuments": 4 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + // wrong API key + setting["apiKey"] = "doggo".into(); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "default": setting, + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + + snapshot!(task, @r###" + { + "uid": 1, + "indexUid": "doggo", + "status": "failed", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "default": { + "source": "openAi", + "model": "text-embedding-3-large", + "apiKey": "XXX...", + "documentTemplate": "{%- if doc.gender == \"F\" -%}Une chienne nommée {{doc.name}}, née en {{doc.birthyear}}\n {%- else -%}\n Un chien nommé {{doc.name}}, né en {{doc.birthyear}}\n {%- endif %}, de race {{doc.breed}}.", + "url": "[url]" + } + } + }, + "error": { + "message": "While embedding documents for embedder `default`: user error: could not authenticate against OpenAI server\n - server replied with `{\"error\":{\"message\":\"Incorrect API key provided: Bearer doggo. You can find your API key at https://platform.openai.com/account/api-keys.\",\"type\":\"invalid_request_error\",\"param\":null,\"code\":\"invalid_api_key\"}}`\n - Hint: Check the `apiKey` parameter in the embedder configuration, and the `MEILI_OPENAI_API_KEY` and `OPENAI_API_KEY` environment variables", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + // no API key + setting.as_object_mut().unwrap().remove("apiKey"); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "default": setting, + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task, @r###" + { + "uid": 2, + "indexUid": "doggo", + "status": "failed", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "default": { + "source": "openAi", + "model": "text-embedding-3-large", + "documentTemplate": "{%- if doc.gender == \"F\" -%}Une chienne nommée {{doc.name}}, née en {{doc.birthyear}}\n {%- else -%}\n Un chien nommé {{doc.name}}, né en {{doc.birthyear}}\n {%- endif %}, de race {{doc.breed}}.", + "url": "[url]" + } + } + }, + "error": { + "message": "While embedding documents for embedder `default`: user error: could not authenticate against OpenAI server\n - server replied with `{\"error\":{\"message\":\"You didn't provide an API key. You need to provide your API key in an Authorization header using Bearer auth (i.e. Authorization: Bearer YOUR_KEY), or as the password field (with blank username) if you're accessing the API from your browser and are prompted for a username and password. You can obtain an API key from https://platform.openai.com/account/api-keys.\",\"type\":\"invalid_request_error\",\"param\":null,\"code\":null}}`\n - Hint: Check the `apiKey` parameter in the embedder configuration, and the `MEILI_OPENAI_API_KEY` and `OPENAI_API_KEY` environment variables", + "code": "vector_embedding_error", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#vector_embedding_error" + }, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + // not a string API key + setting["apiKey"] = 42.into(); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "default": setting, + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Invalid value type at `.embedders.default.apiKey`: expected a string, but found a positive integer: `42`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); +} + +// one test with wrong model +#[actix_rt::test] +async fn bad_model() { + let (_mock, mut setting) = create_mock().await; + let server = get_server_vector().await; + let index = server.index("doggo"); + + let documents = json!([ + {"id": 0, "name": "kefir", "gender": "M", "birthyear": 2023, "breed": "Patou"}, + {"id": 1, "name": "Intel", "gender": "M", "birthyear": 2011, "breed": "Beagle"}, + {"id": 2, "name": "Vénus", "gender": "F", "birthyear": 2003, "breed": "Jack Russel Terrier"}, + {"id": 3, "name": "Max", "gender": "M", "birthyear": 1995, "breed": "Labrador Retriever"}, + ]); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + let task = index.wait_task(value.uid()).await; + + snapshot!(task, @r###" + { + "uid": 0, + "indexUid": "doggo", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 4, + "indexedDocuments": 4 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + // wrong model + setting["model"] = "doggo".into(); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "default": setting, + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + + snapshot!(response, @r###" + { + "message": "`.embedders.default.model`: Invalid model `doggo` for OpenAI. Supported models: [\"text-embedding-ada-002\", \"text-embedding-3-small\", \"text-embedding-3-large\"]", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + + // not a string model + setting["model"] = 42.into(); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "default": setting, + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Invalid value type at `.embedders.default.model`: expected a string, but found a positive integer: `42`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); +} + +#[actix_rt::test] +async fn bad_dimensions() { + let (_mock, mut setting) = create_mock().await; + let server = get_server_vector().await; + let index = server.index("doggo"); + + let documents = json!([ + {"id": 0, "name": "kefir", "gender": "M", "birthyear": 2023, "breed": "Patou"}, + {"id": 1, "name": "Intel", "gender": "M", "birthyear": 2011, "breed": "Beagle"}, + {"id": 2, "name": "Vénus", "gender": "F", "birthyear": 2003, "breed": "Jack Russel Terrier"}, + {"id": 3, "name": "Max", "gender": "M", "birthyear": 1995, "breed": "Labrador Retriever"}, + ]); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + let task = index.wait_task(value.uid()).await; + + snapshot!(task, @r###" + { + "uid": 0, + "indexUid": "doggo", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 4, + "indexedDocuments": 4 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + // null dimensions + setting["dimensions"] = 0.into(); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "default": setting, + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + + snapshot!(response, @r###" + { + "message": "`.embedders.default.dimensions`: `dimensions` cannot be zero", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + + // negative dimensions + setting["dimensions"] = (-42).into(); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "default": setting, + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Invalid value type at `.embedders.default.dimensions`: expected a positive integer, but found a negative integer: `-42`", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); + + // huge dimensions + setting["dimensions"] = (42_000_000).into(); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "default": setting, + }, + })) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "`.embedders.default.dimensions`: Model `text-embedding-3-large` does not support overriding its dimensions to a value higher than 3072. Found 42000000", + "code": "invalid_settings_embedders", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_embedders" + } + "###); +} + +// one test with changed dimensions +#[actix_rt::test] +async fn smaller_dimensions() { + let (_mock, setting) = create_mock_dimensions().await; + let server = get_server_vector().await; + let index = server.index("doggo"); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "default": setting, + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task["status"], @r###""succeeded""###); + let documents = json!([ + {"id": 0, "name": "kefir", "gender": "M", "birthyear": 2023, "breed": "Patou"}, + {"id": 1, "name": "Intel", "gender": "M", "birthyear": 2011, "breed": "Beagle"}, + {"id": 2, "name": "Vénus", "gender": "F", "birthyear": 2003, "breed": "Jack Russel Terrier"}, + {"id": 3, "name": "Max", "gender": "M", "birthyear": 1995, "breed": "Labrador Retriever"}, + ]); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + let task = index.wait_task(value.uid()).await; + snapshot!(task, @r###" + { + "uid": 1, + "indexUid": "doggo", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 4, + "indexedDocuments": 4 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (documents, _code) = index + .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) + .await; + snapshot!(json_string!(documents, {".results.*._vectors.default.embeddings" => "[vector]"}), @r###" + { + "results": [ + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + }, + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + }, + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + }, + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + } + ], + "offset": 0, + "limit": 20, + "total": 4 + } + "###); + + let (response, code) = index + .search_post(json!({ + "q": "chien de chasse", + "hybrid": {"semanticRatio": 1.0} + })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle" + }, + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou" + }, + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier" + }, + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever" + } + ] + "###); + + let (response, code) = index + .search_post(json!({ + "q": "petit chien", + "hybrid": {"semanticRatio": 1.0} + })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier" + }, + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou" + }, + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle" + }, + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever" + } + ] + "###); + + let (response, code) = index + .search_post(json!({ + "q": "grand chien de berger des montagnes", + "hybrid": {"semanticRatio": 1.0} + })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou" + }, + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever" + }, + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle" + }, + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier" + } + ] + "###); +} + +// one test with different models +#[actix_rt::test] +async fn small_embedding_model() { + let (_mock, setting) = create_mock_small_embedding_model().await; + let server = get_server_vector().await; + let index = server.index("doggo"); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "default": setting, + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task["status"], @r###""succeeded""###); + let documents = json!([ + {"id": 0, "name": "kefir", "gender": "M", "birthyear": 2023, "breed": "Patou"}, + {"id": 1, "name": "Intel", "gender": "M", "birthyear": 2011, "breed": "Beagle"}, + {"id": 2, "name": "Vénus", "gender": "F", "birthyear": 2003, "breed": "Jack Russel Terrier"}, + {"id": 3, "name": "Max", "gender": "M", "birthyear": 1995, "breed": "Labrador Retriever"}, + ]); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + let task = index.wait_task(value.uid()).await; + snapshot!(task, @r###" + { + "uid": 1, + "indexUid": "doggo", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 4, + "indexedDocuments": 4 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (documents, _code) = index + .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) + .await; + snapshot!(json_string!(documents, {".results.*._vectors.default.embeddings" => "[vector]"}), @r###" + { + "results": [ + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + }, + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + }, + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + }, + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + } + ], + "offset": 0, + "limit": 20, + "total": 4 + } + "###); + + let (response, code) = index + .search_post(json!({ + "q": "chien de chasse", + "hybrid": {"semanticRatio": 1.0} + })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever" + }, + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle" + }, + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier" + }, + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou" + } + ] + "###); + + let (response, code) = index + .search_post(json!({ + "q": "petit chien", + "hybrid": {"semanticRatio": 1.0} + })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou" + }, + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle" + }, + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever" + }, + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier" + } + ] + "###); + + let (response, code) = index + .search_post(json!({ + "q": "grand chien de berger des montagnes", + "hybrid": {"semanticRatio": 1.0} + })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever" + }, + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou" + }, + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle" + }, + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier" + } + ] + "###); +} + +#[actix_rt::test] +async fn legacy_embedding_model() { + let (_mock, setting) = create_mock_legacy_embedding_model().await; + let server = get_server_vector().await; + let index = server.index("doggo"); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "default": setting, + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task["status"], @r###""succeeded""###); + let documents = json!([ + {"id": 0, "name": "kefir", "gender": "M", "birthyear": 2023, "breed": "Patou"}, + {"id": 1, "name": "Intel", "gender": "M", "birthyear": 2011, "breed": "Beagle"}, + {"id": 2, "name": "Vénus", "gender": "F", "birthyear": 2003, "breed": "Jack Russel Terrier"}, + {"id": 3, "name": "Max", "gender": "M", "birthyear": 1995, "breed": "Labrador Retriever"}, + ]); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + let task = index.wait_task(value.uid()).await; + snapshot!(task, @r###" + { + "uid": 1, + "indexUid": "doggo", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 4, + "indexedDocuments": 4 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (documents, _code) = index + .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) + .await; + snapshot!(json_string!(documents, {".results.*._vectors.default.embeddings" => "[vector]"}), @r###" + { + "results": [ + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + }, + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + }, + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + }, + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + } + ], + "offset": 0, + "limit": 20, + "total": 4 + } + "###); + + let (response, code) = index + .search_post(json!({ + "q": "chien de chasse", + "hybrid": {"semanticRatio": 1.0} + })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier" + }, + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever" + }, + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle" + }, + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou" + } + ] + "###); + + let (response, code) = index + .search_post(json!({ + "q": "petit chien", + "hybrid": {"semanticRatio": 1.0} + })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier" + }, + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever" + }, + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle" + }, + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou" + } + ] + "###); + + let (response, code) = index + .search_post(json!({ + "q": "grand chien de berger des montagnes", + "hybrid": {"semanticRatio": 1.0} + })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever" + }, + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier" + }, + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle" + }, + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou" + } + ] + "###); +} + +// test with a server that responds 500 on 3 out of 4 calls +#[actix_rt::test] +async fn it_still_works() { + let (_mock, setting) = create_fallible_mock().await; + let server = get_server_vector().await; + let index = server.index("doggo"); + + let (response, code) = index + .update_settings(json!({ + "embedders": { + "default": setting, + }, + })) + .await; + snapshot!(code, @"202 Accepted"); + let task = server.wait_task(response.uid()).await; + snapshot!(task["status"], @r###""succeeded""###); + let documents = json!([ + {"id": 0, "name": "kefir", "gender": "M", "birthyear": 2023, "breed": "Patou"}, + {"id": 1, "name": "Intel", "gender": "M", "birthyear": 2011, "breed": "Beagle"}, + {"id": 2, "name": "Vénus", "gender": "F", "birthyear": 2003, "breed": "Jack Russel Terrier"}, + {"id": 3, "name": "Max", "gender": "M", "birthyear": 1995, "breed": "Labrador Retriever"}, + ]); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + let task = index.wait_task(value.uid()).await; + snapshot!(task, @r###" + { + "uid": 1, + "indexUid": "doggo", + "status": "succeeded", + "type": "documentAdditionOrUpdate", + "canceledBy": null, + "details": { + "receivedDocuments": 4, + "indexedDocuments": 4 + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "###); + + let (documents, _code) = index + .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) + .await; + snapshot!(json_string!(documents, {".results.*._vectors.default.embeddings" => "[vector]"}), @r###" + { + "results": [ + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + }, + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + }, + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + }, + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever", + "_vectors": { + "default": { + "embeddings": "[vector]", + "regenerate": true + } + } + } + ], + "offset": 0, + "limit": 20, + "total": 4 + } + "###); + + let (response, code) = index + .search_post(json!({ + "q": "chien de chasse", + "hybrid": {"semanticRatio": 1.0} + })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle" + }, + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou" + }, + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever" + }, + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier" + } + ] + "###); + + let (response, code) = index + .search_post(json!({ + "q": "petit chien", + "hybrid": {"semanticRatio": 1.0} + })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle" + }, + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier" + }, + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou" + }, + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever" + } + ] + "###); + + let (response, code) = index + .search_post(json!({ + "q": "grand chien de berger des montagnes", + "hybrid": {"semanticRatio": 1.0} + })) + .await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(response["hits"]), @r###" + [ + { + "id": 0, + "name": "kefir", + "gender": "M", + "birthyear": 2023, + "breed": "Patou" + }, + { + "id": 1, + "name": "Intel", + "gender": "M", + "birthyear": 2011, + "breed": "Beagle" + }, + { + "id": 3, + "name": "Max", + "gender": "M", + "birthyear": 1995, + "breed": "Labrador Retriever" + }, + { + "id": 2, + "name": "Vénus", + "gender": "F", + "birthyear": 2003, + "breed": "Jack Russel Terrier" + } + ] + "###); +} +// test with a server that wrongly responds 400 diff --git a/meilisearch/tests/vector/rest.rs b/meilisearch/tests/vector/rest.rs index fe72ad428..75b0cf70d 100644 --- a/meilisearch/tests/vector/rest.rs +++ b/meilisearch/tests/vector/rest.rs @@ -5,9 +5,9 @@ use reqwest::IntoUrl; use wiremock::matchers::{method, path}; use wiremock::{Mock, MockServer, Request, ResponseTemplate}; -use crate::common::{Server, Value}; +use crate::common::Value; use crate::json; -use crate::vector::GetAllDocumentsOptions; +use crate::vector::{get_server_vector, GetAllDocumentsOptions}; async fn create_mock() -> (MockServer, Value) { let mock_server = MockServer::start().await; @@ -265,22 +265,6 @@ async fn dummy_testing_the_mock() { snapshot!(body, @r###"{"data":[4,4,4]}"###); } -async fn get_server_vector() -> Server { - let server = Server::new().await; - let (value, code) = server.set_features(json!({"vectorStore": true})).await; - snapshot!(code, @"200 OK"); - snapshot!(value, @r###" - { - "vectorStore": true, - "metrics": false, - "logsRoute": false, - "editDocumentsByFunction": false, - "containsFilter": false - } - "###); - server -} - #[actix_rt::test] async fn bad_request() { let (mock, _setting) = create_mock().await;