diff --git a/crates/meilisearch/tests/common/mod.rs b/crates/meilisearch/tests/common/mod.rs index 1a73a7532..2fafbd11f 100644 --- a/crates/meilisearch/tests/common/mod.rs +++ b/crates/meilisearch/tests/common/mod.rs @@ -3,7 +3,7 @@ pub mod index; pub mod server; pub mod service; -use std::fmt::{self, Display}; +use std::{fmt::{self, Display}, future::Future}; #[allow(unused)] pub use index::GetAllDocumentsOptions; diff --git a/crates/meilisearch/tests/vector/fragments.rs b/crates/meilisearch/tests/vector/fragments.rs index 027a4069d..c3fef8c79 100644 --- a/crates/meilisearch/tests/vector/fragments.rs +++ b/crates/meilisearch/tests/vector/fragments.rs @@ -6,69 +6,23 @@ use wiremock::matchers::{method, path}; use wiremock::{Mock, MockServer, Request, ResponseTemplate}; use crate::common::index::Index; -use crate::common::Shared; -use crate::common::Value; +use crate::common::{Owned, Shared}; use crate::json; use crate::vector::Server; -use crate::vector::{get_server_vector, GetAllDocumentsOptions}; +use crate::vector::GetAllDocumentsOptions; async fn shared_index_for_fragments() -> Index<'static, Shared> { static INDEX: OnceCell<(Server, String)> = OnceCell::const_new(); let (server, uid) = INDEX .get_or_init(|| async { - let (_mock, settings) = create_mock( - json!({ - "withBreed": {"value": "{{ doc.name }} is a {{ doc.breed }}"}, - "basic": {"value": "{{ doc.name }} is a dog"}, - }), - json!({ - "justBreed": {"value": "It's a {{ media.breed }}"}, - "justName": {"value": "{{ media.name }} is a dog"}, - "query": {"value": "Some pre-prompt for query {{ q }}"}, - }), - ) - .await; - - let server = Server::new().await; - let index = server.unique_index(); - - let (_response, code) = server.set_features(json!({"multimodal": true})).await; - snapshot!(code, @"200 OK"); - - // Configure the index to use our mock embedder - let (response, code) = index - .update_settings(json!({ - "embedders": { - "rest": settings, - }, - })) - .await; - snapshot!(code, @"202 Accepted"); - - let task = server.wait_task(response.uid()).await; - snapshot!(task["status"], @r###""succeeded""###); - - // Send documents - let documents = json!([ - {"id": 0, "name": "kefir"}, - {"id": 1, "name": "echo", "_vectors": { "rest": [1, 1, 1] }}, - {"id": 2, "name": "intel", "breed": "labrador"}, - {"id": 3, "name": "dustin", "breed": "bulldog"}, - ]); - let (value, code) = index.add_documents(documents, None).await; - snapshot!(code, @"202 Accepted"); - - let task = index.wait_task(value.uid()).await; - snapshot!(task["status"], @r###""succeeded""###); - - let uid = index.uid.clone(); + let (server, uid, _) = init_fragments_index().await; (server.into_shared(), uid) }) .await; server._index(uid).to_shared() } -async fn create_mock(indexing_fragments: Value, search_fragments: Value) -> (MockServer, Value) { +pub async fn init_fragments_index() -> (Server, String, crate::common::Value) { let mock_server = MockServer::start().await; let text_to_embedding: BTreeMap<_, _> = vec![ @@ -99,22 +53,62 @@ async fn create_mock(indexing_fragments: Value, search_fragments: Value) -> (Moc .await; let url = mock_server.uri(); - let embedder_settings = json!({ - "source": "rest", - "url": url, - "dimensions": 3, - "request": "{{fragment}}", - "response": { - "data": "{{embedding}}" - }, - "indexingFragments": indexing_fragments, - "searchFragments": search_fragments, - "documentTemplate": "document template: {{dog.name}}", - }); + let server = Server::new().await; + let index = server.unique_index(); - (mock_server, embedder_settings) + let (_response, code) = server.set_features(json!({"multimodal": true})).await; + snapshot!(code, @"200 OK"); + + // Configure the index to use our mock embedder + let settings = json!({ + "embedders": { + "rest": { + "source": "rest", + "url": url, + "dimensions": 3, + "request": "{{fragment}}", + "response": { + "data": "{{embedding}}" + }, + "indexingFragments": { + "withBreed": {"value": "{{ doc.name }} is a {{ doc.breed }}"}, + "basic": {"value": "{{ doc.name }} is a dog"}, + }, + "searchFragments": { + "justBreed": {"value": "It's a {{ media.breed }}"}, + "justName": {"value": "{{ media.name }} is a dog"}, + "query": {"value": "Some pre-prompt for query {{ q }}"}, + } + }, + }, + }); + let (response, code) = index + .update_settings(settings.clone()) + .await; + snapshot!(code, @"202 Accepted"); + + let task = server.wait_task(response.uid()).await; + snapshot!(task["status"], @r###""succeeded""###); + + // Send documents + let documents = json!([ + {"id": 0, "name": "kefir"}, + {"id": 1, "name": "echo", "_vectors": { "rest": [1, 1, 1] }}, + {"id": 2, "name": "intel", "breed": "labrador"}, + {"id": 3, "name": "dustin", "breed": "bulldog"}, + ]); + let (value, code) = index.add_documents(documents, None).await; + snapshot!(code, @"202 Accepted"); + + let task = index.wait_task(value.uid()).await; + snapshot!(task["status"], @r###""succeeded""###); + + let uid = index.uid.clone(); + (server, uid, settings) } +// TODO: Test cannot pass both fragments and document + #[actix_rt::test] async fn indexing_fragments() { let index = shared_index_for_fragments().await; @@ -346,3 +340,141 @@ async fn search_with_query() { } "#); } + +#[actix_rt::test] +async fn deleting_fragments_deletes_vectors() { + let (server, uid, mut settings) = init_fragments_index().await; + let index = server.index(uid); + + settings["embedders"]["rest"]["indexingFragments"]["basic"] = serde_json::Value::Null; + + let (documents, code) = index + .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) + .await; + println!("Documents before update: {documents:?}"); + + let (response, code) = index + .update_settings(settings) + .await; + snapshot!(code, @"202 Accepted"); + let value = server.wait_task(response.uid()).await.succeeded(); + snapshot!(value, @r#" + { + "uid": "[uid]", + "batchUid": "[batch_uid]", + "indexUid": "[uuid]", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "embedders": { + "rest": { + "source": "rest", + "dimensions": 3, + "url": "[url]", + "indexingFragments": { + "basic": null, + "withBreed": { + "value": "{{ doc.name }} is a {{ doc.breed }}" + } + }, + "searchFragments": { + "justBreed": { + "value": "It's a {{ media.breed }}" + }, + "justName": { + "value": "{{ media.name }} is a dog" + }, + "query": { + "value": "Some pre-prompt for query {{ q }}" + } + }, + "request": "{{fragment}}", + "response": { + "data": "{{embedding}}" + } + } + } + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" + } + "#); + + let (documents, code) = index + .get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() }) + .await; + snapshot!(code, @"200 OK"); + snapshot!(json_string!(documents), @r#" + { + "results": [ + { + "id": 0, + "name": "kefir", + "_vectors": { + "rest": { + "embeddings": [], + "regenerate": true + } + } + }, + { + "id": 1, + "name": "echo", + "_vectors": { + "rest": { + "embeddings": [ + [ + 1.0, + 1.0, + 1.0 + ] + ], + "regenerate": false + } + } + }, + { + "id": 2, + "name": "intel", + "breed": "labrador", + "_vectors": { + "rest": { + "embeddings": [ + [ + 1.0, + 1.0, + 0.0 + ] + ], + "regenerate": true + } + } + }, + { + "id": 3, + "name": "dustin", + "breed": "bulldog", + "_vectors": { + "rest": { + "embeddings": [ + [ + -0.5, + 0.5, + 0.0 + ] + ], + "regenerate": true + } + } + } + ], + "offset": 0, + "limit": 20, + "total": 4 + } + "#); +}