MeiliSearch/crates/meilisearch/tests/search/hybrid.rs

654 lines
31 KiB
Rust
Raw Normal View History

use meili_snap::snapshot;
2023-12-13 11:47:12 +01:00
use once_cell::sync::Lazy;
use crate::common::index::Index;
use crate::common::{Server, Value};
use crate::json;
2024-05-16 18:12:26 +02:00
async fn index_with_documents_user_provided<'a>(
server: &'a Server,
documents: &Value,
) -> Index<'a> {
2023-12-13 11:47:12 +01:00
let index = server.index("test");
let (response, code) = server.set_features(json!({"vectorStore": true})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false,
"containsFilter": false
2023-12-13 11:47:12 +01:00
}
"###);
let (response, code) = index
2023-12-20 17:06:50 +01:00
.update_settings(json!({ "embedders": {"default": {
"source": "userProvided",
"dimensions": 2}}} ))
2023-12-13 11:47:12 +01:00
.await;
assert_eq!(202, code, "{:?}", response);
index.wait_task(response.uid()).await;
2023-12-13 11:47:12 +01:00
let (response, code) = index.add_documents(documents.clone(), None).await;
assert_eq!(202, code, "{:?}", response);
index.wait_task(response.uid()).await;
2023-12-13 11:47:12 +01:00
index
}
2024-05-16 18:12:26 +02:00
async fn index_with_documents_hf<'a>(server: &'a Server, documents: &Value) -> Index<'a> {
let index = server.index("test");
let (response, code) = server.set_features(json!({"vectorStore": true})).await;
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response), @r###"
{
"vectorStore": true,
"metrics": false,
"logsRoute": false,
"editDocumentsByFunction": false,
"containsFilter": false
2024-05-16 18:12:26 +02:00
}
"###);
let (response, code) = index
.update_settings(json!({ "embedders": {"default": {
"source": "huggingFace",
"model": "sentence-transformers/all-MiniLM-L6-v2",
"revision": "e4ce9877abf3edfe10b0d82785e83bdcb973e22e",
"documentTemplate": "{{doc.title}}, {{doc.desc}}"
}}} ))
.await;
assert_eq!(202, code, "{:?}", response);
index.wait_task(response.uid()).await;
let (response, code) = index.add_documents(documents.clone(), None).await;
assert_eq!(202, code, "{:?}", response);
index.wait_task(response.uid()).await;
index
}
static SIMPLE_SEARCH_DOCUMENTS_VEC: Lazy<Value> = Lazy::new(|| {
2023-12-13 11:47:12 +01:00
json!([
{
"title": "Shazam!",
"desc": "a Captain Marvel ersatz",
"id": "1",
"_vectors": {"default": [1.0, 3.0]},
},
{
"title": "Captain Planet",
"desc": "He's not part of the Marvel Cinematic Universe",
"id": "2",
"_vectors": {"default": [1.0, 2.0]},
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "3",
"_vectors": {"default": [2.0, 3.0]},
}])
});
2024-05-16 18:12:26 +02:00
static SINGLE_DOCUMENT_VEC: Lazy<Value> = Lazy::new(|| {
2024-01-03 12:45:13 +01:00
json!([{
"title": "Shazam!",
"desc": "a Captain Marvel ersatz",
"id": "1",
"_vectors": {"default": [1.0, 3.0]},
}])
});
2024-05-16 18:12:26 +02:00
static SIMPLE_SEARCH_DOCUMENTS: Lazy<Value> = Lazy::new(|| {
json!([
{
"title": "Shazam!",
"desc": "a Captain Marvel ersatz",
"id": "1",
},
{
"title": "Captain Planet",
"desc": "He's not part of the Marvel Cinematic Universe",
"id": "2",
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "3",
}])
});
2023-12-13 11:47:12 +01:00
#[actix_rt::test]
async fn simple_search() {
let server = Server::new().await;
2024-05-16 18:12:26 +02:00
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
2023-12-13 11:47:12 +01:00
let (response, code) = index
.search_post(
2024-09-17 16:30:04 +02:00
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2, "embedder": "default"}, "retrieveVectors": true}),
2023-12-13 11:47:12 +01:00
)
.await;
snapshot!(code, @"200 OK");
2024-06-12 18:13:34 +02:00
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}}},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}}}]"###);
2024-04-03 09:35:07 +02:00
snapshot!(response["semanticHitCount"], @"0");
let (response, code) = index
.search_post(
2024-09-17 16:30:04 +02:00
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.5, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}),
2024-04-03 09:35:07 +02:00
)
.await;
snapshot!(code, @"200 OK");
2024-06-12 18:13:34 +02:00
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.9848484848484848},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.9472135901451112}]"###);
snapshot!(response["semanticHitCount"], @"2");
2023-12-13 11:47:12 +01:00
let (response, code) = index
.search_post(
2024-09-17 16:30:04 +02:00
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.8, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}),
2023-12-13 11:47:12 +01:00
)
.await;
snapshot!(code, @"200 OK");
2024-06-12 18:13:34 +02:00
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.9472135901451112}]"###);
2024-04-03 09:35:07 +02:00
snapshot!(response["semanticHitCount"], @"3");
2023-12-13 11:47:12 +01:00
}
#[actix_rt::test]
async fn limit_offset() {
let server = Server::new().await;
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
let (response, code) = index
.search_post(
2024-09-17 16:30:04 +02:00
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.2, "embedder": "default"}, "retrieveVectors": true, "offset": 1, "limit": 1}),
)
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}}}]"###);
snapshot!(response["semanticHitCount"], @"0");
assert_eq!(response["hits"].as_array().unwrap().len(), 1);
let server = Server::new().await;
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
let (response, code) = index
.search_post(
2024-09-17 16:30:04 +02:00
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"semanticRatio": 0.9, "embedder": "default"}, "retrieveVectors": true, "offset": 1, "limit": 1}),
)
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}}}]"###);
snapshot!(response["semanticHitCount"], @"1");
assert_eq!(response["hits"].as_array().unwrap().len(), 1);
}
2024-05-16 18:12:26 +02:00
#[actix_rt::test]
async fn simple_search_hf() {
let server = Server::new().await;
let index = index_with_documents_hf(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
2024-09-17 16:30:04 +02:00
let (response, code) = index
.search_post(
json!({"q": "Captain", "hybrid": {"semanticRatio": 0.2, "embedder": "default"}}),
)
.await;
2024-05-16 18:12:26 +02:00
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"}]"###);
snapshot!(response["semanticHitCount"], @"0");
let (response, code) = index
.search_post(
// disable ranking score as the vectors between architectures are not equal
2024-09-17 16:30:04 +02:00
json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.55}, "showRankingScore": false}),
2024-05-16 18:12:26 +02:00
)
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"}]"###);
snapshot!(response["semanticHitCount"], @"1");
let (response, code) = index
.search_post(
2024-09-17 16:30:04 +02:00
json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.8}, "showRankingScore": false}),
2024-05-16 18:12:26 +02:00
)
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"}]"###);
snapshot!(response["semanticHitCount"], @"3");
let (response, code) = index
.search_post(
2024-09-17 16:30:04 +02:00
json!({"q": "Movie World", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "showRankingScore": false}),
2024-05-16 18:12:26 +02:00
)
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"}]"###);
snapshot!(response["semanticHitCount"], @"3");
let (response, code) = index
.search_post(
2024-09-17 16:30:04 +02:00
json!({"q": "Wonder replacement", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "showRankingScore": false}),
2024-05-16 18:12:26 +02:00
)
.await;
snapshot!(code, @"200 OK");
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"}]"###);
snapshot!(response["semanticHitCount"], @"3");
}
2024-03-27 14:02:09 +01:00
#[actix_rt::test]
async fn distribution_shift() {
let server = Server::new().await;
2024-05-16 18:12:26 +02:00
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
2024-03-27 14:02:09 +01:00
2024-09-17 16:30:04 +02:00
let search = json!({"q": "Captain", "vector": [1.0, 1.0], "showRankingScore": true, "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "retrieveVectors": true});
2024-03-27 14:02:09 +01:00
let (response, code) = index.search_post(search.clone()).await;
snapshot!(code, @"200 OK");
2024-06-12 18:13:34 +02:00
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.9472135901451112}]"###);
2024-03-27 14:02:09 +01:00
let (response, code) = index
.update_settings(json!({
"embedders": {
"default": {
"distribution": {
"mean": 0.998,
"sigma": 0.01
}
}
}
}))
.await;
snapshot!(code, @"202 Accepted");
let response = server.wait_task(response.uid()).await;
snapshot!(response["details"], @r###"{"embedders":{"default":{"distribution":{"mean":0.998,"sigma":0.01}}}}"###);
let (response, code) = index.search_post(search).await;
snapshot!(code, @"200 OK");
2024-06-12 18:13:34 +02:00
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.19161224365234375},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":1.1920928955078125e-7},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":1.1920928955078125e-7}]"###);
2024-03-27 14:02:09 +01:00
}
2024-01-23 15:09:49 +01:00
#[actix_rt::test]
async fn highlighter() {
let server = Server::new().await;
2024-05-16 18:12:26 +02:00
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
2024-01-23 15:09:49 +01:00
let (response, code) = index
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
2024-09-17 16:30:04 +02:00
"hybrid": {"embedder": "default", "semanticRatio": 0.2},
"retrieveVectors": true,
"attributesToHighlight": [
"desc",
"_vectors",
2024-01-23 15:09:49 +01:00
],
"highlightPreTag": "**BEGIN**",
"highlightPostTag": "**END**",
2024-01-23 15:09:49 +01:00
}))
.await;
snapshot!(code, @"200 OK");
2024-06-12 18:13:34 +02:00
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"}},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1"}},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2"}}]"###);
2024-04-03 09:35:07 +02:00
snapshot!(response["semanticHitCount"], @"0");
2024-01-23 15:09:49 +01:00
let (response, code) = index
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
2024-09-17 16:30:04 +02:00
"hybrid": {"embedder": "default", "semanticRatio": 0.8},
"retrieveVectors": true,
2024-04-03 14:29:17 +02:00
"showRankingScore": true,
2024-01-23 15:09:49 +01:00
"attributesToHighlight": [
"desc"
],
"highlightPreTag": "**BEGIN**",
"highlightPostTag": "**END**"
}))
.await;
snapshot!(code, @"200 OK");
2024-06-12 18:13:34 +02:00
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_formatted":{"title":"Captain Planet","desc":"He's not part of the **BEGIN**Marvel**END** Cinematic Universe","id":"2"},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_formatted":{"title":"Shazam!","desc":"a **BEGIN**Captain**END** **BEGIN**Marvel**END** ersatz","id":"1"},"_rankingScore":0.9472135901451112}]"###);
2024-04-03 09:35:07 +02:00
snapshot!(response["semanticHitCount"], @"3");
2024-01-23 15:09:49 +01:00
// no highlighting on full semantic
let (response, code) = index
.search_post(json!({"q": "Captain Marvel", "vector": [1.0, 1.0],
2024-09-17 16:30:04 +02:00
"hybrid": {"embedder": "default", "semanticRatio": 1.0},
"retrieveVectors": true,
2024-04-03 14:29:17 +02:00
"showRankingScore": true,
2024-01-23 15:09:49 +01:00
"attributesToHighlight": [
"desc"
],
"highlightPreTag": "**BEGIN**",
"highlightPostTag": "**END**"
}))
.await;
snapshot!(code, @"200 OK");
2024-06-12 18:13:34 +02:00
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_formatted":{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3"},"_rankingScore":0.990290343761444},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_formatted":{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2"},"_rankingScore":0.974341630935669},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_formatted":{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1"},"_rankingScore":0.9472135901451112}]"###);
2024-04-03 09:35:07 +02:00
snapshot!(response["semanticHitCount"], @"3");
2024-01-23 15:09:49 +01:00
}
2023-12-13 11:47:12 +01:00
#[actix_rt::test]
async fn invalid_semantic_ratio() {
let server = Server::new().await;
2024-05-16 18:12:26 +02:00
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
2023-12-13 11:47:12 +01:00
let (response, code) = index
.search_post(
2024-09-17 16:30:04 +02:00
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"embedder": "default", "semanticRatio": 1.2}}),
2023-12-13 11:47:12 +01:00
)
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
2023-12-14 11:21:25 +01:00
"message": "Invalid value at `.hybrid.semanticRatio`: the value of `semanticRatio` is invalid, expected a float between `0.0` and `1.0`.",
2023-12-13 11:47:12 +01:00
"code": "invalid_search_semantic_ratio",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_semantic_ratio"
}
"###);
let (response, code) = index
.search_post(
2024-09-17 16:30:04 +02:00
json!({"q": "Captain", "vector": [1.0, 1.0], "hybrid": {"embedder": "default", "semanticRatio": -0.8}}),
2023-12-13 11:47:12 +01:00
)
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
2023-12-14 11:21:25 +01:00
"message": "Invalid value at `.hybrid.semanticRatio`: the value of `semanticRatio` is invalid, expected a float between `0.0` and `1.0`.",
2023-12-13 11:47:12 +01:00
"code": "invalid_search_semantic_ratio",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_semantic_ratio"
}
"###);
let (response, code) = index
.search_get(
&yaup::to_string(
2024-09-17 16:30:04 +02:00
&json!({"q": "Captain", "vector": [1.0, 1.0], "hybridEmbedder": "default", "hybridSemanticRatio": 1.2}),
2023-12-13 11:47:12 +01:00
)
.unwrap(),
)
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
2023-12-14 11:21:25 +01:00
"message": "Invalid value in parameter `hybridSemanticRatio`: the value of `semanticRatio` is invalid, expected a float between `0.0` and `1.0`.",
2023-12-13 11:47:12 +01:00
"code": "invalid_search_semantic_ratio",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_semantic_ratio"
}
"###);
let (response, code) = index
.search_get(
&yaup::to_string(
2024-09-17 16:30:04 +02:00
&json!({"q": "Captain", "vector": [1.0, 1.0], "hybridEmbedder": "default", "hybridSemanticRatio": -0.2}),
2023-12-13 11:47:12 +01:00
)
.unwrap(),
)
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
2023-12-14 11:21:25 +01:00
"message": "Invalid value in parameter `hybridSemanticRatio`: the value of `semanticRatio` is invalid, expected a float between `0.0` and `1.0`.",
2023-12-13 11:47:12 +01:00
"code": "invalid_search_semantic_ratio",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_search_semantic_ratio"
}
"###);
}
2024-01-03 12:45:13 +01:00
#[actix_rt::test]
async fn single_document() {
let server = Server::new().await;
2024-05-16 18:12:26 +02:00
let index = index_with_documents_user_provided(&server, &SINGLE_DOCUMENT_VEC).await;
2024-01-03 12:45:13 +01:00
let (response, code) = index
.search_post(
2024-09-17 16:30:04 +02:00
json!({"vector": [1.0, 3.0], "hybrid": {"semanticRatio": 1.0, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}),
2024-01-03 12:45:13 +01:00
)
.await;
snapshot!(code, @"200 OK");
2024-06-12 18:13:34 +02:00
snapshot!(response["hits"][0], @r###"{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":1.0}"###);
2024-04-03 09:35:07 +02:00
snapshot!(response["semanticHitCount"], @"1");
}
#[actix_rt::test]
async fn query_combination() {
let server = Server::new().await;
2024-05-16 18:12:26 +02:00
let index = index_with_documents_user_provided(&server, &SIMPLE_SEARCH_DOCUMENTS_VEC).await;
2024-04-03 09:35:07 +02:00
// search without query and vector, but with hybrid => still placeholder
let (response, code) = index
2024-09-17 16:30:04 +02:00
.search_post(json!({"hybrid": {"embedder": "default", "semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
2024-04-03 09:35:07 +02:00
.await;
snapshot!(code, @"200 OK");
2024-06-12 18:13:34 +02:00
snapshot!(response["hits"], @r###"[{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":1.0},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":1.0},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":1.0}]"###);
2024-04-03 10:23:01 +02:00
snapshot!(response["semanticHitCount"], @"null");
// same with a different semantic ratio
let (response, code) = index
2024-09-17 16:30:04 +02:00
.search_post(json!({"hybrid": {"embedder": "default", "semanticRatio": 0.76}, "showRankingScore": true, "retrieveVectors": true}))
2024-04-03 10:23:01 +02:00
.await;
snapshot!(code, @"200 OK");
2024-06-12 18:13:34 +02:00
snapshot!(response["hits"], @r###"[{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":1.0},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":1.0},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":1.0}]"###);
2024-04-03 10:23:01 +02:00
snapshot!(response["semanticHitCount"], @"null");
// wrong vector dimensions
let (response, code) = index
2024-09-17 16:30:04 +02:00
.search_post(json!({"vector": [1.0, 0.0, 1.0], "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
2024-04-03 10:23:01 +02:00
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid vector dimensions: expected: `2`, found: `3`.",
"code": "invalid_vector_dimensions",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vector_dimensions"
}
"###);
// full vector
let (response, code) = index
2024-09-17 16:30:04 +02:00
.search_post(json!({"vector": [1.0, 0.0], "hybrid": {"embedder": "default", "semanticRatio": 1.0}, "showRankingScore": true, "retrieveVectors": true}))
2024-04-03 10:23:01 +02:00
.await;
snapshot!(code, @"200 OK");
2024-06-12 18:13:34 +02:00
snapshot!(response["hits"], @r###"[{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.7773500680923462},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.7236068248748779},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.6581138968467712}]"###);
2024-04-03 10:23:01 +02:00
snapshot!(response["semanticHitCount"], @"3");
// full keyword, without a query
let (response, code) = index
2024-09-17 16:30:04 +02:00
.search_post(json!({"vector": [1.0, 0.0], "hybrid": {"embedder": "default", "semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true}))
2024-04-03 10:23:01 +02:00
.await;
snapshot!(code, @"200 OK");
2024-06-12 18:13:34 +02:00
snapshot!(response["hits"], @r###"[{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":1.0},{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":1.0},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":1.0}]"###);
2024-04-03 10:23:01 +02:00
snapshot!(response["semanticHitCount"], @"null");
// query + vector, full keyword => keyword
let (response, code) = index
2024-09-17 16:30:04 +02:00
.search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "hybrid": {"embedder": "default", "semanticRatio": 0.0}, "showRankingScore": true, "retrieveVectors": true}))
2024-04-03 10:23:01 +02:00
.await;
snapshot!(code, @"200 OK");
2024-06-12 18:13:34 +02:00
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.9848484848484848},{"title":"Captain Marvel","desc":"a Shazam ersatz","id":"3","_vectors":{"default":{"embeddings":[[2.0,3.0]],"regenerate":false}},"_rankingScore":0.9848484848484848},{"title":"Shazam!","desc":"a Captain Marvel ersatz","id":"1","_vectors":{"default":{"embeddings":[[1.0,3.0]],"regenerate":false}},"_rankingScore":0.9242424242424242}]"###);
2024-04-03 10:23:01 +02:00
snapshot!(response["semanticHitCount"], @"null");
// query + vector, no hybrid keyword =>
let (response, code) = index
.search_post(json!({"q": "Captain", "vector": [1.0, 0.0], "showRankingScore": true, "retrieveVectors": true}))
2024-04-03 10:23:01 +02:00
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
2024-09-17 16:30:04 +02:00
"message": "Invalid request: missing `hybrid` parameter when `vector` is present.",
2024-04-03 10:23:01 +02:00
"code": "missing_search_hybrid",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_search_hybrid"
}
"###);
// full vector, without a vector => error
let (response, code) = index
.search_post(
2024-09-17 16:30:04 +02:00
json!({"q": "Captain", "hybrid": {"semanticRatio": 1.0, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}),
2024-04-03 10:23:01 +02:00
)
.await;
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
2024-06-12 18:27:03 +02:00
"message": "Error while generating embeddings: user error: attempt to embed the following text in a configuration where embeddings must be user provided:\n - `Captain`",
2024-04-03 10:23:01 +02:00
"code": "vector_embedding_error",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
}
"###);
// hybrid without a vector => full keyword
let (response, code) = index
.search_post(
2024-09-17 16:30:04 +02:00
json!({"q": "Planet", "hybrid": {"semanticRatio": 0.99, "embedder": "default"}, "showRankingScore": true, "retrieveVectors": true}),
2024-04-03 10:23:01 +02:00
)
.await;
snapshot!(code, @"200 OK");
2024-06-12 18:13:34 +02:00
snapshot!(response["hits"], @r###"[{"title":"Captain Planet","desc":"He's not part of the Marvel Cinematic Universe","id":"2","_vectors":{"default":{"embeddings":[[1.0,2.0]],"regenerate":false}},"_rankingScore":0.9242424242424242}]"###);
2024-04-03 10:23:01 +02:00
snapshot!(response["semanticHitCount"], @"0");
2024-01-03 12:45:13 +01:00
}
#[actix_rt::test]
async fn retrieve_vectors() {
let server = Server::new().await;
let index = index_with_documents_hf(&server, &SIMPLE_SEARCH_DOCUMENTS).await;
let (response, code) = index
.search_post(
json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "retrieveVectors": true}),
)
.await;
snapshot!(code, @"200 OK");
insta::assert_json_snapshot!(response["hits"], {"[]._vectors.default.embeddings" => "[vectors]"}, @r###"
[
{
"title": "Captain Planet",
"desc": "He's not part of the Marvel Cinematic Universe",
"id": "2",
"_vectors": {
"default": {
"embeddings": "[vectors]",
"regenerate": true
}
}
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "3",
"_vectors": {
"default": {
"embeddings": "[vectors]",
"regenerate": true
}
}
},
{
"title": "Shazam!",
"desc": "a Captain Marvel ersatz",
"id": "1",
"_vectors": {
"default": {
"embeddings": "[vectors]",
"regenerate": true
}
}
}
]
"###);
// use explicit `_vectors` in displayed attributes
let (response, code) = index
.update_settings(json!({ "displayedAttributes": ["id", "title", "desc", "_vectors"]} ))
.await;
assert_eq!(202, code, "{:?}", response);
index.wait_task(response.uid()).await;
let (response, code) = index
.search_post(
2024-09-17 16:30:04 +02:00
json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "retrieveVectors": true}),
)
.await;
snapshot!(code, @"200 OK");
insta::assert_json_snapshot!(response["hits"], {"[]._vectors.default.embeddings" => "[vectors]"}, @r###"
[
{
"title": "Captain Planet",
"desc": "He's not part of the Marvel Cinematic Universe",
"id": "2",
"_vectors": {
"default": {
"embeddings": "[vectors]",
"regenerate": true
}
}
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "3",
"_vectors": {
"default": {
"embeddings": "[vectors]",
"regenerate": true
}
}
},
{
"title": "Shazam!",
"desc": "a Captain Marvel ersatz",
"id": "1",
"_vectors": {
"default": {
"embeddings": "[vectors]",
"regenerate": true
}
}
}
]
"###);
// remove `_vectors` from displayed attributes
let (response, code) =
index.update_settings(json!({ "displayedAttributes": ["id", "title", "desc"]} )).await;
assert_eq!(202, code, "{:?}", response);
index.wait_task(response.uid()).await;
let (response, code) = index
.search_post(
2024-09-17 16:30:04 +02:00
json!({"q": "Captain", "hybrid": {"embedder": "default", "semanticRatio": 0.2}, "retrieveVectors": true}),
)
.await;
snapshot!(code, @"200 OK");
insta::assert_json_snapshot!(response["hits"], {"[]._vectors.default.embeddings" => "[vectors]"}, @r###"
[
{
"title": "Captain Planet",
"desc": "He's not part of the Marvel Cinematic Universe",
"id": "2"
},
{
"title": "Captain Marvel",
"desc": "a Shazam ersatz",
"id": "3"
},
{
"title": "Shazam!",
"desc": "a Captain Marvel ersatz",
"id": "1"
}
]
"###);
}