4900: Indexer edition 2024 r=Kerollmops a=dureuill

This PR is implementing the indexer edition 2024, largely inspired by [the ideas from this blog post](https://blog.kerollmops.com/meilisearch-is-too-slow).

Fixes https://github.com/meilisearch/meilisearch/issues/4985

## Features
- Stream-first approach to reading documents.
- Minimum disk write operations.
- RAM usage-first approach to avoid modifying common bitmaps on disk but in memory.
- Reduced LMDB fragmentation by writing entries only once...
- ...computing the final version of the entries in parallel...
- ...and storing them in write-optimized data structures before sending them to the BTree (LMDB).
- Indexing in multiple transactions to improve large dataset support (dumps).


Co-authored-by: ManyTheFish <many@meilisearch.com>
Co-authored-by: Clément Renault <clement@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2024-11-21 16:19:10 +00:00 committed by GitHub
commit 19e6f675b3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
134 changed files with 15591 additions and 3843 deletions

View file

@ -57,7 +57,7 @@ meilisearch-types = { path = "../meilisearch-types" }
mimalloc = { version = "0.1.43", default-features = false }
mime = "0.3.17"
num_cpus = "1.16.0"
obkv = "0.2.2"
obkv = { git = "https://github.com/kerollmops/obkv", branch = "unsized-kvreader" }
once_cell = "1.19.0"
ordered-float = "4.2.1"
parking_lot = "0.12.3"

View file

@ -796,8 +796,10 @@ fn prepare_search<'t>(
let span = tracing::trace_span!(target: "search::vector", "embed_one");
let _entered = span.enter();
let deadline = std::time::Instant::now() + std::time::Duration::from_secs(10);
embedder
.embed_one(query.q.clone().unwrap())
.embed_one(query.q.clone().unwrap(), Some(deadline))
.map_err(milli::vector::Error::from)
.map_err(milli::Error::from)?
}
@ -1687,7 +1689,7 @@ fn add_non_formatted_ids_to_formatted_options(
fn make_document(
displayed_attributes: &BTreeSet<FieldId>,
field_ids_map: &FieldsIdsMap,
obkv: obkv::KvReaderU16,
obkv: &obkv::KvReaderU16,
) -> Result<Document, MeilisearchHttpError> {
let mut document = serde_json::Map::new();

View file

@ -1344,7 +1344,6 @@ async fn error_add_documents_missing_document_id() {
}
#[actix_rt::test]
#[should_panic]
async fn error_document_field_limit_reached_in_one_document() {
let server = Server::new().await;
let index = server.index("test");
@ -1361,7 +1360,7 @@ async fn error_document_field_limit_reached_in_one_document() {
let documents = json!([big_object]);
let (response, code) = index.update_documents(documents, Some("id")).await;
snapshot!(code, @"500 Internal Server Error");
snapshot!(code, @"202 Accepted");
let response = index.wait_task(response.uid()).await;
snapshot!(code, @"202 Accepted");
@ -1369,16 +1368,22 @@ async fn error_document_field_limit_reached_in_one_document() {
snapshot!(response,
@r###"
{
"uid": 1,
"uid": "[uid]",
"batchUid": "[batch_uid]",
"indexUid": "test",
"status": "succeeded",
"status": "failed",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
"indexedDocuments": 0
},
"error": {
"message": "A document cannot contain more than 65,535 fields.",
"code": "max_fields_limit_exceeded",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#max_fields_limit_exceeded"
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
@ -1676,7 +1681,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "The `_geo` field in the document with the id: `11` is not an object. Was expecting an object with the `_geo.lat` and `_geo.lng` fields but instead got `\"foobar\"`.",
"message": "The `_geo` field in the document with the id: `\"11\"` is not an object. Was expecting an object with the `_geo.lat` and `_geo.lng` fields but instead got `\"foobar\"`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -1714,7 +1719,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Could not find latitude nor longitude in the document with the id: `11`. Was expecting `_geo.lat` and `_geo.lng` fields.",
"message": "Could not find latitude nor longitude in the document with the id: `\"11\"`. Was expecting `_geo.lat` and `_geo.lng` fields.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -1752,7 +1757,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Could not find latitude nor longitude in the document with the id: `11`. Was expecting `_geo.lat` and `_geo.lng` fields.",
"message": "Could not find latitude nor longitude in the document with the id: `\"11\"`. Was expecting `_geo.lat` and `_geo.lng` fields.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -1790,7 +1795,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Could not find longitude in the document with the id: `11`. Was expecting a `_geo.lng` field.",
"message": "Could not find longitude in the document with the id: `\"11\"`. Was expecting a `_geo.lng` field.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -1828,7 +1833,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Could not find latitude in the document with the id: `11`. Was expecting a `_geo.lat` field.",
"message": "Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -1866,7 +1871,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Could not find longitude in the document with the id: `11`. Was expecting a `_geo.lng` field.",
"message": "Could not find longitude in the document with the id: `\"11\"`. Was expecting a `_geo.lng` field.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -1904,7 +1909,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Could not find latitude in the document with the id: `11`. Was expecting a `_geo.lat` field.",
"message": "Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -1942,7 +1947,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Could not parse latitude nor longitude in the document with the id: `11`. Was expecting finite numbers but instead got `false` and `true`.",
"message": "Could not parse latitude nor longitude in the document with the id: `\"11\"`. Was expecting finite numbers but instead got `false` and `true`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -1980,7 +1985,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Could not find longitude in the document with the id: `11`. Was expecting a `_geo.lng` field.",
"message": "Could not find longitude in the document with the id: `\"11\"`. Was expecting a `_geo.lng` field.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -2018,7 +2023,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Could not find latitude in the document with the id: `11`. Was expecting a `_geo.lat` field.",
"message": "Could not find latitude in the document with the id: `\"11\"`. Was expecting a `_geo.lat` field.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -2056,7 +2061,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Could not parse latitude nor longitude in the document with the id: `11`. Was expecting finite numbers but instead got `\"doggo\"` and `\"doggo\"`.",
"message": "Could not parse latitude nor longitude in the document with the id: `\"11\"`. Was expecting finite numbers but instead got `\"doggo\"` and `\"doggo\"`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -2094,7 +2099,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "The `_geo` field in the document with the id: `11` contains the following unexpected fields: `{\"doggo\":\"are the best\"}`.",
"message": "The `_geo` field in the document with the id: `\"11\"` contains the following unexpected fields: `{\"doggo\":\"are the best\"}`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -2133,7 +2138,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Could not parse longitude in the document with the id: `12`. Was expecting a finite number but instead got `null`.",
"message": "Could not parse longitude in the document with the id: `\"12\"`. Was expecting a finite number but instead got `null`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -2170,7 +2175,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Could not parse latitude in the document with the id: `12`. Was expecting a finite number but instead got `null`.",
"message": "Could not parse latitude in the document with the id: `\"12\"`. Was expecting a finite number but instead got `null`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -2207,7 +2212,7 @@ async fn add_documents_invalid_geo_field() {
"indexedDocuments": 0
},
"error": {
"message": "Could not parse latitude nor longitude in the document with the id: `13`. Was expecting finite numbers but instead got `null` and `null`.",
"message": "Could not parse latitude nor longitude in the document with the id: `\"13\"`. Was expecting finite numbers but instead got `null` and `null`.",
"code": "invalid_document_geo_field",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_document_geo_field"
@ -2227,7 +2232,7 @@ async fn add_invalid_geo_and_then_settings() {
let index = server.index("test");
index.create(Some("id")).await;
// _geo is not an object
// _geo is not a correct object
let documents = json!([
{
"id": "11",
@ -2257,7 +2262,7 @@ async fn add_invalid_geo_and_then_settings() {
}
"###);
let (ret, code) = index.update_settings(json!({"sortableAttributes": ["_geo"]})).await;
let (ret, code) = index.update_settings(json!({ "sortableAttributes": ["_geo"] })).await;
snapshot!(code, @"202 Accepted");
let ret = index.wait_task(ret.uid()).await;
snapshot!(ret, @r###"

View file

@ -70,8 +70,8 @@ async fn geo_bounding_box_with_string_and_number() {
let documents = DOCUMENTS.clone();
index.update_settings_filterable_attributes(json!(["_geo"])).await;
index.update_settings_sortable_attributes(json!(["_geo"])).await;
index.add_documents(documents, None).await;
index.wait_task(2).await;
let (ret, _code) = index.add_documents(documents, None).await;
index.wait_task(ret.uid()).await.succeeded();
index
.search(

View file

@ -750,9 +750,9 @@ async fn test_score_details() {
],
"_vectors": {
"manual": [
-100.0,
231.0,
32.0
-100,
231,
32
]
},
"_rankingScoreDetails": {
@ -1543,9 +1543,9 @@ async fn simple_search_with_strange_synonyms() {
],
"_vectors": {
"manual": [
-100.0,
231.0,
32.0
-100,
231,
32
]
}
}
@ -1568,9 +1568,9 @@ async fn simple_search_with_strange_synonyms() {
],
"_vectors": {
"manual": [
-100.0,
231.0,
32.0
-100,
231,
32
]
}
}
@ -1593,9 +1593,9 @@ async fn simple_search_with_strange_synonyms() {
],
"_vectors": {
"manual": [
-100.0,
231.0,
32.0
-100,
231,
32
]
}
}
@ -1604,3 +1604,79 @@ async fn simple_search_with_strange_synonyms() {
})
.await;
}
#[actix_rt::test]
async fn change_attributes_settings() {
let server = Server::new().await;
let index = server.index("test");
index.update_settings(json!({ "searchableAttributes": ["father", "mother"] })).await;
let documents = NESTED_DOCUMENTS.clone();
index.add_documents(json!(documents), None).await;
index.wait_task(1).await;
index.update_settings(json!({ "searchableAttributes": ["father", "mother", "doggos"], "filterableAttributes": ["doggos"] })).await;
index.wait_task(2).await;
// search
index
.search(
json!({
"q": "bobby",
"attributesToRetrieve": ["id", "doggos"]
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"id": 852,
"doggos": [
{
"name": "bobby",
"age": 2
},
{
"name": "buddy",
"age": 4
}
]
}
]
"###);
},
)
.await;
// filter
index
.search(
json!({
"q": "",
"filter": "doggos.age < 5",
"attributesToRetrieve": ["id", "doggos"]
}),
|response, code| {
assert_eq!(code, 200, "{}", response);
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"id": 852,
"doggos": [
{
"name": "bobby",
"age": 2
},
{
"name": "buddy",
"age": 4
}
]
}
]
"###);
},
)
.await;
}

View file

@ -113,9 +113,9 @@ async fn simple_search_single_index() {
],
"_vectors": {
"manual": [
-100.0,
340.0,
90.0
-100,
340,
90
]
}
}
@ -138,9 +138,9 @@ async fn simple_search_single_index() {
],
"_vectors": {
"manual": [
1.0,
2.0,
54.0
1,
2,
54
]
}
}
@ -182,9 +182,9 @@ async fn federation_single_search_single_index() {
],
"_vectors": {
"manual": [
-100.0,
340.0,
90.0
-100,
340,
90
]
},
"_federation": {
@ -305,9 +305,9 @@ async fn federation_two_search_single_index() {
],
"_vectors": {
"manual": [
-100.0,
340.0,
90.0
-100,
340,
90
]
},
"_federation": {
@ -325,9 +325,9 @@ async fn federation_two_search_single_index() {
],
"_vectors": {
"manual": [
1.0,
2.0,
54.0
1,
2,
54
]
},
"_federation": {
@ -480,9 +480,9 @@ async fn simple_search_two_indexes() {
],
"_vectors": {
"manual": [
-100.0,
340.0,
90.0
-100,
340,
90
]
}
}
@ -513,9 +513,9 @@ async fn simple_search_two_indexes() {
"cattos": "pésti",
"_vectors": {
"manual": [
1.0,
2.0,
3.0
1,
2,
3
]
}
},
@ -535,9 +535,9 @@ async fn simple_search_two_indexes() {
],
"_vectors": {
"manual": [
1.0,
2.0,
54.0
1,
2,
54
]
}
}
@ -585,9 +585,9 @@ async fn federation_two_search_two_indexes() {
],
"_vectors": {
"manual": [
-100.0,
340.0,
90.0
-100,
340,
90
]
},
"_federation": {
@ -613,9 +613,9 @@ async fn federation_two_search_two_indexes() {
"cattos": "pésti",
"_vectors": {
"manual": [
1.0,
2.0,
3.0
1,
2,
3
]
},
"_federation": {
@ -640,9 +640,9 @@ async fn federation_two_search_two_indexes() {
],
"_vectors": {
"manual": [
1.0,
2.0,
54.0
1,
2,
54
]
},
"_federation": {
@ -707,9 +707,9 @@ async fn federation_multiple_search_multiple_indexes() {
],
"_vectors": {
"manual": [
-100.0,
340.0,
90.0
-100,
340,
90
]
},
"_federation": {
@ -735,9 +735,9 @@ async fn federation_multiple_search_multiple_indexes() {
"cattos": "pésti",
"_vectors": {
"manual": [
1.0,
2.0,
3.0
1,
2,
3
]
},
"_federation": {
@ -773,9 +773,9 @@ async fn federation_multiple_search_multiple_indexes() {
],
"_vectors": {
"manual": [
1.0,
2.0,
54.0
1,
2,
54
]
},
"_federation": {
@ -793,9 +793,9 @@ async fn federation_multiple_search_multiple_indexes() {
],
"_vectors": {
"manual": [
10.0,
-23.0,
32.0
10,
-23,
32
]
},
"_federation": {
@ -824,9 +824,9 @@ async fn federation_multiple_search_multiple_indexes() {
],
"_vectors": {
"manual": [
10.0,
23.0,
32.0
10,
23,
32
]
},
"_federation": {
@ -869,9 +869,9 @@ async fn federation_multiple_search_multiple_indexes() {
],
"_vectors": {
"manual": [
1.0,
2.0,
54.0
1,
2,
54
]
},
"_federation": {
@ -898,9 +898,9 @@ async fn federation_multiple_search_multiple_indexes() {
],
"_vectors": {
"manual": [
-100.0,
231.0,
32.0
-100,
231,
32
]
},
"_federation": {
@ -1393,9 +1393,9 @@ async fn federation_sort_same_indexes_same_criterion_same_direction() {
"cattos": "pésti",
"_vectors": {
"manual": [
1.0,
2.0,
3.0
1,
2,
3
]
},
"_federation": {
@ -1414,9 +1414,9 @@ async fn federation_sort_same_indexes_same_criterion_same_direction() {
],
"_vectors": {
"manual": [
10.0,
23.0,
32.0
10,
23,
32
]
},
"_federation": {
@ -1442,9 +1442,9 @@ async fn federation_sort_same_indexes_same_criterion_same_direction() {
],
"_vectors": {
"manual": [
1.0,
2.0,
54.0
1,
2,
54
]
},
"_federation": {
@ -1474,9 +1474,9 @@ async fn federation_sort_same_indexes_same_criterion_same_direction() {
],
"_vectors": {
"manual": [
10.0,
23.0,
32.0
10,
23,
32
]
},
"_federation": {
@ -1522,9 +1522,9 @@ async fn federation_sort_same_indexes_same_criterion_same_direction() {
"cattos": "pésti",
"_vectors": {
"manual": [
1.0,
2.0,
3.0
1,
2,
3
]
},
"_federation": {
@ -1550,9 +1550,9 @@ async fn federation_sort_same_indexes_same_criterion_same_direction() {
],
"_vectors": {
"manual": [
1.0,
2.0,
54.0
1,
2,
54
]
},
"_federation": {
@ -1582,9 +1582,9 @@ async fn federation_sort_same_indexes_same_criterion_same_direction() {
],
"_vectors": {
"manual": [
10.0,
23.0,
32.0
10,
23,
32
]
},
"_federation": {
@ -1716,9 +1716,9 @@ async fn federation_sort_same_indexes_different_criterion_same_direction() {
"cattos": "pésti",
"_vectors": {
"manual": [
1.0,
2.0,
3.0
1,
2,
3
]
},
"_federation": {
@ -1748,9 +1748,9 @@ async fn federation_sort_same_indexes_different_criterion_same_direction() {
],
"_vectors": {
"manual": [
10.0,
23.0,
32.0
10,
23,
32
]
},
"_federation": {
@ -1769,9 +1769,9 @@ async fn federation_sort_same_indexes_different_criterion_same_direction() {
],
"_vectors": {
"manual": [
10.0,
23.0,
32.0
10,
23,
32
]
},
"_federation": {
@ -1797,9 +1797,9 @@ async fn federation_sort_same_indexes_different_criterion_same_direction() {
],
"_vectors": {
"manual": [
1.0,
2.0,
54.0
1,
2,
54
]
},
"_federation": {
@ -1845,9 +1845,9 @@ async fn federation_sort_same_indexes_different_criterion_same_direction() {
],
"_vectors": {
"manual": [
1.0,
2.0,
54.0
1,
2,
54
]
},
"_federation": {
@ -1874,9 +1874,9 @@ async fn federation_sort_same_indexes_different_criterion_same_direction() {
"cattos": "pésti",
"_vectors": {
"manual": [
1.0,
2.0,
3.0
1,
2,
3
]
},
"_federation": {
@ -1906,9 +1906,9 @@ async fn federation_sort_same_indexes_different_criterion_same_direction() {
],
"_vectors": {
"manual": [
10.0,
23.0,
32.0
10,
23,
32
]
},
"_federation": {
@ -2103,9 +2103,9 @@ async fn federation_sort_different_indexes_same_criterion_same_direction() {
],
"_vectors": {
"manual": [
1.0,
2.0,
54.0
1,
2,
54
]
},
"_federation": {
@ -2124,9 +2124,9 @@ async fn federation_sort_different_indexes_same_criterion_same_direction() {
],
"_vectors": {
"manual": [
10.0,
-23.0,
32.0
10,
-23,
32
]
},
"_federation": {
@ -2145,9 +2145,9 @@ async fn federation_sort_different_indexes_same_criterion_same_direction() {
],
"_vectors": {
"manual": [
-100.0,
340.0,
90.0
-100,
340,
90
]
},
"_federation": {
@ -2166,9 +2166,9 @@ async fn federation_sort_different_indexes_same_criterion_same_direction() {
],
"_vectors": {
"manual": [
-100.0,
231.0,
32.0
-100,
231,
32
]
},
"_federation": {
@ -2187,9 +2187,9 @@ async fn federation_sort_different_indexes_same_criterion_same_direction() {
],
"_vectors": {
"manual": [
1.0,
2.0,
3.0
1,
2,
3
]
},
"_federation": {
@ -2228,9 +2228,9 @@ async fn federation_sort_different_indexes_same_criterion_same_direction() {
],
"_vectors": {
"manual": [
1.0,
2.0,
54.0
1,
2,
54
]
},
"_federation": {
@ -2415,9 +2415,9 @@ async fn federation_sort_different_ranking_rules() {
],
"_vectors": {
"manual": [
1.0,
2.0,
54.0
1,
2,
54
]
},
"_federation": {
@ -2436,9 +2436,9 @@ async fn federation_sort_different_ranking_rules() {
],
"_vectors": {
"manual": [
10.0,
-23.0,
32.0
10,
-23,
32
]
},
"_federation": {
@ -2457,9 +2457,9 @@ async fn federation_sort_different_ranking_rules() {
],
"_vectors": {
"manual": [
-100.0,
340.0,
90.0
-100,
340,
90
]
},
"_federation": {
@ -2478,9 +2478,9 @@ async fn federation_sort_different_ranking_rules() {
],
"_vectors": {
"manual": [
-100.0,
231.0,
32.0
-100,
231,
32
]
},
"_federation": {
@ -2499,9 +2499,9 @@ async fn federation_sort_different_ranking_rules() {
],
"_vectors": {
"manual": [
1.0,
2.0,
3.0
1,
2,
3
]
},
"_federation": {
@ -2716,9 +2716,9 @@ async fn federation_sort_different_indexes_different_criterion_same_direction()
],
"_vectors": {
"manual": [
1.0,
2.0,
54.0
1,
2,
54
]
},
"_federation": {
@ -2757,9 +2757,9 @@ async fn federation_sort_different_indexes_different_criterion_same_direction()
],
"_vectors": {
"manual": [
10.0,
-23.0,
32.0
10,
-23,
32
]
},
"_federation": {
@ -2778,9 +2778,9 @@ async fn federation_sort_different_indexes_different_criterion_same_direction()
],
"_vectors": {
"manual": [
-100.0,
340.0,
90.0
-100,
340,
90
]
},
"_federation": {
@ -2799,9 +2799,9 @@ async fn federation_sort_different_indexes_different_criterion_same_direction()
],
"_vectors": {
"manual": [
-100.0,
231.0,
32.0
-100,
231,
32
]
},
"_federation": {
@ -2820,9 +2820,9 @@ async fn federation_sort_different_indexes_different_criterion_same_direction()
],
"_vectors": {
"manual": [
1.0,
2.0,
3.0
1,
2,
3
]
},
"_federation": {
@ -2881,9 +2881,9 @@ async fn federation_sort_different_indexes_different_criterion_same_direction()
],
"_vectors": {
"manual": [
1.0,
2.0,
54.0
1,
2,
54
]
},
"_federation": {
@ -4346,10 +4346,10 @@ async fn federation_vector_two_indexes() {
let (response, code) = server
.multi_search(json!({"federation": {}, "queries": [
{"indexUid" : "vectors-animal", "vector": [1.0, 0.0, 0.5], "hybrid": {"semanticRatio": 1.0, "embedder": "animal"}},
{"indexUid" : "vectors-animal", "vector": [1.0, 0.0, 0.5], "hybrid": {"semanticRatio": 1.0, "embedder": "animal"}, "retrieveVectors": true},
// joyful and energetic first
{"indexUid": "vectors-sentiment", "vector": [0.8, 0.6], "hybrid": {"semanticRatio": 1.0, "embedder": "sentiment"}},
{"indexUid": "vectors-sentiment", "q": "dog"},
{"indexUid": "vectors-sentiment", "vector": [0.8, 0.6], "hybrid": {"semanticRatio": 1.0, "embedder": "sentiment"}, "retrieveVectors": true},
{"indexUid": "vectors-sentiment", "q": "dog", "retrieveVectors": true},
]}))
.await;
snapshot!(code, @"200 OK");
@ -4364,7 +4364,16 @@ async fn federation_vector_two_indexes() {
0.8,
0.09,
0.8
]
],
"sentiment": {
"embeddings": [
[
0.800000011920929,
0.30000001192092896
]
],
"regenerate": false
}
},
"_federation": {
"indexUid": "vectors-sentiment",
@ -4379,7 +4388,17 @@ async fn federation_vector_two_indexes() {
"sentiment": [
0.8,
0.3
]
],
"animal": {
"embeddings": [
[
0.800000011920929,
0.09000000357627869,
0.800000011920929
]
],
"regenerate": false
}
},
"_federation": {
"indexUid": "vectors-animal",
@ -4394,7 +4413,17 @@ async fn federation_vector_two_indexes() {
"sentiment": [
-1.0,
0.1
]
],
"animal": {
"embeddings": [
[
0.8500000238418579,
0.019999999552965164,
0.10000000149011612
]
],
"regenerate": false
}
},
"_federation": {
"indexUid": "vectors-animal",
@ -4410,7 +4439,16 @@ async fn federation_vector_two_indexes() {
0.9,
0.8,
0.05
]
],
"sentiment": {
"embeddings": [
[
-0.10000000149011612,
0.550000011920929
]
],
"regenerate": false
}
},
"_federation": {
"indexUid": "vectors-sentiment",
@ -4426,7 +4464,16 @@ async fn federation_vector_two_indexes() {
0.85,
0.02,
0.1
]
],
"sentiment": {
"embeddings": [
[
-1.0,
0.10000000149011612
]
],
"regenerate": false
}
},
"_federation": {
"indexUid": "vectors-sentiment",
@ -4441,7 +4488,17 @@ async fn federation_vector_two_indexes() {
"sentiment": [
-0.2,
0.65
]
],
"animal": {
"embeddings": [
[
0.800000011920929,
0.8999999761581421,
0.5
]
],
"regenerate": false
}
},
"_federation": {
"indexUid": "vectors-animal",
@ -4456,7 +4513,17 @@ async fn federation_vector_two_indexes() {
"sentiment": [
-0.1,
0.55
]
],
"animal": {
"embeddings": [
[
0.8999999761581421,
0.800000011920929,
0.05000000074505806
]
],
"regenerate": false
}
},
"_federation": {
"indexUid": "vectors-animal",
@ -4472,7 +4539,16 @@ async fn federation_vector_two_indexes() {
0.8,
0.9,
0.5
]
],
"sentiment": {
"embeddings": [
[
-0.20000000298023224,
0.6499999761581421
]
],
"regenerate": false
}
},
"_federation": {
"indexUid": "vectors-sentiment",
@ -4492,8 +4568,8 @@ async fn federation_vector_two_indexes() {
// hybrid search, distinct embedder
let (response, code) = server
.multi_search(json!({"federation": {}, "queries": [
{"indexUid" : "vectors-animal", "vector": [1.0, 0.0, 0.5], "hybrid": {"semanticRatio": 1.0, "embedder": "animal"}, "showRankingScore": true},
{"indexUid": "vectors-sentiment", "vector": [-1, 0.6], "q": "beagle", "hybrid": {"semanticRatio": 1.0, "embedder": "sentiment"}, "showRankingScore": true},
{"indexUid" : "vectors-animal", "vector": [1.0, 0.0, 0.5], "hybrid": {"semanticRatio": 1.0, "embedder": "animal"}, "showRankingScore": true, "retrieveVectors": true},
{"indexUid": "vectors-sentiment", "vector": [-1, 0.6], "q": "beagle", "hybrid": {"semanticRatio": 1.0, "embedder": "sentiment"}, "showRankingScore": true, "retrieveVectors": true,},
]}))
.await;
snapshot!(code, @"200 OK");
@ -4507,7 +4583,17 @@ async fn federation_vector_two_indexes() {
"sentiment": [
0.8,
0.3
]
],
"animal": {
"embeddings": [
[
0.800000011920929,
0.09000000357627869,
0.800000011920929
]
],
"regenerate": false
}
},
"_federation": {
"indexUid": "vectors-animal",
@ -4523,7 +4609,17 @@ async fn federation_vector_two_indexes() {
"sentiment": [
-1.0,
0.1
]
],
"animal": {
"embeddings": [
[
0.8500000238418579,
0.019999999552965164,
0.10000000149011612
]
],
"regenerate": false
}
},
"_federation": {
"indexUid": "vectors-animal",
@ -4540,7 +4636,16 @@ async fn federation_vector_two_indexes() {
0.85,
0.02,
0.1
]
],
"sentiment": {
"embeddings": [
[
-1.0,
0.10000000149011612
]
],
"regenerate": false
}
},
"_federation": {
"indexUid": "vectors-sentiment",
@ -4557,7 +4662,16 @@ async fn federation_vector_two_indexes() {
0.8,
0.9,
0.5
]
],
"sentiment": {
"embeddings": [
[
-0.20000000298023224,
0.6499999761581421
]
],
"regenerate": false
}
},
"_federation": {
"indexUid": "vectors-sentiment",
@ -4573,7 +4687,17 @@ async fn federation_vector_two_indexes() {
"sentiment": [
-0.2,
0.65
]
],
"animal": {
"embeddings": [
[
0.800000011920929,
0.8999999761581421,
0.5
]
],
"regenerate": false
}
},
"_federation": {
"indexUid": "vectors-animal",
@ -4589,7 +4713,17 @@ async fn federation_vector_two_indexes() {
"sentiment": [
-0.1,
0.55
]
],
"animal": {
"embeddings": [
[
0.8999999761581421,
0.800000011920929,
0.05000000074505806
]
],
"regenerate": false
}
},
"_federation": {
"indexUid": "vectors-animal",
@ -4606,7 +4740,16 @@ async fn federation_vector_two_indexes() {
0.9,
0.8,
0.05
]
],
"sentiment": {
"embeddings": [
[
-0.10000000149011612,
0.550000011920929
]
],
"regenerate": false
}
},
"_federation": {
"indexUid": "vectors-sentiment",
@ -4623,7 +4766,16 @@ async fn federation_vector_two_indexes() {
0.8,
0.09,
0.8
]
],
"sentiment": {
"embeddings": [
[
0.800000011920929,
0.30000001192092896
]
],
"regenerate": false
}
},
"_federation": {
"indexUid": "vectors-sentiment",

View file

@ -250,7 +250,7 @@ async fn user_provided_embeddings_error() {
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Missing field `regenerate` inside `.manual`",
"message": "Bad embedder configuration in the document with id: `0`. Missing field `._vectors.manual.regenerate`\n - note: `._vectors.manual` must be an array of floats, an array of arrays of floats, or an object with field `regenerate`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@ -280,7 +280,7 @@ async fn user_provided_embeddings_error() {
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Missing field `regenerate` inside `.manual`",
"message": "Bad embedder configuration in the document with id: `0`. Missing field `._vectors.manual.regenerate`\n - note: `._vectors.manual` must be an array of floats, an array of arrays of floats, or an object with field `regenerate`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@ -311,7 +311,7 @@ async fn user_provided_embeddings_error() {
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.regenerate`: expected a boolean, but found a string: `\"yes please\"`",
"message": "Bad embedder configuration in the document with id: `0`. Could not parse `._vectors.manual.regenerate`: invalid type: string \"yes please\", expected a boolean at line 1 column 26",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@ -323,8 +323,7 @@ async fn user_provided_embeddings_error() {
}
"###);
let documents =
json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": true }}});
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": true, "regenerate": true }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
@ -341,7 +340,7 @@ async fn user_provided_embeddings_error() {
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings`: expected null or an array, but found a boolean: `true`",
"message": "Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings`: expected null or an array, but found a boolean: `true`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@ -353,8 +352,7 @@ async fn user_provided_embeddings_error() {
}
"###);
let documents =
json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [true] }}});
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [true], "regenerate": true }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
@ -371,7 +369,7 @@ async fn user_provided_embeddings_error() {
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[0]`: expected a number or an array, but found a boolean: `true`",
"message": "Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[0]`: expected a number or an array, but found a boolean: `true`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@ -383,8 +381,7 @@ async fn user_provided_embeddings_error() {
}
"###);
let documents =
json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [[true]] }}});
let documents = json!({"id": 0, "name": "kefir", "_vectors": { "manual": { "embeddings": [[true]], "regenerate": false }}});
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
@ -401,7 +398,7 @@ async fn user_provided_embeddings_error() {
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[0][0]`: expected a number, but found a boolean: `true`",
"message": "Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[0][0]`: expected a number, but found a boolean: `true`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@ -443,7 +440,7 @@ async fn user_provided_embeddings_error() {
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[1]`: expected a number, but found an array: `[0.2,0.3]`",
"message": "Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[1]`: expected a number, but found an array: `[0.2,0.3]`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@ -472,7 +469,7 @@ async fn user_provided_embeddings_error() {
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[1]`: expected an array, but found a number: `0.3`",
"message": "Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[1]`: expected an array, but found a number: `0.3`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@ -501,7 +498,7 @@ async fn user_provided_embeddings_error() {
"indexedDocuments": 0
},
"error": {
"message": "Bad embedder configuration in the document with id: `\"0\"`. Invalid value type at `.manual.embeddings[0][1]`: expected a number, but found a boolean: `true`",
"message": "Bad embedder configuration in the document with id: `0`. Invalid value type at `._vectors.manual.embeddings[0][1]`: expected a number, but found a boolean: `true`",
"code": "invalid_vectors_type",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#invalid_vectors_type"
@ -542,7 +539,7 @@ async fn user_provided_vectors_error() {
"indexedDocuments": 0
},
"error": {
"message": "While embedding documents for embedder `manual`: no vectors provided for document \"40\" and at least 4 other document(s)\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: opt-out for a document with `_vectors.manual: null`",
"message": "While embedding documents for embedder `manual`: no vectors provided for document `40` and at least 4 other document(s)\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: opt-out for a document with `_vectors.manual: null`",
"code": "vector_embedding_error",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@ -572,7 +569,7 @@ async fn user_provided_vectors_error() {
"indexedDocuments": 0
},
"error": {
"message": "While embedding documents for embedder `manual`: no vectors provided for document \"42\"\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vector` by `_vectors` in 1 document(s).",
"message": "While embedding documents for embedder `manual`: no vectors provided for document `42`\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vector` by `_vectors` in 1 document(s).",
"code": "vector_embedding_error",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"
@ -602,7 +599,7 @@ async fn user_provided_vectors_error() {
"indexedDocuments": 0
},
"error": {
"message": "While embedding documents for embedder `manual`: no vectors provided for document \"42\"\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vectors.manaul` by `_vectors.manual` in 1 document(s).",
"message": "While embedding documents for embedder `manual`: no vectors provided for document `42`\n- Note: `manual` has `source: userProvided`, so documents must provide embeddings as an array in `_vectors.manual`.\n- Hint: try replacing `_vectors.manaul` by `_vectors.manual` in 1 document(s).",
"code": "vector_embedding_error",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#vector_embedding_error"

View file

@ -137,13 +137,14 @@ fn long_text() -> &'static str {
}
async fn create_mock_tokenized() -> (MockServer, Value) {
create_mock_with_template("{{doc.text}}", ModelDimensions::Large, false).await
create_mock_with_template("{{doc.text}}", ModelDimensions::Large, false, false).await
}
async fn create_mock_with_template(
document_template: &str,
model_dimensions: ModelDimensions,
fallible: bool,
slow: bool,
) -> (MockServer, Value) {
let mock_server = MockServer::start().await;
const API_KEY: &str = "my-api-key";
@ -154,7 +155,11 @@ async fn create_mock_with_template(
Mock::given(method("POST"))
.and(path("/"))
.respond_with(move |req: &Request| {
// 0. maybe return 500
// 0. wait for a long time
if slow {
std::thread::sleep(std::time::Duration::from_secs(1));
}
// 1. maybe return 500
if fallible {
let attempt = attempt.fetch_add(1, Ordering::Relaxed);
let failed = matches!(attempt % 4, 0 | 1 | 3);
@ -167,7 +172,7 @@ async fn create_mock_with_template(
}))
}
}
// 1. check API key
// 2. check API key
match req.headers.get("Authorization") {
Some(api_key) if api_key == API_KEY_BEARER => {
{}
@ -202,7 +207,7 @@ async fn create_mock_with_template(
)
}
}
// 2. parse text inputs
// 3. parse text inputs
let query: serde_json::Value = match req.body_json() {
Ok(query) => query,
Err(_error) => return ResponseTemplate::new(400).set_body_json(
@ -223,7 +228,7 @@ async fn create_mock_with_template(
panic!("Expected {model_dimensions:?}, got {query_model_dimensions:?}")
}
// 3. for each text, find embedding in responses
// 4. for each text, find embedding in responses
let serde_json::Value::Array(inputs) = &query["input"] else {
panic!("Unexpected `input` value")
};
@ -283,7 +288,7 @@ async fn create_mock_with_template(
"embedding": embedding,
})).collect();
// 4. produce output from embeddings
// 5. produce output from embeddings
ResponseTemplate::new(200).set_body_json(json!({
"object": "list",
"data": data,
@ -317,23 +322,27 @@ const DOGGO_TEMPLATE: &str = r#"{%- if doc.gender == "F" -%}Une chienne nommée
{%- endif %}, de race {{doc.breed}}."#;
async fn create_mock() -> (MockServer, Value) {
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large, false).await
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large, false, false).await
}
async fn create_mock_dimensions() -> (MockServer, Value) {
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large512, false).await
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large512, false, false).await
}
async fn create_mock_small_embedding_model() -> (MockServer, Value) {
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Small, false).await
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Small, false, false).await
}
async fn create_mock_legacy_embedding_model() -> (MockServer, Value) {
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Ada, false).await
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Ada, false, false).await
}
async fn create_fallible_mock() -> (MockServer, Value) {
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large, true).await
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large, true, false).await
}
async fn create_slow_mock() -> (MockServer, Value) {
create_mock_with_template(DOGGO_TEMPLATE, ModelDimensions::Large, true, true).await
}
// basic test "it works"
@ -1884,4 +1893,115 @@ async fn it_still_works() {
]
"###);
}
// test with a server that responds 500 on 3 out of 4 calls
#[actix_rt::test]
async fn timeout() {
let (_mock, setting) = create_slow_mock().await;
let server = get_server_vector().await;
let index = server.index("doggo");
let (response, code) = index
.update_settings(json!({
"embedders": {
"default": setting,
},
}))
.await;
snapshot!(code, @"202 Accepted");
let task = server.wait_task(response.uid()).await;
snapshot!(task["status"], @r###""succeeded""###);
let documents = json!([
{"id": 0, "name": "kefir", "gender": "M", "birthyear": 2023, "breed": "Patou"},
]);
let (value, code) = index.add_documents(documents, None).await;
snapshot!(code, @"202 Accepted");
let task = index.wait_task(value.uid()).await;
snapshot!(task, @r###"
{
"uid": "[uid]",
"batchUid": "[batch_uid]",
"indexUid": "doggo",
"status": "succeeded",
"type": "documentAdditionOrUpdate",
"canceledBy": null,
"details": {
"receivedDocuments": 1,
"indexedDocuments": 1
},
"error": null,
"duration": "[duration]",
"enqueuedAt": "[date]",
"startedAt": "[date]",
"finishedAt": "[date]"
}
"###);
let (documents, _code) = index
.get_all_documents(GetAllDocumentsOptions { retrieve_vectors: true, ..Default::default() })
.await;
snapshot!(json_string!(documents, {".results.*._vectors.default.embeddings" => "[vector]"}), @r###"
{
"results": [
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou",
"_vectors": {
"default": {
"embeddings": "[vector]",
"regenerate": true
}
}
}
],
"offset": 0,
"limit": 20,
"total": 1
}
"###);
let (response, code) = index
.search_post(json!({
"q": "grand chien de berger des montagnes",
"hybrid": {"semanticRatio": 0.99, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["semanticHitCount"]), @"0");
snapshot!(json_string!(response["hits"]), @"[]");
let (response, code) = index
.search_post(json!({
"q": "grand chien de berger des montagnes",
"hybrid": {"semanticRatio": 0.99, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["semanticHitCount"]), @"1");
snapshot!(json_string!(response["hits"]), @r###"
[
{
"id": 0,
"name": "kefir",
"gender": "M",
"birthyear": 2023,
"breed": "Patou"
}
]
"###);
let (response, code) = index
.search_post(json!({
"q": "grand chien de berger des montagnes",
"hybrid": {"semanticRatio": 0.99, "embedder": "default"}
}))
.await;
snapshot!(code, @"200 OK");
snapshot!(json_string!(response["semanticHitCount"]), @"0");
snapshot!(json_string!(response["hits"]), @"[]");
}
// test with a server that wrongly responds 400

View file

@ -1,5 +1,4 @@
use std::collections::BTreeMap;
use std::sync::atomic::{AtomicUsize, Ordering};
use meili_snap::{json_string, snapshot};
use reqwest::IntoUrl;
@ -13,13 +12,22 @@ use crate::vector::{get_server_vector, GetAllDocumentsOptions};
async fn create_mock() -> (MockServer, Value) {
let mock_server = MockServer::start().await;
let counter = AtomicUsize::new(0);
let text_to_embedding: BTreeMap<_, _> = vec![
// text -> embedding
("kefir", [0.0, 0.0, 0.0]),
("intel", [1.0, 1.0, 1.0]),
]
// turn into btree
.into_iter()
.collect();
Mock::given(method("POST"))
.and(path("/"))
.respond_with(move |_req: &Request| {
let counter = counter.fetch_add(1, Ordering::Relaxed);
ResponseTemplate::new(200).set_body_json(json!({ "data": vec![counter; 3] }))
.respond_with(move |req: &Request| {
let text: String = req.body_json().unwrap();
ResponseTemplate::new(200).set_body_json(
json!({ "data": text_to_embedding.get(text.as_str()).unwrap_or(&[99., 99., 99.]) }),
)
})
.mount(&mock_server)
.await;
@ -32,13 +40,14 @@ async fn create_mock() -> (MockServer, Value) {
"request": "{{text}}",
"response": {
"data": "{{embedding}}"
}
},
"documentTemplate": "{{doc.name}}",
});
(mock_server, embedder_settings)
}
async fn create_mock_map() -> (MockServer, Value) {
async fn create_mock_default_template() -> (MockServer, Value) {
let mock_server = MockServer::start().await;
let text_to_embedding: BTreeMap<_, _> = vec![
@ -97,7 +106,14 @@ struct SingleResponse {
async fn create_mock_multiple() -> (MockServer, Value) {
let mock_server = MockServer::start().await;
let counter = AtomicUsize::new(0);
let text_to_embedding: BTreeMap<_, _> = vec![
// text -> embedding
("kefir", [0.0, 0.0, 0.0]),
("intel", [1.0, 1.0, 1.0]),
]
// turn into btree
.into_iter()
.collect();
Mock::given(method("POST"))
.and(path("/"))
@ -115,8 +131,11 @@ async fn create_mock_multiple() -> (MockServer, Value) {
.input
.into_iter()
.map(|text| SingleResponse {
embedding: text_to_embedding
.get(text.as_str())
.unwrap_or(&[99., 99., 99.])
.to_vec(),
text,
embedding: vec![counter.fetch_add(1, Ordering::Relaxed) as f32; 3],
})
.collect();
@ -142,7 +161,8 @@ async fn create_mock_multiple() -> (MockServer, Value) {
},
"{{..}}"
]
}
},
"documentTemplate": "{{doc.name}}"
});
(mock_server, embedder_settings)
@ -156,7 +176,14 @@ struct SingleRequest {
async fn create_mock_single_response_in_array() -> (MockServer, Value) {
let mock_server = MockServer::start().await;
let counter = AtomicUsize::new(0);
let text_to_embedding: BTreeMap<_, _> = vec![
// text -> embedding
("kefir", [0.0, 0.0, 0.0]),
("intel", [1.0, 1.0, 1.0]),
]
// turn into btree
.into_iter()
.collect();
Mock::given(method("POST"))
.and(path("/"))
@ -171,8 +198,11 @@ async fn create_mock_single_response_in_array() -> (MockServer, Value) {
};
let output = vec![SingleResponse {
embedding: text_to_embedding
.get(req.input.as_str())
.unwrap_or(&[99., 99., 99.])
.to_vec(),
text: req.input,
embedding: vec![counter.fetch_add(1, Ordering::Relaxed) as f32; 3],
}];
let response = MultipleResponse { output };
@ -196,7 +226,8 @@ async fn create_mock_single_response_in_array() -> (MockServer, Value) {
"embedding": "{{embedding}}"
}
]
}
},
"documentTemplate": "{{doc.name}}"
});
(mock_server, embedder_settings)
@ -205,7 +236,14 @@ async fn create_mock_single_response_in_array() -> (MockServer, Value) {
async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
let mock_server = MockServer::start().await;
let counter = AtomicUsize::new(0);
let text_to_embedding: BTreeMap<_, _> = vec![
// text -> embedding
("kefir", [0.0, 0.0, 0.0]),
("intel", [1.0, 1.0, 1.0]),
]
// turn into btree
.into_iter()
.collect();
Mock::given(method("POST"))
.and(path("/"))
@ -223,7 +261,7 @@ async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
}
}
let _req: String = match req.body_json() {
let req: String = match req.body_json() {
Ok(req) => req,
Err(error) => {
return ResponseTemplate::new(400).set_body_json(json!({
@ -232,7 +270,7 @@ async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
}
};
let output = vec![counter.fetch_add(1, Ordering::Relaxed) as f32; 3];
let output = text_to_embedding.get(req.as_str()).unwrap_or(&[99., 99., 99.]).to_vec();
ResponseTemplate::new(200).set_body_json(output)
})
@ -245,7 +283,8 @@ async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
"url": url,
"request": "{{text}}",
"response": "{{embedding}}",
"headers": {"my-nonstandard-auth": "bearer of the ring"}
"headers": {"my-nonstandard-auth": "bearer of the ring"},
"documentTemplate": "{{doc.name}}"
});
(mock_server, embedder_settings)
@ -254,12 +293,19 @@ async fn create_mock_raw_with_custom_header() -> (MockServer, Value) {
async fn create_mock_raw() -> (MockServer, Value) {
let mock_server = MockServer::start().await;
let counter = AtomicUsize::new(0);
let text_to_embedding: BTreeMap<_, _> = vec![
// text -> embedding
("kefir", [0.0, 0.0, 0.0]),
("intel", [1.0, 1.0, 1.0]),
]
// turn into btree
.into_iter()
.collect();
Mock::given(method("POST"))
.and(path("/"))
.respond_with(move |req: &Request| {
let _req: String = match req.body_json() {
let req: String = match req.body_json() {
Ok(req) => req,
Err(error) => {
return ResponseTemplate::new(400).set_body_json(json!({
@ -268,7 +314,7 @@ async fn create_mock_raw() -> (MockServer, Value) {
}
};
let output = vec![counter.fetch_add(1, Ordering::Relaxed) as f32; 3];
let output = text_to_embedding.get(req.as_str()).unwrap_or(&[99., 99., 99.]).to_vec();
ResponseTemplate::new(200).set_body_json(output)
})
@ -281,29 +327,30 @@ async fn create_mock_raw() -> (MockServer, Value) {
"url": url,
"dimensions": 3,
"request": "{{text}}",
"response": "{{embedding}}"
"response": "{{embedding}}",
"documentTemplate": "{{doc.name}}"
});
(mock_server, embedder_settings)
}
pub async fn post<T: IntoUrl>(url: T) -> reqwest::Result<reqwest::Response> {
reqwest::Client::builder().build()?.post(url).send().await
pub async fn post<T: IntoUrl>(url: T, text: &str) -> reqwest::Result<reqwest::Response> {
reqwest::Client::builder().build()?.post(url).json(&json!(text)).send().await
}
#[actix_rt::test]
async fn dummy_testing_the_mock() {
let (mock, _setting) = create_mock().await;
let body = post(&mock.uri()).await.unwrap().text().await.unwrap();
snapshot!(body, @r###"{"data":[0,0,0]}"###);
let body = post(&mock.uri()).await.unwrap().text().await.unwrap();
snapshot!(body, @r###"{"data":[1,1,1]}"###);
let body = post(&mock.uri()).await.unwrap().text().await.unwrap();
snapshot!(body, @r###"{"data":[2,2,2]}"###);
let body = post(&mock.uri()).await.unwrap().text().await.unwrap();
snapshot!(body, @r###"{"data":[3,3,3]}"###);
let body = post(&mock.uri()).await.unwrap().text().await.unwrap();
snapshot!(body, @r###"{"data":[4,4,4]}"###);
let body = post(&mock.uri(), "kefir").await.unwrap().text().await.unwrap();
snapshot!(body, @r###"{"data":[0.0,0.0,0.0]}"###);
let body = post(&mock.uri(), "intel").await.unwrap().text().await.unwrap();
snapshot!(body, @r###"{"data":[1.0,1.0,1.0]}"###);
let body = post(&mock.uri(), "kefir").await.unwrap().text().await.unwrap();
snapshot!(body, @r###"{"data":[0.0,0.0,0.0]}"###);
let body = post(&mock.uri(), "kefir").await.unwrap().text().await.unwrap();
snapshot!(body, @r###"{"data":[0.0,0.0,0.0]}"###);
let body = post(&mock.uri(), "intel").await.unwrap().text().await.unwrap();
snapshot!(body, @r###"{"data":[1.0,1.0,1.0]}"###);
}
#[actix_rt::test]
@ -954,7 +1001,7 @@ async fn bad_settings() {
let (response, code) = index
.update_settings(json!({
"embedders": {
"rest": json!({ "source": "rest", "url": mock.uri(), "request": "{{text}}", "response": { "data": "{{embedding}}" }, "dimensions": 2 }),
"rest": json!({ "source": "rest", "url": mock.uri(), "request": "{{text}}", "response": { "data": "{{embedding}}" }, "dimensions": 2, "documentTemplate": "{{doc.name}}" }),
},
}))
.await;
@ -1937,6 +1984,7 @@ async fn server_custom_header() {
"embedders": {
"rest": {
"source": "rest",
"documentTemplate": "{{doc.name}}",
"url": "[url]",
"request": "{{text}}",
"response": "{{embedding}}",
@ -1957,7 +2005,7 @@ async fn server_custom_header() {
#[actix_rt::test]
async fn searchable_reindex() {
let (_mock, setting) = create_mock_map().await;
let (_mock, setting) = create_mock_default_template().await;
let server = get_server_vector().await;
let index = server.index("doggo");