Fix tests

This commit is contained in:
Louis Dureuil 2025-06-30 00:09:48 +02:00
parent c593fbe648
commit 11e7c0d75f
No known key found for this signature in database
3 changed files with 155 additions and 77 deletions

View file

@ -690,11 +690,20 @@ fn test_settings_update() {
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let configs = index.embedding_configs(&rtxn).unwrap();
let IndexEmbeddingConfig { name, config, user_provided } = configs.first().unwrap();
let embedders = index.embedding_configs();
let configs = embedders.embedding_configs(&rtxn).unwrap();
let IndexEmbeddingConfig { name, config, fragments } = configs.first().unwrap();
let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
insta::assert_snapshot!(info.embedder_id, @"0");
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[]>");
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[]>");
insta::assert_snapshot!(name, @"default");
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
insta::assert_json_snapshot!(config.embedder_options);
insta::assert_debug_snapshot!(fragments, @r###"
FragmentConfigs(
[],
)
"###);
}
#[test]

View file

@ -3,13 +3,14 @@ use std::collections::BTreeMap;
use big_s::S;
use insta::assert_json_snapshot;
use meili_snap::{json_string, snapshot};
use meilisearch_types::milli::index::IndexEmbeddingConfig;
use meilisearch_types::milli::update::Setting;
use meilisearch_types::milli::vector::settings::EmbeddingSettings;
use meilisearch_types::milli::vector::SearchQuery;
use meilisearch_types::milli::{self, obkv_to_json};
use meilisearch_types::settings::{SettingEmbeddingSettings, Settings, Unchecked};
use meilisearch_types::tasks::KindWithContent;
use milli::update::IndexDocumentsMethod::*;
use milli::vector::db::IndexEmbeddingConfig;
use crate::insta_snapshot::snapshot_index_scheduler;
use crate::test_utils::read_json;
@ -85,28 +86,51 @@ fn import_vectors() {
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let configs = index.embedding_configs(&rtxn).unwrap();
let embedders = index.embedding_configs();
let configs = embedders.embedding_configs(&rtxn).unwrap();
// for consistency with the below
#[allow(clippy::get_first)]
let IndexEmbeddingConfig { name, config: fakerest_config, user_provided } =
let IndexEmbeddingConfig { name, config: fakerest_config, fragments } =
configs.get(0).unwrap();
let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
insta::assert_snapshot!(info.embedder_id, @"0");
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[]>");
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[]>");
insta::assert_snapshot!(name, @"A_fakerest");
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
insta::assert_debug_snapshot!(fragments, @r###"
FragmentConfigs(
[],
)
"###);
insta::assert_json_snapshot!(fakerest_config.embedder_options);
let fakerest_name = name.clone();
let IndexEmbeddingConfig { name, config: simple_hf_config, user_provided } =
let IndexEmbeddingConfig { name, config: simple_hf_config, fragments } =
configs.get(1).unwrap();
let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
insta::assert_snapshot!(info.embedder_id, @"1");
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[]>");
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[]>");
insta::assert_snapshot!(name, @"B_small_hf");
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
insta::assert_debug_snapshot!(fragments, @r###"
FragmentConfigs(
[],
)
"###);
insta::assert_json_snapshot!(simple_hf_config.embedder_options);
let simple_hf_name = name.clone();
let configs = index_scheduler.embedders("doggos".to_string(), configs).unwrap();
let (hf_embedder, _, _) = configs.get(&simple_hf_name).unwrap();
let beagle_embed = hf_embedder.embed_search("Intel the beagle best doggo", None).unwrap();
let lab_embed = hf_embedder.embed_search("Max the lab best doggo", None).unwrap();
let patou_embed = hf_embedder.embed_search("kefir the patou best doggo", None).unwrap();
let hf_runtime = configs.get(&simple_hf_name).unwrap();
let hf_embedder = &hf_runtime.embedder;
let beagle_embed = hf_embedder
.embed_search(SearchQuery::Text("Intel the beagle best doggo"), None)
.unwrap();
let lab_embed =
hf_embedder.embed_search(SearchQuery::Text("Max the lab best doggo"), None).unwrap();
let patou_embed = hf_embedder
.embed_search(SearchQuery::Text("kefir the patou best doggo"), None)
.unwrap();
(fakerest_name, simple_hf_name, beagle_embed, lab_embed, patou_embed)
};
@ -166,22 +190,38 @@ fn import_vectors() {
let rtxn = index.read_txn().unwrap();
// Ensure the document have been inserted into the relevant bitamp
let configs = index.embedding_configs(&rtxn).unwrap();
let embedders = index.embedding_configs();
let configs = embedders.embedding_configs(&rtxn).unwrap();
// for consistency with the below
#[allow(clippy::get_first)]
let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } =
configs.get(0).unwrap();
let IndexEmbeddingConfig { name, config: _, fragments } = configs.get(0).unwrap();
let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
insta::assert_snapshot!(info.embedder_id, @"0");
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[0]>");
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[0]>");
insta::assert_snapshot!(name, @"A_fakerest");
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>");
insta::assert_debug_snapshot!(fragments, @r###"
FragmentConfigs(
[],
)
"###);
let IndexEmbeddingConfig { name, config: _, user_provided } = configs.get(1).unwrap();
let IndexEmbeddingConfig { name, config: _, fragments } = configs.get(1).unwrap();
let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
insta::assert_snapshot!(info.embedder_id, @"1");
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[0]>");
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[]>");
insta::assert_snapshot!(name, @"B_small_hf");
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
insta::assert_debug_snapshot!(fragments, @r###"
FragmentConfigs(
[],
)
"###);
let embeddings = index.embeddings(&rtxn, 0).unwrap();
assert_json_snapshot!(embeddings[&simple_hf_name][0] == lab_embed, @"true");
assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true");
assert_json_snapshot!(embeddings[&simple_hf_name].0[0] == lab_embed, @"true");
assert_json_snapshot!(embeddings[&fakerest_name].0[0] == beagle_embed, @"true");
let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
@ -239,25 +279,41 @@ fn import_vectors() {
let index = index_scheduler.index("doggos").unwrap();
let rtxn = index.read_txn().unwrap();
let embedders = index.embedding_configs();
// Ensure the document have been inserted into the relevant bitamp
let configs = index.embedding_configs(&rtxn).unwrap();
let configs = embedders.embedding_configs(&rtxn).unwrap();
// for consistency with the below
#[allow(clippy::get_first)]
let IndexEmbeddingConfig { name, config: _, user_provided: user_defined } =
configs.get(0).unwrap();
let IndexEmbeddingConfig { name, config: _, fragments } = configs.get(0).unwrap();
let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
insta::assert_snapshot!(info.embedder_id, @"0");
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[0]>");
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[0]>");
insta::assert_snapshot!(name, @"A_fakerest");
insta::assert_debug_snapshot!(user_defined, @"RoaringBitmap<[0]>");
insta::assert_debug_snapshot!(fragments, @r###"
FragmentConfigs(
[],
)
"###);
let IndexEmbeddingConfig { name, config: _, user_provided } = configs.get(1).unwrap();
let IndexEmbeddingConfig { name, config: _, fragments } = configs.get(1).unwrap();
let info = embedders.embedder_info(&rtxn, name).unwrap().unwrap();
insta::assert_snapshot!(info.embedder_id, @"1");
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[]>");
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[]>");
insta::assert_snapshot!(name, @"B_small_hf");
insta::assert_debug_snapshot!(user_provided, @"RoaringBitmap<[]>");
insta::assert_debug_snapshot!(fragments, @r###"
FragmentConfigs(
[],
)
"###);
let embeddings = index.embeddings(&rtxn, 0).unwrap();
// automatically changed to patou because set to regenerate
assert_json_snapshot!(embeddings[&simple_hf_name][0] == patou_embed, @"true");
assert_json_snapshot!(embeddings[&simple_hf_name].0[0] == patou_embed, @"true");
// remained beagle
assert_json_snapshot!(embeddings[&fakerest_name][0] == beagle_embed, @"true");
assert_json_snapshot!(embeddings[&fakerest_name].0[0] == beagle_embed, @"true");
let doc = index.documents(&rtxn, std::iter::once(0)).unwrap()[0].1;
let fields_ids_map = index.fields_ids_map(&rtxn).unwrap();
@ -400,7 +456,7 @@ fn import_vectors_first_and_embedder_later() {
// the all the vectors linked to the new specified embedder have been removed
// Only the unknown embedders stays in the document DB
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"},{"id":1,"doggo":"intel","_vectors":{"unknown embedder":[1,2,3]}},{"id":2,"doggo":"max","_vectors":{"unknown embedder":[4,5]}},{"id":3,"doggo":"marcel"},{"id":4,"doggo":"sora"}]"###);
let conf = index.embedding_configs(&rtxn).unwrap();
let conf = index.embedding_configs().embedding_configs(&rtxn).unwrap();
// even though we specified the vector for the ID 3, it shouldn't be marked
// as user provided since we explicitely marked it as NOT user provided.
snapshot!(format!("{conf:#?}"), @r###"
@ -426,19 +482,28 @@ fn import_vectors_first_and_embedder_later() {
},
quantized: None,
},
user_provided: RoaringBitmap<[1, 2]>,
fragments: FragmentConfigs(
[],
),
},
]
"###);
let info =
index.embedding_configs().embedder_info(&rtxn, "my_doggo_embedder").unwrap().unwrap();
insta::assert_snapshot!(info.embedder_id, @"0");
insta::assert_debug_snapshot!(info.embedding_status.user_provided_docids(), @"RoaringBitmap<[1, 2, 3]>");
insta::assert_debug_snapshot!(info.embedding_status.skip_regenerate_docids(), @"RoaringBitmap<[1, 2]>");
let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
let embeddings = index.embeddings(&rtxn, docid).unwrap();
let embedding = &embeddings["my_doggo_embedder"];
let (embedding, _) = &embeddings["my_doggo_embedder"];
assert!(!embedding.is_empty(), "{embedding:?}");
// the document with the id 3 should keep its original embedding
let docid = index.external_documents_ids.get(&rtxn, "3").unwrap().unwrap();
let embeddings = index.embeddings(&rtxn, docid).unwrap();
let embeddings = &embeddings["my_doggo_embedder"];
let (embeddings, _) = &embeddings["my_doggo_embedder"];
snapshot!(embeddings.len(), @"1");
assert!(embeddings[0].iter().all(|i| *i == 3.0), "{:?}", embeddings[0]);
@ -493,7 +558,7 @@ fn import_vectors_first_and_embedder_later() {
"###);
let embeddings = index.embeddings(&rtxn, docid).unwrap();
let embedding = &embeddings["my_doggo_embedder"];
let (embedding, _) = &embeddings["my_doggo_embedder"];
assert!(!embedding.is_empty());
assert!(!embedding[0].iter().all(|i| *i == 3.0), "{:?}", embedding[0]);
@ -501,7 +566,7 @@ fn import_vectors_first_and_embedder_later() {
// the document with the id 4 should generate an embedding
let docid = index.external_documents_ids.get(&rtxn, "4").unwrap().unwrap();
let embeddings = index.embeddings(&rtxn, docid).unwrap();
let embedding = &embeddings["my_doggo_embedder"];
let (embedding, _) = &embeddings["my_doggo_embedder"];
assert!(!embedding.is_empty());
}
@ -603,33 +668,35 @@ fn delete_document_containing_vector() {
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string(&documents).unwrap(), @r###"[{"id":0,"doggo":"kefir"}]"###);
let conf = index.embedding_configs(&rtxn).unwrap();
let conf = index.embedding_configs().embedding_configs(&rtxn).unwrap();
snapshot!(format!("{conf:#?}"), @r###"
[
IndexEmbeddingConfig {
name: "manual",
config: EmbeddingConfig {
embedder_options: UserProvided(
EmbedderOptions {
dimensions: 3,
distribution: None,
},
),
prompt: PromptData {
template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
max_bytes: Some(
400,
),
[
IndexEmbeddingConfig {
name: "manual",
config: EmbeddingConfig {
embedder_options: UserProvided(
EmbedderOptions {
dimensions: 3,
distribution: None,
},
quantized: None,
),
prompt: PromptData {
template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
max_bytes: Some(
400,
),
},
user_provided: RoaringBitmap<[0]>,
quantized: None,
},
]
"###);
fragments: FragmentConfigs(
[],
),
},
]
"###);
let docid = index.external_documents_ids.get(&rtxn, "0").unwrap().unwrap();
let embeddings = index.embeddings(&rtxn, docid).unwrap();
let embedding = &embeddings["manual"];
let (embedding, _) = &embeddings["manual"];
assert!(!embedding.is_empty(), "{embedding:?}");
index_scheduler
@ -647,30 +714,32 @@ fn delete_document_containing_vector() {
.map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
.collect::<Vec<_>>();
snapshot!(serde_json::to_string(&documents).unwrap(), @"[]");
let conf = index.embedding_configs(&rtxn).unwrap();
let conf = index.embedding_configs().embedding_configs(&rtxn).unwrap();
snapshot!(format!("{conf:#?}"), @r###"
[
IndexEmbeddingConfig {
name: "manual",
config: EmbeddingConfig {
embedder_options: UserProvided(
EmbedderOptions {
dimensions: 3,
distribution: None,
},
),
prompt: PromptData {
template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
max_bytes: Some(
400,
),
[
IndexEmbeddingConfig {
name: "manual",
config: EmbeddingConfig {
embedder_options: UserProvided(
EmbedderOptions {
dimensions: 3,
distribution: None,
},
quantized: None,
),
prompt: PromptData {
template: "{% for field in fields %}{% if field.is_searchable and field.value != nil %}{{ field.name }}: {{ field.value }}\n{% endif %}{% endfor %}",
max_bytes: Some(
400,
),
},
user_provided: RoaringBitmap<[]>,
quantized: None,
},
]
"###);
fragments: FragmentConfigs(
[],
),
},
]
"###);
}
#[test]

View file

@ -499,7 +499,7 @@ async fn query_combination() {
snapshot!(code, @"400 Bad Request");
snapshot!(response, @r###"
{
"message": "Invalid request: missing `hybrid` parameter when `vector` is present.",
"message": "Invalid request: missing `hybrid` parameter when `vector` or `media` are present.",
"code": "missing_search_hybrid",
"type": "invalid_request",
"link": "https://docs.meilisearch.com/errors#missing_search_hybrid"