Add unit test for prefix handling by the proximity criterion

This commit is contained in:
parent 777b387dc4
commit f097aafa1c
@@ -592,4 +592,101 @@ fn resolve_plane_sweep_candidates(
 }
 
 #[cfg(test)]
-mod tests {}
+mod tests {
+    use std::io::Cursor;
+
+    use big_s::S;
+
+    use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
+    use crate::index::tests::TempIndex;
+    use crate::SearchResult;
+
+    fn documents_with_enough_different_words_for_prefixes(prefixes: &[&str]) -> Vec<crate::Object> {
+        let mut documents = Vec::new();
+        for prefix in prefixes {
+            for i in 0..500 {
+                documents.push(
+                    serde_json::json!({
+                        "text": format!("{prefix}{i:x}"),
+                    })
+                    .as_object()
+                    .unwrap()
+                    .clone(),
+                )
+            }
+        }
+        documents
+    }
+
+    #[test]
+    fn test_proximity_criterion_prefix_handling() {
+        let mut index = TempIndex::new();
+        index.index_documents_config.autogenerate_docids = true;
+
+        index
+            .update_settings(|settings| {
+                settings.set_primary_key(S("id"));
+                settings.set_criteria(vec![
+                    "words".to_owned(),
+                    "typo".to_owned(),
+                    "proximity".to_owned(),
+                ]);
+            })
+            .unwrap();
+
+        let mut documents = DocumentsBatchBuilder::new(Vec::new());
+
+        for doc in [
+            // 0
+            serde_json::json!({ "text": "zero is exactly the amount of configuration I want" }),
+            // 1
+            serde_json::json!({ "text": "zero bad configuration" }),
+            // 2
+            serde_json::json!({ "text": "zero configuration" }),
+            // 3
+            serde_json::json!({ "text": "zero config" }),
+            // 4
+            serde_json::json!({ "text": "zero conf" }),
+            // 5
+            serde_json::json!({ "text": "zero bad conf" }),
+        ] {
+            documents.append_json_object(doc.as_object().unwrap()).unwrap();
+        }
+        for doc in documents_with_enough_different_words_for_prefixes(&["conf"]) {
+            documents.append_json_object(&doc).unwrap();
+        }
+        let documents =
+            DocumentsBatchReader::from_reader(Cursor::new(documents.into_inner().unwrap()))
+                .unwrap();
+
+        index.add_documents(documents).unwrap();
+
+        let rtxn = index.read_txn().unwrap();
+
+        let SearchResult { matching_words: _, candidates: _, documents_ids } =
+            index.search(&rtxn).query("zero c").execute().unwrap();
+        insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 4, 1, 5, 0]");
+
+        let SearchResult { matching_words: _, candidates: _, documents_ids } =
+            index.search(&rtxn).query("zero co").execute().unwrap();
+        insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 4, 1, 5, 0]");
+
+        let SearchResult { matching_words: _, candidates: _, documents_ids } =
+            index.search(&rtxn).query("zero con").execute().unwrap();
+        // Here search results are degraded because `con` is in the prefix cache but it is too
+        // long to be stored in the prefix proximity databases, and we don't want to iterate over
+        // all of its word derivations.
+        insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2, 3, 4, 5]");
+
+        let SearchResult { matching_words: _, candidates: _, documents_ids } =
+            index.search(&rtxn).query("zero conf").execute().unwrap();
+        // Here search results are degraded as well, but we can still rank correctly documents
+        // that contain `conf` exactly, and not as a prefix.
+        insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 5, 0, 1, 2, 3]");
+
+        let SearchResult { matching_words: _, candidates: _, documents_ids } =
+            index.search(&rtxn).query("zero config").execute().unwrap();
+        // `config` is not a common prefix, so the normal methods are used.
+        insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 1, 0, 4, 5]");
+    }
+}
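The three outcomes asserted above can be summarized as a small decision sketch. This is an illustration only, not milli's actual implementation: `MAX_PREFIX_PROXIMITY_LEN`, `PrefixHandling`, and `classify_prefix` are hypothetical names, and the length cutoff of 2 is an assumption inferred from the snapshots ("c" and "co" rank exactly, "con" does not). The helper above indexes 500 extra words such as "conf0" through "conf1f3" purely to make `conf` frequent enough to enter the prefix cache.

// Hypothetical sketch of the prefix handling the test pins down.
// None of these names exist in milli; the cutoff of 2 is an assumption
// inferred from the snapshot assertions above.
const MAX_PREFIX_PROXIMITY_LEN: usize = 2;

#[derive(Debug, PartialEq)]
enum PrefixHandling {
    // Short common prefix ("c", "co"): pairs are precomputed in the
    // word-prefix proximity database, so proximity ranking stays exact.
    ExactViaPrefixDatabase,
    // Common prefix too long for that database ("con", "conf"): proximity
    // ranking over the prefix's derivations is skipped, degrading the order.
    Degraded,
    // Not a common prefix ("config"): each word derivation is resolved
    // individually through the normal proximity databases.
    PerWordDerivations,
}

fn classify_prefix(prefix: &str, in_prefix_cache: bool) -> PrefixHandling {
    if !in_prefix_cache {
        PrefixHandling::PerWordDerivations
    } else if prefix.chars().count() <= MAX_PREFIX_PROXIMITY_LEN {
        PrefixHandling::ExactViaPrefixDatabase
    } else {
        PrefixHandling::Degraded
    }
}

#[test]
fn sketch_matches_the_snapshots() {
    assert_eq!(classify_prefix("c", true), PrefixHandling::ExactViaPrefixDatabase);
    assert_eq!(classify_prefix("co", true), PrefixHandling::ExactViaPrefixDatabase);
    assert_eq!(classify_prefix("con", true), PrefixHandling::Degraded);
    assert_eq!(classify_prefix("conf", true), PrefixHandling::Degraded);
    assert_eq!(classify_prefix("config", false), PrefixHandling::PerWordDerivations);
}

Under this reading, the "zero conf" query also lands in the degraded branch, but documents 4 and 5 still come first because they contain the literal word `conf`, which the regular (non-prefix) proximity database can rank; only the prefix derivations lose their proximity information.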