Merge pull request #3703 from meilisearch/search-refactor-test-typo-tolerance

Search refactor test typo tolerance + some bugfixes
This commit is contained in:
Loïc Lecrenier 2023-04-27 11:01:35 +02:00 committed by GitHub
commit dd007dceca
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
2 changed files with 167 additions and 79 deletions

View File

@ -330,6 +330,8 @@ pub fn execute_search(
ctx.index.documents_ids(ctx.txn)? ctx.index.documents_ids(ctx.txn)?
}; };
check_sort_criteria(ctx, sort_criteria.as_ref())?;
let mut located_query_terms = None; let mut located_query_terms = None;
let bucket_sort_output = if let Some(query) = query { let bucket_sort_output = if let Some(query) = query {
// We make sure that the analyzer is aware of the stop words // We make sure that the analyzer is aware of the stop words
@ -352,8 +354,6 @@ pub fn execute_search(
let graph = QueryGraph::from_query(ctx, &query_terms)?; let graph = QueryGraph::from_query(ctx, &query_terms)?;
located_query_terms = Some(query_terms); located_query_terms = Some(query_terms);
check_sort_criteria(ctx, sort_criteria.as_ref())?;
let ranking_rules = let ranking_rules =
get_ranking_rules_for_query_graph_search(ctx, sort_criteria, terms_matching_strategy)?; get_ranking_rules_for_query_graph_search(ctx, sort_criteria, terms_matching_strategy)?;

View File

@ -20,10 +20,9 @@ if `words` doesn't exist before it.
use std::collections::HashMap; use std::collections::HashMap;
use crate::{ use crate::index::tests::TempIndex;
index::tests::TempIndex, search::new::tests::collect_field_values, Criterion, Search, use crate::search::new::tests::collect_field_values;
SearchResult, TermsMatchingStrategy, use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
};
fn create_index() -> TempIndex { fn create_index() -> TempIndex {
let index = TempIndex::new(); let index = TempIndex::new();
@ -134,6 +133,14 @@ fn create_index() -> TempIndex {
"id": 23, "id": 23,
"text": "the quivk brown fox jumps over the lazy dog" "text": "the quivk brown fox jumps over the lazy dog"
}, },
{
"id": 24,
"tolerant_text": "the quick brown fox jumps over the lazy dog",
},
{
"id": 25,
"tolerant_text": "the quivk brown fox jumps over the lazy dog",
},
])) ]))
.unwrap(); .unwrap();
index index
@ -212,79 +219,6 @@ fn test_default_typo() {
"\"the quickest brownest fox jumps over the laziest dog\"", "\"the quickest brownest fox jumps over the laziest dog\"",
] ]
"###); "###);
// 1 typo on one word, swapped letters
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quikc borwn fox jupms over the lazy dog");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
"\"the quick brown fox jumps over the lazy dog\"",
]
"###);
// 1 first letter typo on a word <5 bytes, replaced letter
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the nuick brown fox jumps over the lazy dog");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]");
// 1 first letter typo on a word <5 bytes, missing letter
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the uick brown fox jumps over the lazy dog");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]");
// 1 typo on all words >=5 bytes, replaced letters
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quack brawn fox junps over the lazy dog");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
"\"the quick brown fox jumps over the lazy dog\"",
]
"###);
// 2 typos on words < 9 bytes
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the quckest brawnert fox jumps over the aziest dog");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]");
// 2 typos on words >= 9 bytes: missing letters, missing first letter, replaced letters
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the extravant fox kyrocketed over the lamguorout dog");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
"\"the extravagant fox skyrocketed over the languorous dog\"",
]
"###);
// 2 typos on words >= 9 bytes: 2 extra letters in a single word, swapped letters + extra letter, replaced letters
let mut s = Search::new(&txn, &index);
s.terms_matching_strategy(TermsMatchingStrategy::All);
s.query("the extravaganttt fox sktyrocnketed over the lagnuorrous dog");
let SearchResult { documents_ids, .. } = s.execute().unwrap();
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6]");
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
insta::assert_debug_snapshot!(texts, @r###"
[
"\"the extravagant fox skyrocketed over the languorous dog\"",
]
"###);
} }
#[test] #[test]
@ -301,6 +235,160 @@ fn test_phrase_no_typo_allowed() {
insta::assert_debug_snapshot!(texts, @"[]"); insta::assert_debug_snapshot!(texts, @"[]");
} }
/// Exercises searches against an index whose exact-words setting contains
/// `quick`, `quack` and `sunflower`, using `TermsMatchingStrategy::All`.
#[test]
fn test_typo_exact_word() {
    let index = create_index();

    index
        .update_settings(|settings| {
            settings.set_exact_words(
                ["quick", "quack", "sunflower"].iter().map(|word| word.to_string()).collect(),
            );
        })
        .unwrap();

    let txn = index.read_txn().unwrap();

    // Pin the typo thresholds this test runs with.
    let one_typo_min_len = index.min_word_len_one_typo(&txn).unwrap();
    let two_typos_min_len = index.min_word_len_two_typos(&txn).unwrap();
    insta::assert_debug_snapshot!(one_typo_min_len, @"5");
    insta::assert_debug_snapshot!(two_typos_min_len, @"9");

    // Runs one search requiring every query term and hands back the document ids.
    let search_all_terms = |query: &str| {
        let mut search = Search::new(&txn, &index);
        search.terms_matching_strategy(TermsMatchingStrategy::All);
        search.query(query);
        let SearchResult { documents_ids, .. } = search.execute().unwrap();
        documents_ids
    };

    // `quick` is an exact word: the `quivk` document must not come back.
    let hits = search_all_terms("the quick brown fox jumps over the lazy dog");
    insta::assert_snapshot!(format!("{hits:?}"), @"[0]");
    let texts = collect_field_values(&index, &txn, "text", &hits);
    insta::assert_debug_snapshot!(texts, @r###"
    [
        "\"the quick brown fox jumps over the lazy dog\"",
    ]
    "###);

    // `quack` is an exact word: the `quick` document must not come back.
    let hits = search_all_terms("the quack brown fox jumps over the lazy dog");
    insta::assert_snapshot!(format!("{hits:?}"), @"[]");

    // Words outside the exact-words list (quicest, jummps) keep normal typo handling.
    let hits = search_all_terms("the quicest brownest fox jummps over the laziest dog");
    insta::assert_snapshot!(format!("{hits:?}"), @"[3]");
    let texts = collect_field_values(&index, &txn, "text", &hits);
    insta::assert_debug_snapshot!(texts, @r###"
    [
        "\"the quickest brownest fox jumps over the laziest dog\"",
    ]
    "###);

    // Exact words still allow prefix matches (sunflowering is returned), but
    // neither sunflowar nor the split form "sun flower" is.
    let hits = search_all_terms("network interconnection sunflower");
    insta::assert_snapshot!(format!("{hits:?}"), @"[16, 18]");
    let texts = collect_field_values(&index, &txn, "text", &hits);
    insta::assert_debug_snapshot!(texts, @r###"
    [
        "\"network interconnection sunflower\"",
        "\"network interconnection sunflowering\"",
    ]
    "###);
}
/// Exercises searches where `text` is configured as an exact attribute,
/// `tolerant_text` is a second searchable field, and `quivk` is an exact word.
/// Every search uses `TermsMatchingStrategy::All`.
#[test]
fn test_typo_exact_attribute() {
    let index = create_index();

    index
        .update_settings(|settings| {
            settings.set_exact_attributes(["text"].iter().map(|field| field.to_string()).collect());
            settings.set_searchable_fields(
                ["text", "tolerant_text"].iter().map(|field| field.to_string()).collect(),
            );
            settings.set_exact_words(["quivk"].iter().map(|word| word.to_string()).collect());
        })
        .unwrap();

    let txn = index.read_txn().unwrap();

    // Pin the typo thresholds this test runs with.
    let one_typo_min_len = index.min_word_len_one_typo(&txn).unwrap();
    let two_typos_min_len = index.min_word_len_two_typos(&txn).unwrap();
    insta::assert_debug_snapshot!(one_typo_min_len, @"5");
    insta::assert_debug_snapshot!(two_typos_min_len, @"9");

    // Runs one search requiring every query term and hands back the document ids.
    let search_all_terms = |query: &str| {
        let mut search = Search::new(&txn, &index);
        search.terms_matching_strategy(TermsMatchingStrategy::All);
        search.query(query);
        let SearchResult { documents_ids, .. } = search.execute().unwrap();
        documents_ids
    };

    // A query without typos returns hits from the exact attribute and the tolerant one.
    let hits = search_all_terms("the quick brown fox jumps over the lazy dog");
    insta::assert_snapshot!(format!("{hits:?}"), @"[0, 24, 25]");
    let texts = collect_field_values(&index, &txn, "text", &hits);
    insta::assert_debug_snapshot!(texts, @r###"
    [
        "\"the quick brown fox jumps over the lazy dog\"",
        "__does_not_exist__",
        "__does_not_exist__",
    ]
    "###);
    let texts = collect_field_values(&index, &txn, "tolerant_text", &hits);
    insta::assert_debug_snapshot!(texts, @r###"
    [
        "__does_not_exist__",
        "\"the quick brown fox jumps over the lazy dog\"",
        "\"the quivk brown fox jumps over the lazy dog\"",
    ]
    "###);

    // With one typo in the query, only the tolerant attribute yields results.
    let hits = search_all_terms("the quidk brown fox jumps over the lazy dog");
    insta::assert_snapshot!(format!("{hits:?}"), @"[24, 25]");
    let texts = collect_field_values(&index, &txn, "tolerant_text", &hits);
    insta::assert_debug_snapshot!(texts, @r###"
    [
        "\"the quick brown fox jumps over the lazy dog\"",
        "\"the quivk brown fox jumps over the lazy dog\"",
    ]
    "###);

    // Combined with exact words: querying the exact word `quivk`.
    let hits = search_all_terms("the quivk brown fox jumps over the lazy dog");
    insta::assert_snapshot!(format!("{hits:?}"), @"[23, 25]");
    let texts = collect_field_values(&index, &txn, "text", &hits);
    insta::assert_debug_snapshot!(texts, @r###"
    [
        "\"the quivk brown fox jumps over the lazy dog\"",
        "__does_not_exist__",
    ]
    "###);
    let texts = collect_field_values(&index, &txn, "tolerant_text", &hits);
    insta::assert_debug_snapshot!(texts, @r###"
    [
        "__does_not_exist__",
        "\"the quivk brown fox jumps over the lazy dog\"",
    ]
    "###);

    // Nothing is returned for this query, which carries typos.
    let hits = search_all_terms("the quicest brownest fox jummps over the laziest dog");
    insta::assert_snapshot!(format!("{hits:?}"), @"[]");
}
#[test] #[test]
fn test_ngram_typos() { fn test_ngram_typos() {
let index = create_index(); let index = create_index();