mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-04 20:18:55 +01:00
Merge pull request #3703 from meilisearch/search-refactor-test-typo-tolerance
Search refactor test typo tolerance + some bugfixes
This commit is contained in:
commit
dd007dceca
@ -330,6 +330,8 @@ pub fn execute_search(
|
||||
ctx.index.documents_ids(ctx.txn)?
|
||||
};
|
||||
|
||||
check_sort_criteria(ctx, sort_criteria.as_ref())?;
|
||||
|
||||
let mut located_query_terms = None;
|
||||
let bucket_sort_output = if let Some(query) = query {
|
||||
// We make sure that the analyzer is aware of the stop words
|
||||
@ -352,8 +354,6 @@ pub fn execute_search(
|
||||
let graph = QueryGraph::from_query(ctx, &query_terms)?;
|
||||
located_query_terms = Some(query_terms);
|
||||
|
||||
check_sort_criteria(ctx, sort_criteria.as_ref())?;
|
||||
|
||||
let ranking_rules =
|
||||
get_ranking_rules_for_query_graph_search(ctx, sort_criteria, terms_matching_strategy)?;
|
||||
|
||||
|
@ -20,10 +20,9 @@ if `words` doesn't exist before it.
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::{
|
||||
index::tests::TempIndex, search::new::tests::collect_field_values, Criterion, Search,
|
||||
SearchResult, TermsMatchingStrategy,
|
||||
};
|
||||
use crate::index::tests::TempIndex;
|
||||
use crate::search::new::tests::collect_field_values;
|
||||
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
|
||||
|
||||
fn create_index() -> TempIndex {
|
||||
let index = TempIndex::new();
|
||||
@ -134,6 +133,14 @@ fn create_index() -> TempIndex {
|
||||
"id": 23,
|
||||
"text": "the quivk brown fox jumps over the lazy dog"
|
||||
},
|
||||
{
|
||||
"id": 24,
|
||||
"tolerant_text": "the quick brown fox jumps over the lazy dog",
|
||||
},
|
||||
{
|
||||
"id": 25,
|
||||
"tolerant_text": "the quivk brown fox jumps over the lazy dog",
|
||||
},
|
||||
]))
|
||||
.unwrap();
|
||||
index
|
||||
@ -212,79 +219,6 @@ fn test_default_typo() {
|
||||
"\"the quickest brownest fox jumps over the laziest dog\"",
|
||||
]
|
||||
"###);
|
||||
|
||||
// 1 typo on one word, swapped letters
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
s.query("the quikc borwn fox jupms over the lazy dog");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the quick brown fox jumps over the lazy dog\"",
|
||||
]
|
||||
"###);
|
||||
|
||||
// 1 first letter typo on a word <5 bytes, replaced letter
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
s.query("the nuick brown fox jumps over the lazy dog");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]");
|
||||
|
||||
// 1 first letter typo on a word <5 bytes, missing letter
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
s.query("the uick brown fox jumps over the lazy dog");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]");
|
||||
|
||||
// 1 typo on all words >=5 bytes, replaced letters
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
s.query("the quack brawn fox junps over the lazy dog");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the quick brown fox jumps over the lazy dog\"",
|
||||
]
|
||||
"###);
|
||||
|
||||
// 2 typos on words < 9 bytes
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
s.query("the quckest brawnert fox jumps over the aziest dog");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]");
|
||||
|
||||
// 2 typos on words >= 9 bytes: missing letters, missing first letter, replaced letters
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
s.query("the extravant fox kyrocketed over the lamguorout dog");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the extravagant fox skyrocketed over the languorous dog\"",
|
||||
]
|
||||
"###);
|
||||
|
||||
// 2 typos on words >= 9 bytes: 2 extra letters in a single word, swapped letters + extra letter, replaced letters
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
s.query("the extravaganttt fox sktyrocnketed over the lagnuorrous dog");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the extravagant fox skyrocketed over the languorous dog\"",
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -301,6 +235,160 @@ fn test_phrase_no_typo_allowed() {
|
||||
insta::assert_debug_snapshot!(texts, @"[]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_typo_exact_word() {
|
||||
let index = create_index();
|
||||
|
||||
index
|
||||
.update_settings(|s| {
|
||||
s.set_exact_words(
|
||||
["quick", "quack", "sunflower"].iter().map(ToString::to_string).collect(),
|
||||
)
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
let txn = index.read_txn().unwrap();
|
||||
|
||||
let ot = index.min_word_len_one_typo(&txn).unwrap();
|
||||
let tt = index.min_word_len_two_typos(&txn).unwrap();
|
||||
insta::assert_debug_snapshot!(ot, @"5");
|
||||
insta::assert_debug_snapshot!(tt, @"9");
|
||||
|
||||
// don't match quivk
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
s.query("the quick brown fox jumps over the lazy dog");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the quick brown fox jumps over the lazy dog\"",
|
||||
]
|
||||
"###);
|
||||
|
||||
// Don't match quick
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
s.query("the quack brown fox jumps over the lazy dog");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]");
|
||||
|
||||
// words not in exact_words (quicest, jummps) have normal typo handling
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
s.query("the quicest brownest fox jummps over the laziest dog");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the quickest brownest fox jumps over the laziest dog\"",
|
||||
]
|
||||
"###);
|
||||
|
||||
// exact words do not disable prefix (sunflowering OK, but no sunflowar or sun flower)
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
s.query("network interconnection sunflower");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[16, 18]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"network interconnection sunflower\"",
|
||||
"\"network interconnection sunflowering\"",
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_typo_exact_attribute() {
|
||||
let index = create_index();
|
||||
|
||||
index
|
||||
.update_settings(|s| {
|
||||
s.set_exact_attributes(["text"].iter().map(ToString::to_string).collect());
|
||||
s.set_searchable_fields(
|
||||
["text", "tolerant_text"].iter().map(ToString::to_string).collect(),
|
||||
);
|
||||
s.set_exact_words(["quivk"].iter().map(ToString::to_string).collect())
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
let txn = index.read_txn().unwrap();
|
||||
|
||||
let ot = index.min_word_len_one_typo(&txn).unwrap();
|
||||
let tt = index.min_word_len_two_typos(&txn).unwrap();
|
||||
insta::assert_debug_snapshot!(ot, @"5");
|
||||
insta::assert_debug_snapshot!(tt, @"9");
|
||||
|
||||
// Exact match returns both exact attributes and tolerant ones.
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
s.query("the quick brown fox jumps over the lazy dog");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 24, 25]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the quick brown fox jumps over the lazy dog\"",
|
||||
"__does_not_exist__",
|
||||
"__does_not_exist__",
|
||||
]
|
||||
"###);
|
||||
let texts = collect_field_values(&index, &txn, "tolerant_text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"__does_not_exist__",
|
||||
"\"the quick brown fox jumps over the lazy dog\"",
|
||||
"\"the quivk brown fox jumps over the lazy dog\"",
|
||||
]
|
||||
"###);
|
||||
|
||||
// 1 typo only returns the tolerant attribute
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
s.query("the quidk brown fox jumps over the lazy dog");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[24, 25]");
|
||||
let texts = collect_field_values(&index, &txn, "tolerant_text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the quick brown fox jumps over the lazy dog\"",
|
||||
"\"the quivk brown fox jumps over the lazy dog\"",
|
||||
]
|
||||
"###);
|
||||
|
||||
// combine with exact words
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
s.query("the quivk brown fox jumps over the lazy dog");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[23, 25]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the quivk brown fox jumps over the lazy dog\"",
|
||||
"__does_not_exist__",
|
||||
]
|
||||
"###);
|
||||
let texts = collect_field_values(&index, &txn, "tolerant_text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"__does_not_exist__",
|
||||
"\"the quivk brown fox jumps over the lazy dog\"",
|
||||
]
|
||||
"###);
|
||||
|
||||
// No result in tolerant attribute
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
s.query("the quicest brownest fox jummps over the laziest dog");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_ngram_typos() {
|
||||
let index = create_index();
|
||||
|
Loading…
Reference in New Issue
Block a user