mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-11 14:04:31 +01:00
Merge pull request #3703 from meilisearch/search-refactor-test-typo-tolerance
Search refactor test typo tolerance + some bugfixes
This commit is contained in:
commit
dd007dceca
@ -330,6 +330,8 @@ pub fn execute_search(
|
|||||||
ctx.index.documents_ids(ctx.txn)?
|
ctx.index.documents_ids(ctx.txn)?
|
||||||
};
|
};
|
||||||
|
|
||||||
|
check_sort_criteria(ctx, sort_criteria.as_ref())?;
|
||||||
|
|
||||||
let mut located_query_terms = None;
|
let mut located_query_terms = None;
|
||||||
let bucket_sort_output = if let Some(query) = query {
|
let bucket_sort_output = if let Some(query) = query {
|
||||||
// We make sure that the analyzer is aware of the stop words
|
// We make sure that the analyzer is aware of the stop words
|
||||||
@ -352,8 +354,6 @@ pub fn execute_search(
|
|||||||
let graph = QueryGraph::from_query(ctx, &query_terms)?;
|
let graph = QueryGraph::from_query(ctx, &query_terms)?;
|
||||||
located_query_terms = Some(query_terms);
|
located_query_terms = Some(query_terms);
|
||||||
|
|
||||||
check_sort_criteria(ctx, sort_criteria.as_ref())?;
|
|
||||||
|
|
||||||
let ranking_rules =
|
let ranking_rules =
|
||||||
get_ranking_rules_for_query_graph_search(ctx, sort_criteria, terms_matching_strategy)?;
|
get_ranking_rules_for_query_graph_search(ctx, sort_criteria, terms_matching_strategy)?;
|
||||||
|
|
||||||
|
@ -20,10 +20,9 @@ if `words` doesn't exist before it.
|
|||||||
|
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
|
||||||
use crate::{
|
use crate::index::tests::TempIndex;
|
||||||
index::tests::TempIndex, search::new::tests::collect_field_values, Criterion, Search,
|
use crate::search::new::tests::collect_field_values;
|
||||||
SearchResult, TermsMatchingStrategy,
|
use crate::{Criterion, Search, SearchResult, TermsMatchingStrategy};
|
||||||
};
|
|
||||||
|
|
||||||
fn create_index() -> TempIndex {
|
fn create_index() -> TempIndex {
|
||||||
let index = TempIndex::new();
|
let index = TempIndex::new();
|
||||||
@ -134,6 +133,14 @@ fn create_index() -> TempIndex {
|
|||||||
"id": 23,
|
"id": 23,
|
||||||
"text": "the quivk brown fox jumps over the lazy dog"
|
"text": "the quivk brown fox jumps over the lazy dog"
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"id": 24,
|
||||||
|
"tolerant_text": "the quick brown fox jumps over the lazy dog",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 25,
|
||||||
|
"tolerant_text": "the quivk brown fox jumps over the lazy dog",
|
||||||
|
},
|
||||||
]))
|
]))
|
||||||
.unwrap();
|
.unwrap();
|
||||||
index
|
index
|
||||||
@ -212,79 +219,6 @@ fn test_default_typo() {
|
|||||||
"\"the quickest brownest fox jumps over the laziest dog\"",
|
"\"the quickest brownest fox jumps over the laziest dog\"",
|
||||||
]
|
]
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
// 1 typo on one word, swapped letters
|
|
||||||
let mut s = Search::new(&txn, &index);
|
|
||||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
|
||||||
s.query("the quikc borwn fox jupms over the lazy dog");
|
|
||||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
|
||||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]");
|
|
||||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
|
||||||
insta::assert_debug_snapshot!(texts, @r###"
|
|
||||||
[
|
|
||||||
"\"the quick brown fox jumps over the lazy dog\"",
|
|
||||||
]
|
|
||||||
"###);
|
|
||||||
|
|
||||||
// 1 first letter typo on a word <5 bytes, replaced letter
|
|
||||||
let mut s = Search::new(&txn, &index);
|
|
||||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
|
||||||
s.query("the nuick brown fox jumps over the lazy dog");
|
|
||||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
|
||||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]");
|
|
||||||
|
|
||||||
// 1 first letter typo on a word <5 bytes, missing letter
|
|
||||||
let mut s = Search::new(&txn, &index);
|
|
||||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
|
||||||
s.query("the uick brown fox jumps over the lazy dog");
|
|
||||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
|
||||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]");
|
|
||||||
|
|
||||||
// 1 typo on all words >=5 bytes, replaced letters
|
|
||||||
let mut s = Search::new(&txn, &index);
|
|
||||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
|
||||||
s.query("the quack brawn fox junps over the lazy dog");
|
|
||||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
|
||||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]");
|
|
||||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
|
||||||
insta::assert_debug_snapshot!(texts, @r###"
|
|
||||||
[
|
|
||||||
"\"the quick brown fox jumps over the lazy dog\"",
|
|
||||||
]
|
|
||||||
"###);
|
|
||||||
|
|
||||||
// 2 typos on words < 9 bytes
|
|
||||||
let mut s = Search::new(&txn, &index);
|
|
||||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
|
||||||
s.query("the quckest brawnert fox jumps over the aziest dog");
|
|
||||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
|
||||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]");
|
|
||||||
|
|
||||||
// 2 typos on words >= 9 bytes: missing letters, missing first letter, replaced letters
|
|
||||||
let mut s = Search::new(&txn, &index);
|
|
||||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
|
||||||
s.query("the extravant fox kyrocketed over the lamguorout dog");
|
|
||||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
|
||||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6]");
|
|
||||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
|
||||||
insta::assert_debug_snapshot!(texts, @r###"
|
|
||||||
[
|
|
||||||
"\"the extravagant fox skyrocketed over the languorous dog\"",
|
|
||||||
]
|
|
||||||
"###);
|
|
||||||
|
|
||||||
// 2 typos on words >= 9 bytes: 2 extra letters in a single word, swapped letters + extra letter, replaced letters
|
|
||||||
let mut s = Search::new(&txn, &index);
|
|
||||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
|
||||||
s.query("the extravaganttt fox sktyrocnketed over the lagnuorrous dog");
|
|
||||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
|
||||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6]");
|
|
||||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
|
||||||
insta::assert_debug_snapshot!(texts, @r###"
|
|
||||||
[
|
|
||||||
"\"the extravagant fox skyrocketed over the languorous dog\"",
|
|
||||||
]
|
|
||||||
"###);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -301,6 +235,160 @@ fn test_phrase_no_typo_allowed() {
|
|||||||
insta::assert_debug_snapshot!(texts, @"[]");
|
insta::assert_debug_snapshot!(texts, @"[]");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_typo_exact_word() {
|
||||||
|
let index = create_index();
|
||||||
|
|
||||||
|
index
|
||||||
|
.update_settings(|s| {
|
||||||
|
s.set_exact_words(
|
||||||
|
["quick", "quack", "sunflower"].iter().map(ToString::to_string).collect(),
|
||||||
|
)
|
||||||
|
})
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let txn = index.read_txn().unwrap();
|
||||||
|
|
||||||
|
let ot = index.min_word_len_one_typo(&txn).unwrap();
|
||||||
|
let tt = index.min_word_len_two_typos(&txn).unwrap();
|
||||||
|
insta::assert_debug_snapshot!(ot, @"5");
|
||||||
|
insta::assert_debug_snapshot!(tt, @"9");
|
||||||
|
|
||||||
|
// don't match quivk
|
||||||
|
let mut s = Search::new(&txn, &index);
|
||||||
|
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||||
|
s.query("the quick brown fox jumps over the lazy dog");
|
||||||
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]");
|
||||||
|
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||||
|
insta::assert_debug_snapshot!(texts, @r###"
|
||||||
|
[
|
||||||
|
"\"the quick brown fox jumps over the lazy dog\"",
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
|
||||||
|
// Don't match quick
|
||||||
|
let mut s = Search::new(&txn, &index);
|
||||||
|
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||||
|
s.query("the quack brown fox jumps over the lazy dog");
|
||||||
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]");
|
||||||
|
|
||||||
|
// words not in exact_words (quicest, jummps) have normal typo handling
|
||||||
|
let mut s = Search::new(&txn, &index);
|
||||||
|
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||||
|
s.query("the quicest brownest fox jummps over the laziest dog");
|
||||||
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3]");
|
||||||
|
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||||
|
insta::assert_debug_snapshot!(texts, @r###"
|
||||||
|
[
|
||||||
|
"\"the quickest brownest fox jumps over the laziest dog\"",
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
|
||||||
|
// exact words do not disable prefix (sunflowering OK, but no sunflowar or sun flower)
|
||||||
|
let mut s = Search::new(&txn, &index);
|
||||||
|
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||||
|
s.query("network interconnection sunflower");
|
||||||
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[16, 18]");
|
||||||
|
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||||
|
insta::assert_debug_snapshot!(texts, @r###"
|
||||||
|
[
|
||||||
|
"\"network interconnection sunflower\"",
|
||||||
|
"\"network interconnection sunflowering\"",
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_typo_exact_attribute() {
|
||||||
|
let index = create_index();
|
||||||
|
|
||||||
|
index
|
||||||
|
.update_settings(|s| {
|
||||||
|
s.set_exact_attributes(["text"].iter().map(ToString::to_string).collect());
|
||||||
|
s.set_searchable_fields(
|
||||||
|
["text", "tolerant_text"].iter().map(ToString::to_string).collect(),
|
||||||
|
);
|
||||||
|
s.set_exact_words(["quivk"].iter().map(ToString::to_string).collect())
|
||||||
|
})
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
let txn = index.read_txn().unwrap();
|
||||||
|
|
||||||
|
let ot = index.min_word_len_one_typo(&txn).unwrap();
|
||||||
|
let tt = index.min_word_len_two_typos(&txn).unwrap();
|
||||||
|
insta::assert_debug_snapshot!(ot, @"5");
|
||||||
|
insta::assert_debug_snapshot!(tt, @"9");
|
||||||
|
|
||||||
|
// Exact match returns both exact attributes and tolerant ones.
|
||||||
|
let mut s = Search::new(&txn, &index);
|
||||||
|
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||||
|
s.query("the quick brown fox jumps over the lazy dog");
|
||||||
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 24, 25]");
|
||||||
|
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||||
|
insta::assert_debug_snapshot!(texts, @r###"
|
||||||
|
[
|
||||||
|
"\"the quick brown fox jumps over the lazy dog\"",
|
||||||
|
"__does_not_exist__",
|
||||||
|
"__does_not_exist__",
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
let texts = collect_field_values(&index, &txn, "tolerant_text", &documents_ids);
|
||||||
|
insta::assert_debug_snapshot!(texts, @r###"
|
||||||
|
[
|
||||||
|
"__does_not_exist__",
|
||||||
|
"\"the quick brown fox jumps over the lazy dog\"",
|
||||||
|
"\"the quivk brown fox jumps over the lazy dog\"",
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
|
||||||
|
// 1 typo only returns the tolerant attribute
|
||||||
|
let mut s = Search::new(&txn, &index);
|
||||||
|
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||||
|
s.query("the quidk brown fox jumps over the lazy dog");
|
||||||
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[24, 25]");
|
||||||
|
let texts = collect_field_values(&index, &txn, "tolerant_text", &documents_ids);
|
||||||
|
insta::assert_debug_snapshot!(texts, @r###"
|
||||||
|
[
|
||||||
|
"\"the quick brown fox jumps over the lazy dog\"",
|
||||||
|
"\"the quivk brown fox jumps over the lazy dog\"",
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
|
||||||
|
// combine with exact words
|
||||||
|
let mut s = Search::new(&txn, &index);
|
||||||
|
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||||
|
s.query("the quivk brown fox jumps over the lazy dog");
|
||||||
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[23, 25]");
|
||||||
|
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||||
|
insta::assert_debug_snapshot!(texts, @r###"
|
||||||
|
[
|
||||||
|
"\"the quivk brown fox jumps over the lazy dog\"",
|
||||||
|
"__does_not_exist__",
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
let texts = collect_field_values(&index, &txn, "tolerant_text", &documents_ids);
|
||||||
|
insta::assert_debug_snapshot!(texts, @r###"
|
||||||
|
[
|
||||||
|
"__does_not_exist__",
|
||||||
|
"\"the quivk brown fox jumps over the lazy dog\"",
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
|
||||||
|
// No result in tolerant attribute
|
||||||
|
let mut s = Search::new(&txn, &index);
|
||||||
|
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||||
|
s.query("the quicest brownest fox jummps over the laziest dog");
|
||||||
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]");
|
||||||
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
fn test_ngram_typos() {
|
fn test_ngram_typos() {
|
||||||
let index = create_index();
|
let index = create_index();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user