Add search tests

This commit is contained in:
Loïc Lecrenier 2023-04-04 16:18:22 +02:00
parent b439d36807
commit 62b9c6fbee
4 changed files with 887 additions and 0 deletions

View File

@ -0,0 +1,3 @@
// Search test modules, one per group of tested properties.

// Ngram (2gram/3gram) and split-words behaviour.
pub mod ngram_split_words;
// Typo tolerance and the `typo` ranking rule.
pub mod typo;
// The `words` ranking rule and the terms matching strategies.
pub mod words_tms;

View File

@ -0,0 +1,255 @@
/*!
This module tests the following properties:
1. Two consecutive words from a query can be combined into a "2gram"
2. Three consecutive words from a query can be combined into a "3gram"
3. A word from the query can be split into two consecutive words (split words)
4. A 2gram can be split into two words
5. A 3gram cannot be split into two words
6. 2grams can contain up to 1 typo
7. 3grams cannot have typos
8. 2grams and 3grams can be prefix tolerant
9. Disabling typo tolerance also disables the split words feature
10. Disabling typo tolerance does not disable prefix tolerance
11. Disabling typo tolerance does not disable ngram tolerance
12. Prefix tolerance is disabled for the last word if a space follows it
13. Ngrams cannot be formed by combining a phrase and a word or two phrases
*/
use crate::{index::tests::TempIndex, Criterion, Search, SearchResult, TermsMatchingStrategy};
/// Builds the temporary index shared by the tests of this module.
///
/// `id` is the primary key, `text` is the only searchable field and `words`
/// is the only ranking rule. The four documents spell "sunflower(s)" either
/// as one word or as two words.
fn create_index() -> TempIndex {
    let tmp_index = TempIndex::new();

    tmp_index
        .update_settings(|settings| {
            settings.set_primary_key(String::from("id"));
            settings.set_searchable_fields(vec![String::from("text")]);
            settings.set_criteria(vec![Criterion::Words]);
        })
        .unwrap();

    tmp_index
        .add_documents(documents!([
            { "id": 0, "text": "the sun flowers are pretty" },
            { "id": 1, "text": "the sun flower is tall" },
            { "id": 2, "text": "the sunflowers are pretty" },
            { "id": 3, "text": "the sunflower is tall" }
        ]))
        .unwrap();

    tmp_index
}
/// Two consecutive query words can be combined into a 2gram.
#[test]
fn test_2gram_simple() {
    let index = create_index();
    // Typos are disabled so that only ngrams/prefixes can explain the matches.
    index
        .update_settings(|settings| {
            settings.set_autorize_typos(false);
        })
        .unwrap();

    let rtxn = index.read_txn().unwrap();
    let mut search = Search::new(&rtxn, &index);
    search.query("sun flower");
    search.terms_matching_strategy(TermsMatchingStrategy::All);
    let hits = search.execute().unwrap().documents_ids;

    // the documents spelling it "sun flower" (two words) are returned as well
    insta::assert_snapshot!(format!("{hits:?}"), @"[0, 1, 2, 3]");
}
/// Three consecutive query words can be combined into a 3gram.
#[test]
fn test_3gram_simple() {
    let index = create_index();
    index
        .update_settings(|settings| {
            settings.set_autorize_typos(false);
        })
        .unwrap();

    let rtxn = index.read_txn().unwrap();
    let mut search = Search::new(&rtxn, &index);
    search.query("sun flower s are");
    search.terms_matching_strategy(TermsMatchingStrategy::All);
    let hits = search.execute().unwrap().documents_ids;

    insta::assert_snapshot!(format!("{hits:?}"), @"[0, 2]");
}
/// A 2gram built from the query ("sun flawer") may still contain a typo.
#[test]
fn test_2gram_typo() {
    let index = create_index();
    let rtxn = index.read_txn().unwrap();

    let mut search = Search::new(&rtxn, &index);
    search.query("sun flawer");
    search.terms_matching_strategy(TermsMatchingStrategy::All);
    let hits = search.execute().unwrap().documents_ids;

    insta::assert_snapshot!(format!("{hits:?}"), @"[0, 1, 2, 3]");
}
/// Disabling typo tolerance must not disable ngrams.
#[test]
fn test_no_disable_ngrams() {
    let index = create_index();
    index
        .update_settings(|settings| {
            settings.set_autorize_typos(false);
        })
        .unwrap();

    let rtxn = index.read_txn().unwrap();
    let mut search = Search::new(&rtxn, &index);
    // note the trailing space at the end of the query
    search.query("sun flower ");
    search.terms_matching_strategy(TermsMatchingStrategy::All);
    let hits = search.execute().unwrap().documents_ids;

    // the documents containing `sun flower` (1) and `sunflower` (3)
    insta::assert_snapshot!(format!("{hits:?}"), @"[1, 3]");
}
/// A 2gram can be prefix tolerant.
#[test]
fn test_2gram_prefix() {
    let index = create_index();
    index
        .update_settings(|settings| {
            settings.set_autorize_typos(false);
        })
        .unwrap();

    let rtxn = index.read_txn().unwrap();
    let mut search = Search::new(&rtxn, &index);
    search.query("sun flow");
    search.terms_matching_strategy(TermsMatchingStrategy::All);
    let hits = search.execute().unwrap().documents_ids;

    // includes the documents with a word starting with `sunflow`
    insta::assert_snapshot!(format!("{hits:?}"), @"[0, 1, 2, 3]");
}
/// A 3gram can be prefix tolerant.
#[test]
fn test_3gram_prefix() {
    let index = create_index();
    index
        .update_settings(|settings| {
            settings.set_autorize_typos(false);
        })
        .unwrap();

    let rtxn = index.read_txn().unwrap();
    let mut search = Search::new(&rtxn, &index);
    search.query("su nf l");
    search.terms_matching_strategy(TermsMatchingStrategy::All);
    let hits = search.execute().unwrap().documents_ids;

    // the documents with a word starting with `sunfl`
    insta::assert_snapshot!(format!("{hits:?}"), @"[2, 3]");
}
/// A single query word can be split into two consecutive words.
#[test]
fn test_split_words() {
    let index = create_index();
    let rtxn = index.read_txn().unwrap();

    let mut search = Search::new(&rtxn, &index);
    // note the trailing space at the end of the query
    search.query("sunflower ");
    search.terms_matching_strategy(TermsMatchingStrategy::All);
    let hits = search.execute().unwrap().documents_ids;

    // includes the document with `sun flower` (1) next to the `sunflower` ones
    insta::assert_snapshot!(format!("{hits:?}"), @"[1, 2, 3]");
}
/// Disabling typo tolerance also disables the split words feature.
#[test]
fn test_disable_split_words() {
    let index = create_index();
    index
        .update_settings(|settings| {
            settings.set_autorize_typos(false);
        })
        .unwrap();

    let rtxn = index.read_txn().unwrap();
    let mut search = Search::new(&rtxn, &index);
    // note the trailing space at the end of the query
    search.query("sunflower ");
    search.terms_matching_strategy(TermsMatchingStrategy::All);
    let hits = search.execute().unwrap().documents_ids;

    // the document containing `sun flower` is not returned
    insta::assert_snapshot!(format!("{hits:?}"), @"[3]");
}
/// A 2gram built from the query can itself be split into two words.
#[test]
fn test_2gram_split_words() {
    let index = create_index();
    let rtxn = index.read_txn().unwrap();

    let mut search = Search::new(&rtxn, &index);
    search.query("sunf lower");
    search.terms_matching_strategy(TermsMatchingStrategy::All);
    let hits = search.execute().unwrap().documents_ids;

    // documents with "sunflower", "sun flower", or "sunflower" + 1 typo
    insta::assert_snapshot!(format!("{hits:?}"), @"[1, 2, 3]");
}
/// A 3gram built from the query cannot be split into two words.
#[test]
fn test_3gram_no_split_words() {
    let index = create_index();
    let rtxn = index.read_txn().unwrap();

    let mut search = Search::new(&rtxn, &index);
    search.query("sunf lo wer");
    search.terms_matching_strategy(TermsMatchingStrategy::All);
    let hits = search.execute().unwrap().documents_ids;

    // the documents with `sun flower` are not returned
    insta::assert_snapshot!(format!("{hits:?}"), @"[2, 3]");
}
/// A 3gram built from the query cannot contain typos.
#[test]
fn test_3gram_no_typos() {
    let index = create_index();
    let rtxn = index.read_txn().unwrap();

    let mut search = Search::new(&rtxn, &index);
    search.query("sunf la wer");
    search.terms_matching_strategy(TermsMatchingStrategy::All);
    let hits = search.execute().unwrap().documents_ids;

    insta::assert_snapshot!(format!("{hits:?}"), @"[]");
}
/// Ngrams cannot be formed from a phrase and a word, nor from two phrases.
#[test]
fn test_no_ngram_phrases() {
    let index = create_index();
    let rtxn = index.read_txn().unwrap();

    // Runs `query` with the `All` strategy and returns the matching document ids.
    let search_all = |query: &str| {
        let mut search = Search::new(&rtxn, &index);
        search.terms_matching_strategy(TermsMatchingStrategy::All);
        search.query(query);
        search.execute().unwrap().documents_ids
    };

    // a phrase followed by a word
    let hits = search_all("\"sun\" flower");
    insta::assert_snapshot!(format!("{hits:?}"), @"[0, 1]");

    // two phrases
    let hits = search_all("\"sun\" \"flower\"");
    insta::assert_snapshot!(format!("{hits:?}"), @"[1]");
}

View File

@ -0,0 +1,363 @@
/*!
This module tests the following properties:
1. The `words` ranking rule is typo-tolerant
2. Typo-tolerance handles missing letters, extra letters, replaced letters, and swapped letters (at least)
3. Words which are < `min_word_len_one_typo` are not typo tolerant
4. Words which are >= `min_word_len_one_typo` but < `min_word_len_two_typos` can have one typo
5. Words which are >= `min_word_len_two_typos` can have two typos
6. A typo on the first letter of a word counts as two typos
7. Phrases are not typo tolerant
8. 2grams can have 1 typo if they are larger than `min_word_len_two_typos`
9. 3grams are not typo tolerant
10. The `typo` ranking rule assumes the role of the `words` ranking rule implicitly
if `words` doesn't exist before it.
11. The `typo` ranking rule places documents with the same number of typos in the same bucket
12. Prefix tolerance costs nothing according to the typo ranking rule
13. Split words cost 1 typo according to the typo ranking rule
14. Synonyms cost nothing according to the typo ranking rule
*/
use std::collections::HashMap;
use crate::{
index::tests::TempIndex, Criterion,
Search, SearchResult, TermsMatchingStrategy,
};
/// Builds the temporary index shared by the typo tests.
///
/// `id` is the primary key, `text` is the only searchable field and `words`
/// is the only ranking rule. The documents are variations of the
/// "quick brown fox" sentence plus a few "network interconnection sunflower"
/// sentences with misspellings.
fn create_index() -> TempIndex {
    let tmp_index = TempIndex::new();

    tmp_index
        .update_settings(|settings| {
            settings.set_primary_key(String::from("id"));
            settings.set_searchable_fields(vec![String::from("text")]);
            settings.set_criteria(vec![Criterion::Words]);
        })
        .unwrap();

    tmp_index
        .add_documents(documents!([
            { "id": 0, "text": "the quick brown fox jumps over the lazy dog" },
            { "id": 1, "text": "the quick brown foxes jump over the lazy dog" },
            { "id": 2, "text": "the quick brown fax sends a letter to the dog" },
            { "id": 3, "text": "the quickest brownest fox jumps over the laziest dog" },
            { "id": 4, "text": "a fox doesn't quack, that crown goes to the duck." },
            { "id": 5, "text": "the quicker browner fox jumped over the lazier dog" },
            // thanks thesaurus
            { "id": 6, "text": "the extravagant fox skyrocketed over the languorous dog" },
            { "id": 7, "text": "the quick brown fox jumps over the lazy" },
            { "id": 8, "text": "the quick brown fox jumps over the" },
            { "id": 9, "text": "the quick brown fox jumps over" },
            { "id": 10, "text": "the quick brown fox jumps" },
            { "id": 11, "text": "the quick brown fox" },
            { "id": 12, "text": "the quick brown" },
            { "id": 13, "text": "the quick" },
            { "id": 14, "text": "netwolk interconections sunflawar" },
            { "id": 15, "text": "network interconnections sunflawer" },
            { "id": 16, "text": "network interconnection sunflower" },
            { "id": 17, "text": "network interconnection sun flower" },
            { "id": 18, "text": "network interconnection sunflowering" },
            { "id": 19, "text": "network interconnection sun flowering" },
            { "id": 20, "text": "network interconnection sunflowar" },
            { "id": 21, "text": "the fast brownish fox jumps over the lackadaisical dog" },
            { "id": 22, "text": "the quick brown fox jumps over the lackadaisical dog" },
        ]))
        .unwrap();

    tmp_index
}
/// With typo tolerance disabled, only the exact sentence matches.
#[test]
fn test_no_typo() {
    let index = create_index();
    index
        .update_settings(|settings| {
            settings.set_autorize_typos(false);
        })
        .unwrap();

    let rtxn = index.read_txn().unwrap();
    let mut search = Search::new(&rtxn, &index);
    search.query("the quick brown fox jumps over the lazy dog");
    search.terms_matching_strategy(TermsMatchingStrategy::All);
    let hits = search.execute().unwrap().documents_ids;

    insta::assert_snapshot!(format!("{hits:?}"), @"[0]");
}
/// Exercises typo tolerance with the default word-length thresholds.
///
/// Every query is run with the `All` terms matching strategy against the
/// index built by `create_index`.
#[test]
fn test_default_typo() {
    let index = create_index();
    let rtxn = index.read_txn().unwrap();

    // The default thresholds the rest of this test relies on.
    let one_typo_len = index.min_word_len_one_typo(&rtxn).unwrap();
    let two_typos_len = index.min_word_len_two_typos(&rtxn).unwrap();
    insta::assert_debug_snapshot!(one_typo_len, @"5");
    insta::assert_debug_snapshot!(two_typos_len, @"9");

    // Runs `query` with the `All` strategy and returns the matching document ids.
    let search_all = |query: &str| {
        let mut search = Search::new(&rtxn, &index);
        search.terms_matching_strategy(TermsMatchingStrategy::All);
        search.query(query);
        search.execute().unwrap().documents_ids
    };

    // no typo at all
    let hits = search_all("the quick brown fox jumps over the lazy dog");
    insta::assert_snapshot!(format!("{hits:?}"), @"[0]");

    // one typo in one word: replaced letter
    let hits = search_all("the quack brown fox jumps over the lazy dog");
    insta::assert_snapshot!(format!("{hits:?}"), @"[0]");

    // one typo per word: missing letter, extra letter
    let hits = search_all("the quicest brownest fox jummps over the laziest dog");
    insta::assert_snapshot!(format!("{hits:?}"), @"[3]");

    // one typo per word: swapped letters
    let hits = search_all("the quikc borwn fox jupms over the lazy dog");
    insta::assert_snapshot!(format!("{hits:?}"), @"[0]");

    // typo on the first letter of a short word: replaced letter
    let hits = search_all("the nuick brown fox jumps over the lazy dog");
    insta::assert_snapshot!(format!("{hits:?}"), @"[]");

    // typo on the first letter of a short word: missing letter
    let hits = search_all("the uick brown fox jumps over the lazy dog");
    insta::assert_snapshot!(format!("{hits:?}"), @"[]");

    // one typo on every word of at least 5 bytes: replaced letters
    let hits = search_all("the quack brawn fox junps over the lazy dog");
    insta::assert_snapshot!(format!("{hits:?}"), @"[0]");

    // two typos on words shorter than 9 bytes
    let hits = search_all("the quckest brawnert fox jumps over the aziest dog");
    insta::assert_snapshot!(format!("{hits:?}"), @"[]");

    // two typos on words of at least 9 bytes:
    // missing letters, missing first letter, replaced letters
    let hits = search_all("the extravant fox kyrocketed over the lamguorout dog");
    insta::assert_snapshot!(format!("{hits:?}"), @"[6]");

    // two typos on words of at least 9 bytes:
    // 2 extra letters in a single word, swapped letters + extra letter, replaced letters
    let hits = search_all("the extravaganttt fox sktyrocnketed over the lagnuorrous dog");
    insta::assert_snapshot!(format!("{hits:?}"), @"[6]");
}
/// Phrases are not typo tolerant: a misspelled word inside quotes matches nothing.
#[test]
fn test_phrase_no_typo_allowed() {
    let index = create_index();
    let rtxn = index.read_txn().unwrap();

    let mut search = Search::new(&rtxn, &index);
    search.query("the \"quick brewn\" fox jumps over the lazy dog");
    search.terms_matching_strategy(TermsMatchingStrategy::All);
    let hits = search.execute().unwrap().documents_ids;

    insta::assert_snapshot!(format!("{hits:?}"), @"[]");
}
/// A 2gram may contain a typo, a 3gram may not.
#[test]
fn test_ngram_typos() {
    let index = create_index();
    let rtxn = index.read_txn().unwrap();

    // Runs `query` with the `All` strategy and returns the matching document ids.
    let search_all = |query: &str| {
        let mut search = Search::new(&rtxn, &index);
        search.terms_matching_strategy(TermsMatchingStrategy::All);
        search.query(query);
        search.execute().unwrap().documents_ids
    };

    // "extra lagant": two words forming a misspelled "extravagant"
    let hits = search_all("the extra lagant fox skyrocketed over the languorous dog");
    insta::assert_snapshot!(format!("{hits:?}"), @"[6]");

    // "ex tra lagant": three words forming the same misspelling
    let hits = search_all("the ex tra lagant fox skyrocketed over the languorous dog");
    insta::assert_snapshot!(format!("{hits:?}"), @"[]");
}
/// The `typo` ranking rule implicitly assumes the role of the `words` ranking
/// rule when `words` does not precede it.
///
/// The same query is run once with `[Typo]` and once with `[Words, Typo]` as
/// criteria; both searches must return the documents in the same order.
#[test]
fn test_typo_ranking_rule_not_preceded_by_words_ranking_rule() {
    let index = create_index();
    index
        .update_settings(|s| {
            s.set_criteria(vec![Criterion::Typo]);
        })
        .unwrap();

    let txn = index.read_txn().unwrap();
    let mut s = Search::new(&txn, &index);
    s.terms_matching_strategy(TermsMatchingStrategy::Last);
    s.query("the quick brown fox jumps over the lazy dog");
    let SearchResult { documents_ids: ids_1, .. } = s.execute().unwrap();
    insta::assert_snapshot!(format!("{ids_1:?}"), @"[0, 7, 8, 9, 10, 11, 1, 2, 12, 13, 4, 3, 5, 6, 21]");

    // Release the read transaction before changing the settings: a read
    // transaction opened earlier would not observe the new criteria, and the
    // second search would silently run with `[Typo]` again.
    drop(txn);

    index
        .update_settings(|s| {
            s.set_criteria(vec![Criterion::Words, Criterion::Typo]);
        })
        .unwrap();

    // Fresh transaction, so that the `[Words, Typo]` criteria are the ones in use.
    let txn = index.read_txn().unwrap();
    let mut s = Search::new(&txn, &index);
    s.terms_matching_strategy(TermsMatchingStrategy::Last);
    s.query("the quick brown fox jumps over the lazy dog");
    let SearchResult { documents_ids: ids_2, .. } = s.execute().unwrap();
    insta::assert_snapshot!(format!("{ids_2:?}"), @"[0, 7, 8, 9, 10, 11, 1, 2, 12, 13, 4, 3, 5, 6, 21]");

    assert_eq!(ids_1, ids_2);
}
/// The `typo` ranking rule puts documents with the same number of typos in
/// the same bucket.
#[test]
fn test_typo_bucketing() {
    let index = create_index();

    // First search with only the `words` ranking rule. The read transaction is
    // scoped so that it is released before the settings are updated.
    {
        let rtxn = index.read_txn().unwrap();
        let mut search = Search::new(&rtxn, &index);
        search.query("network interconnection sunflower");
        search.terms_matching_strategy(TermsMatchingStrategy::All);
        let hits = search.execute().unwrap().documents_ids;
        insta::assert_snapshot!(format!("{hits:?}"), @"[14, 15, 16, 17, 18, 20]");
    }

    // Then switch to the `typo` ranking rule.
    index
        .update_settings(|settings| {
            settings.set_criteria(vec![Criterion::Typo]);
        })
        .unwrap();

    let rtxn = index.read_txn().unwrap();
    // Runs `query` with the `All` strategy and returns the matching document ids.
    let search_all = |query: &str| {
        let mut search = Search::new(&rtxn, &index);
        search.terms_matching_strategy(TermsMatchingStrategy::All);
        search.query(query);
        search.execute().unwrap().documents_ids
    };

    let hits = search_all("network interconnection sunflower");
    insta::assert_snapshot!(format!("{hits:?}"), @"[16, 18, 17, 20, 15, 14]");

    let hits = search_all("network interconnection sun flower");
    insta::assert_snapshot!(format!("{hits:?}"), @"[17, 19, 16, 18, 20, 15]");
}
/// Checks how synonyms are costed by the `typo` ranking rule.
#[test]
fn test_typo_synonyms() {
    let index = create_index();
    index
        .update_settings(|settings| {
            settings.set_criteria(vec![Criterion::Typo]);

            let mut synonyms = HashMap::new();
            synonyms.insert(String::from("lackadaisical"), vec![String::from("lazy")]);
            synonyms.insert(String::from("fast brownish"), vec![String::from("quick brown")]);
            settings.set_synonyms(synonyms);
        })
        .unwrap();

    let rtxn = index.read_txn().unwrap();
    // Runs `query` with the `All` strategy and returns the matching document ids.
    let search_all = |query: &str| {
        let mut search = Search::new(&rtxn, &index);
        search.terms_matching_strategy(TermsMatchingStrategy::All);
        search.query(query);
        search.execute().unwrap().documents_ids
    };

    // single-word synonym
    let hits = search_all("the quick brown fox jumps over the lackadaisical dog");
    insta::assert_snapshot!(format!("{hits:?}"), @"[21, 0]");

    // multi-word synonym
    // TODO: is this correct? interaction of ngrams + synonyms means that the
    // multi-word synonyms end up having a typo cost. This is probably not what we want.
    let hits = search_all("the fast brownish fox jumps over the lackadaisical dog");
    insta::assert_snapshot!(format!("{hits:?}"), @"[21, 0]");
}

View File

@ -0,0 +1,266 @@
/*!
This module tests the following properties:
1. The `last` term matching strategy starts removing terms from the query
starting from the end if no more results match it.
2. Phrases are never deleted by the `last` term matching strategy
3. Duplicate words don't affect the ranking of a document according to the `words` ranking rule
4. The proximity of the first and last word of a phrase to its adjacent terms is taken into
account by the proximity ranking rule.
5. Unclosed double quotes still make a phrase
6. The `all` term matching strategy does not remove any term from the query
7. The search is capable of returning no results if no documents match the query
*/
use crate::{index::tests::TempIndex, Criterion, Search, SearchResult, TermsMatchingStrategy};
/// Builds the temporary index shared by the terms-matching-strategy tests.
///
/// `id` is the primary key, `text` is the only searchable field and `words`
/// is the only ranking rule. The documents are progressively longer prefixes
/// of "the quick brown fox jumps over the lazy dog", followed by variations
/// of that sentence (reordered, extra words, different punctuation).
fn create_quick_brown_fox_trivial_index() -> TempIndex {
    let tmp_index = TempIndex::new();

    tmp_index
        .update_settings(|settings| {
            settings.set_primary_key(String::from("id"));
            settings.set_searchable_fields(vec![String::from("text")]);
            settings.set_criteria(vec![Criterion::Words]);
        })
        .unwrap();

    tmp_index
        .add_documents(documents!([
            { "id": 0, "text": "", },
            { "id": 1, "text": "the", },
            { "id": 2, "text": "the quick", },
            { "id": 3, "text": "the quick brown", },
            { "id": 4, "text": "the quick brown fox", },
            { "id": 5, "text": "the quick brown fox jumps", },
            { "id": 6, "text": "the quick brown fox jumps over", },
            { "id": 7, "text": "the quick brown fox jumps over the", },
            { "id": 8, "text": "the quick brown fox jumps over the lazy", },
            { "id": 9, "text": "the quick brown fox jumps over the lazy dog", },
            { "id": 10, "text": "the brown quick fox jumps over the lazy dog", },
            { "id": 11, "text": "the quick brown fox talks to the lazy and slow dog", },
            { "id": 12, "text": "the quick brown fox talks to the lazy dog", },
            { "id": 13, "text": "the mighty and quick brown fox jumps over the lazy dog", },
            { "id": 14, "text": "the great quick brown fox jumps over the lazy dog", },
            { "id": 15, "text": "this quick brown and very scary fox jumps over the lazy dog", },
            { "id": 16, "text": "this quick brown and scary fox jumps over the lazy dog", },
            { "id": 17, "text": "the quick brown fox jumps over the really lazy dog", },
            { "id": 18, "text": "the brown quick fox jumps over the really lazy dog", },
            { "id": 19, "text": "the brown quick fox immediately jumps over the really lazy dog", },
            { "id": 20, "text": "the brown quick fox immediately jumps over the really lazy blue dog", },
            { "id": 21, "text": "the quick brown. quick brown fox. brown fox jumps. fox jumps over. over the lazy. the lazy dog.", },
            { "id": 22, "text": "the, quick, brown, fox, jumps, over, the, lazy, dog", }
        ]))
        .unwrap();

    tmp_index
}
/// The `last` strategy removes terms starting from the end of the query.
#[test]
fn test_words_tms_last_simple() {
    let index = create_quick_brown_fox_trivial_index();
    let rtxn = index.read_txn().unwrap();

    // Runs `query` with the `Last` strategy and returns the matching document ids.
    let search_last = |query: &str| {
        let mut search = Search::new(&rtxn, &index);
        search.query(query);
        search.terms_matching_strategy(TermsMatchingStrategy::Last);
        search.execute().unwrap().documents_ids
    };

    let hits = search_last("the quick brown fox jumps over the lazy dog");
    // 6 and 7 have the same score because "the" appears twice
    insta::assert_snapshot!(format!("{hits:?}"), @"[9, 10, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 8, 6, 7, 5, 4, 11, 12, 3]");

    // the first query word is in none of the documents
    let hits = search_last("extravagant the quick brown fox jumps over the lazy dog");
    insta::assert_snapshot!(format!("{hits:?}"), @"[]");
}
/// Phrases are never removed by the `last` terms matching strategy.
#[test]
fn test_words_tms_last_phrase() {
    let index = create_quick_brown_fox_trivial_index();
    let rtxn = index.read_txn().unwrap();

    // Runs `query` with the `Last` strategy and returns the matching document ids.
    let search_last = |query: &str| {
        let mut search = Search::new(&rtxn, &index);
        search.query(query);
        search.terms_matching_strategy(TermsMatchingStrategy::Last);
        search.execute().unwrap().documents_ids
    };

    // "the quick brown fox" is a phrase: this strategy does not delete it
    let hits = search_last("\"the quick brown fox\" jumps over the lazy dog");
    insta::assert_snapshot!(format!("{hits:?}"), @"[9, 17, 21, 8, 6, 7, 5, 4, 11, 12]");

    // "lazy" is a phrase too: it is not deleted either,
    // but the words before it can be
    let hits = search_last("\"the quick brown fox\" jumps over the \"lazy\" dog");
    insta::assert_snapshot!(format!("{hits:?}"), @"[9, 17, 21, 8, 11, 12]");

    // the whole query is one phrase: no term is removed
    let hits = search_last("\"the quick brown fox jumps over the lazy dog\"");
    insta::assert_snapshot!(format!("{hits:?}"), @"[9]");

    // without the closing quote the query is still one phrase: no term is removed
    let hits = search_last("\"the quick brown fox jumps over the lazy dog");
    insta::assert_snapshot!(format!("{hits:?}"), @"[9]");
}
/// `words` followed by `proximity`, with the `last` terms matching strategy.
#[test]
fn test_words_proximity_tms_last_simple() {
    let index = create_quick_brown_fox_trivial_index();
    index
        .update_settings(|settings| {
            settings.set_criteria(vec![Criterion::Words, Criterion::Proximity]);
        })
        .unwrap();

    let rtxn = index.read_txn().unwrap();
    // Runs `query` with the `Last` strategy and returns the matching document ids.
    let search_last = |query: &str| {
        let mut search = Search::new(&rtxn, &index);
        search.query(query);
        search.terms_matching_strategy(TermsMatchingStrategy::Last);
        search.execute().unwrap().documents_ids
    };

    let hits = search_last("the quick brown fox jumps over the lazy dog");
    // 7 is better than 6 because of the proximity between "the" and its surrounding terms
    insta::assert_snapshot!(format!("{hits:?}"), @"[9, 21, 14, 17, 13, 10, 18, 19, 20, 16, 15, 22, 8, 7, 6, 5, 4, 11, 12, 3]");

    let hits = search_last("the brown quick fox jumps over the lazy dog");
    // 10 is better than 9 because of the proximity between "quick" and "brown"
    insta::assert_snapshot!(format!("{hits:?}"), @"[10, 18, 19, 9, 20, 21, 14, 17, 13, 16, 15, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
}
/// The proximity between a phrase and its adjacent query terms is taken into
/// account by the `proximity` ranking rule.
#[test]
fn test_words_proximity_tms_last_phrase() {
    let index = create_quick_brown_fox_trivial_index();
    index
        .update_settings(|settings| {
            settings.set_criteria(vec![Criterion::Words, Criterion::Proximity]);
        })
        .unwrap();

    let rtxn = index.read_txn().unwrap();
    // Runs `query` with the `Last` strategy and returns the matching document ids.
    let search_last = |query: &str| {
        let mut search = Search::new(&rtxn, &index);
        search.query(query);
        search.terms_matching_strategy(TermsMatchingStrategy::Last);
        search.execute().unwrap().documents_ids
    };

    // "quick brown" is a phrase: the proximity of its first and last words
    // to the adjacent query words should be taken into account
    let hits = search_last("the \"quick brown\" fox jumps over the lazy dog");
    insta::assert_snapshot!(format!("{hits:?}"), @"[9, 21, 14, 17, 13, 16, 15, 8, 7, 6, 5, 4, 11, 12, 3]");

    // same as above, with `fox jumps` being a second phrase
    // to which the same proximity rule applies
    let hits = search_last("the \"quick brown\" \"fox jumps\" over the lazy dog");
    insta::assert_snapshot!(format!("{hits:?}"), @"[9, 21, 14, 17, 13, 16, 15, 8, 7, 6, 5]");
}
/// The `all` strategy removes no term from the query, and a search may
/// return no results at all.
#[test]
fn test_words_tms_all() {
    let index = create_quick_brown_fox_trivial_index();
    index
        .update_settings(|settings| {
            settings.set_criteria(vec![Criterion::Words, Criterion::Proximity]);
        })
        .unwrap();

    let rtxn = index.read_txn().unwrap();
    // Runs `query` with the `All` strategy and returns the matching document ids.
    let search_all = |query: &str| {
        let mut search = Search::new(&rtxn, &index);
        search.query(query);
        search.terms_matching_strategy(TermsMatchingStrategy::All);
        search.execute().unwrap().documents_ids
    };

    let hits = search_all("the quick brown fox jumps over the lazy dog");
    insta::assert_snapshot!(format!("{hits:?}"), @"[9, 21, 14, 17, 13, 10, 18, 19, 20, 16, 15, 22]");

    // a word that is in none of the documents
    let hits = search_all("extravagant");
    insta::assert_snapshot!(format!("{hits:?}"), @"[]");
}