mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-24 13:40:31 +01:00
Add more search tests
This commit is contained in:
parent
c69cbec64a
commit
4c8a0179ba
@ -1,17 +1,17 @@
|
||||
/*!
|
||||
This module tests the Proximity ranking rule:
|
||||
|
||||
1. A proximity of >7 always has the same cost.
|
||||
1. A proximity of >7 always has the same cost.
|
||||
|
||||
2. Phrase terms can be in proximity to other terms via their start and end words,
|
||||
2. Phrase terms can be in proximity to other terms via their start and end words,
|
||||
but we need to make sure that the phrase exists in the document that meets this
|
||||
proximity condition. This is especially relevant with split words and synonyms.
|
||||
|
||||
3. An ngram has the same proximity cost as its component words being consecutive.
|
||||
3. An ngram has the same proximity cost as its component words being consecutive.
|
||||
e.g. `sunflower` equivalent to `sun flower`.
|
||||
|
||||
4. The prefix databases can be used to find the proximity between two words, but
|
||||
they store fewer proximities than the regular word proximity DB.
|
||||
4. The prefix databases can be used to find the proximity between two words, but
|
||||
they store fewer proximities than the regular word proximity DB.
|
||||
|
||||
*/
|
||||
|
||||
@ -126,9 +126,9 @@ fn create_edge_cases_index() -> TempIndex {
|
||||
// The next 5 documents lay out a trap with the split word, phrase search, or synonym `sun flower`.
|
||||
// If the search query is "sunflower", the split word "Sun Flower" will match some documents.
|
||||
// If the query is `sunflower wilting`, then we should make sure that
|
||||
// the proximity condition `flower wilting: prox N` also comes with the condition
|
||||
// `sun wilting: prox N+1`. TODO: this is not the exact condition we use for now.
|
||||
// We only check that the phrase `sun flower` exists and `flower wilting: prox N`, which
|
||||
// the proximity condition `flower wilting: prox N` also comes with the condition
|
||||
// `sun wilting: prox N+1`. TODO: this is not the exact condition we use for now.
|
||||
// We only check that the phrase `sun flower` exists and `flower wilting: prox N`, which
|
||||
// is better than nothing but not the best.
|
||||
{
|
||||
"id": 1,
|
||||
@ -140,9 +140,9 @@ fn create_edge_cases_index() -> TempIndex {
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
// This document matches the query `sunflower wilting`, but the proximity condition
|
||||
// This document matches the query `sunflower wilting`, but the proximity condition
|
||||
// between `sunflower` and `wilting` cannot be through the split-word `Sun Flower`
|
||||
// which would reduce to only `flower` and `wilting` being in proximity.
|
||||
// which would reduce to only `flower` and `wilting` being in proximity.
|
||||
"text": "A flower wilting under the sun, unlike a sunflower"
|
||||
},
|
||||
{
|
||||
@ -195,6 +195,69 @@ fn create_edge_cases_index() -> TempIndex {
|
||||
"id": 13,
|
||||
"text": "summer best"
|
||||
},
|
||||
{
|
||||
// This document will insert "win" in the prefix database
|
||||
"id": 14,
|
||||
"text": "
|
||||
winaa winab winac winae winaf winag winah winai winaj winak winal winam winan winao winap winaq winar winasa winat winau winav winaw winax winay winaz
|
||||
winba winbb winbc winbe winbf winbg winbh winbi winbj winbk winbl winbm winbn winbo winbp winbq winbr winbsb winbt winbu winbv winbw winbx winby winbz
|
||||
winca wincb wincc wince wincf wincg winch winci wincj winck wincl wincm wincn winco wincp wincq wincr wincsc winct wincu wincv wincw wincx wincy wincz
|
||||
winda windb windc winde windf windg windh windi windj windk windl windm windn windo windp windq windr windsd windt windu windv windw windx windy windz
|
||||
winea wineb winec winee winef wineg wineh winei winej winek winel winem winen wineo winep wineq winer winese winet wineu winev winew winex winey winez
|
||||
winfa winfb winfc winfe winff winfg winfh winfi winfj winfk winfl winfm winfn winfo winfp winfq winfr winfsf winft winfu winfv winfw winfx winfy winfz
|
||||
winga wingb wingc winge wingf wingg wingh wingi wingj wingk wingl wingm wingn wingo wingp wingq wingr wingsg wingt wingu wingv wingw wingx wingy wingz
|
||||
winka winkb winkc winke winkf winkg winkh winki winkj winkk winkl winkm winkn winko winkp winkq winkr winksk winkt winku winkv winkw winkx winky winkz
|
||||
winla winlb winlc winle winlf winlg winlh winli winlj winlk winll winlm winln winlo winlp winlq winlr winlsl winlt winlu winlv winlw winlx winly winlz
|
||||
winma winmb winmc winme winmf winmg winmh winmi winmj winmk winml winmm winmn winmo winmp winmq winmr winmsm winmt winmu winmv winmw winmx winmy winmz
|
||||
winna winnb winnc winne winnf winng winnh winni winnj winnk winnl winnm winnn winno winnp winnq winnr winnsn winnt winnu winnv winnw winnx winny winnz
|
||||
winoa winob winoc winoe winof winog winoh winoi winoj winok winol winom winon winoo winop winoq winor winoso winot winou winov winow winox winoy winoz
|
||||
winpa winpb winpc winpe winpf winpg winph winpi winpj winpk winpl winpm winpn winpo winpp winpq winpr winpsp winpt winpu winpv winpw winpx winpy winpz
|
||||
winqa winqb winqc winqe winqf winqg winqh winqi winqj winqk winql winqm winqn winqo winqp winqq winqr winqsq winqt winqu winqv winqw winqx winqy winqz
|
||||
winra winrb winrc winre winrf winrg winrh winri winrj winrk winrl winrm winrn winro winrp winrq winrr winrsr winrt winru winrv winrw winrx winry winrz
|
||||
winsa winsb winsc winse winsf winsg winsh winsi winsj winsk winsl winsm winsn winso winsp winsq winsr winsss winst winsu winsv winsw winsx winsy winsz
|
||||
winta wintb wintc winte wintf wintg winth winti wintj wintk wintl wintm wintn winto wintp wintq wintr wintst wintt wintu wintv wintw wintx winty wintz
|
||||
"
|
||||
},
|
||||
{
|
||||
// Prox MAX between `best` and `win` prefix
|
||||
"id": 15,
|
||||
"text": "this is the best meal I have ever had in such a beautiful winter day"
|
||||
},
|
||||
{
|
||||
// Prox 5 between `best` and `win` prefix
|
||||
"id": 16,
|
||||
"text": "this is the best cooked meal of the winter"
|
||||
},
|
||||
{
|
||||
// Prox 4 between `best` and `win` prefix
|
||||
"id": 17,
|
||||
"text": "this is the best meal of the winter"
|
||||
},
|
||||
{
|
||||
// Prox 3 between `best` and `win` prefix
|
||||
"id": 18,
|
||||
"text": "this is the best meal of winter"
|
||||
},
|
||||
{
|
||||
// Prox 1 between `best` and `win` prefix
|
||||
"id": 19,
|
||||
"text": "this is the best winter meal"
|
||||
},
|
||||
{
|
||||
// Reverse Prox 3 between `best` and `win` prefix
|
||||
"id": 20,
|
||||
"text": "winter x y best"
|
||||
},
|
||||
{
|
||||
// Reverse Prox 2 between `best` and `win` prefix
|
||||
"id": 21,
|
||||
"text": "winter x best"
|
||||
},
|
||||
{
|
||||
// Reverse Prox 1 between `best` and `win` prefix
|
||||
"id": 22,
|
||||
"text": "winter best"
|
||||
},
|
||||
])).unwrap();
|
||||
index
|
||||
}
|
||||
@ -298,7 +361,7 @@ fn test_proximity_prefix_db() {
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
s.query("best s");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 8, 6, 7, 11]");
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 8, 6, 7, 11, 15]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
|
||||
// This test illustrates the loss of precision from using the prefix DB
|
||||
@ -312,6 +375,97 @@ fn test_proximity_prefix_db() {
|
||||
"\"this is the best meal I have ever had in such a beautiful summer day\"",
|
||||
"\"this is the best cooked meal of the summer\"",
|
||||
"\"summer x y best\"",
|
||||
"\"this is the best meal I have ever had in such a beautiful winter day\"",
|
||||
]
|
||||
"###);
|
||||
|
||||
// Difference when using the `su` prefix, which is not in the prefix DB
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
s.query("best su");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 8, 11, 7, 6, 15]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"this is the best summer meal\"",
|
||||
"\"summer best\"",
|
||||
"\"this is the best meal of summer\"",
|
||||
"\"summer x best\"",
|
||||
"\"this is the best meal of the summer\"",
|
||||
"\"summer x y best\"",
|
||||
"\"this is the best cooked meal of the summer\"",
|
||||
"\"this is the best meal I have ever had in such a beautiful summer day\"",
|
||||
"\"this is the best meal I have ever had in such a beautiful winter day\"",
|
||||
]
|
||||
"###);
|
||||
|
||||
// Note that there is a case where a prefix is in the prefix DB but not in the
|
||||
// **proximity** prefix DB. In that case, its proximity score will always be
|
||||
// the maximum. This happens for prefixes that are larger than 2 bytes.
|
||||
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
s.query("best win");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[15, 16, 17, 18, 19, 20, 21, 22]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"this is the best meal I have ever had in such a beautiful winter day\"",
|
||||
"\"this is the best cooked meal of the winter\"",
|
||||
"\"this is the best meal of the winter\"",
|
||||
"\"this is the best meal of winter\"",
|
||||
"\"this is the best winter meal\"",
|
||||
"\"winter x y best\"",
|
||||
"\"winter x best\"",
|
||||
"\"winter best\"",
|
||||
]
|
||||
"###);
|
||||
|
||||
// Now using `wint`, which is not in the prefix DB:
|
||||
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
s.query("best wint");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 17, 20, 16, 15]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"this is the best winter meal\"",
|
||||
"\"winter best\"",
|
||||
"\"this is the best meal of winter\"",
|
||||
"\"winter x best\"",
|
||||
"\"this is the best meal of the winter\"",
|
||||
"\"winter x y best\"",
|
||||
"\"this is the best cooked meal of the winter\"",
|
||||
"\"this is the best meal I have ever had in such a beautiful winter day\"",
|
||||
]
|
||||
"###);
|
||||
|
||||
// and using `wi` which is in the prefix DB and proximity prefix DB
|
||||
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
s.query("best wi");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[19, 22, 18, 21, 17, 15, 16, 20]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"this is the best winter meal\"",
|
||||
"\"winter best\"",
|
||||
"\"this is the best meal of winter\"",
|
||||
"\"winter x best\"",
|
||||
"\"this is the best meal of the winter\"",
|
||||
"\"this is the best meal I have ever had in such a beautiful winter day\"",
|
||||
"\"this is the best cooked meal of the winter\"",
|
||||
"\"winter x y best\"",
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user