mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 21:04:27 +01:00
Add more search tests
This commit is contained in:
parent
ce328c329d
commit
c69cbec64a
@ -18,5 +18,5 @@ fn test_kanji_language_detection() {
|
||||
search.query("東京");
|
||||
let SearchResult { documents_ids, .. } = search.execute().unwrap();
|
||||
|
||||
assert_eq!(documents_ids, vec![1]);
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1]");
|
||||
}
|
||||
|
@ -16,7 +16,10 @@ This module tests the following properties:
|
||||
13. Ngrams cannot be formed by combining a phrase and a word or two phrases
|
||||
*/
|
||||
|
||||
use crate::{index::tests::TempIndex, Criterion, Search, SearchResult, TermsMatchingStrategy};
|
||||
use crate::{
|
||||
index::tests::TempIndex, search::new::tests::collect_field_values, Criterion, Search,
|
||||
SearchResult, TermsMatchingStrategy,
|
||||
};
|
||||
|
||||
fn create_index() -> TempIndex {
|
||||
let index = TempIndex::new();
|
||||
@ -46,6 +49,14 @@ fn create_index() -> TempIndex {
|
||||
{
|
||||
"id": 3,
|
||||
"text": "the sunflower is tall"
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"text": "the sunflawer is tall"
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"text": "sunflowering is not a verb"
|
||||
}
|
||||
]))
|
||||
.unwrap();
|
||||
@ -67,8 +78,18 @@ fn test_2gram_simple() {
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
s.query("sun flower");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
// will also match documents with "sun flower"
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2, 3]");
|
||||
// will also match documents with "sunflower" + prefix tolerance
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2, 3, 5]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the sun flowers are pretty\"",
|
||||
"\"the sun flower is tall\"",
|
||||
"\"the sunflowers are pretty\"",
|
||||
"\"the sunflower is tall\"",
|
||||
"\"sunflowering is not a verb\"",
|
||||
]
|
||||
"###);
|
||||
}
|
||||
#[test]
|
||||
fn test_3gram_simple() {
|
||||
@ -87,6 +108,13 @@ fn test_3gram_simple() {
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the sun flowers are pretty\"",
|
||||
"\"the sunflowers are pretty\"",
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -99,7 +127,18 @@ fn test_2gram_typo() {
|
||||
s.query("sun flawer");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2, 3]");
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2, 3, 4, 5]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the sun flowers are pretty\"",
|
||||
"\"the sun flower is tall\"",
|
||||
"\"the sunflowers are pretty\"",
|
||||
"\"the sunflower is tall\"",
|
||||
"\"the sunflawer is tall\"",
|
||||
"\"sunflowering is not a verb\"",
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -119,6 +158,13 @@ fn test_no_disable_ngrams() {
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
// documents containing `sunflower`
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 3]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the sun flower is tall\"",
|
||||
"\"the sunflower is tall\"",
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -137,7 +183,17 @@ fn test_2gram_prefix() {
|
||||
s.query("sun flow");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
// documents containing words beginning with `sunflow`
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2, 3]");
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1, 2, 3, 5]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the sun flowers are pretty\"",
|
||||
"\"the sun flower is tall\"",
|
||||
"\"the sunflowers are pretty\"",
|
||||
"\"the sunflower is tall\"",
|
||||
"\"sunflowering is not a verb\"",
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -157,7 +213,16 @@ fn test_3gram_prefix() {
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
|
||||
// documents containing a word beginning with sunfl
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3]");
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 4, 5]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the sunflowers are pretty\"",
|
||||
"\"the sunflower is tall\"",
|
||||
"\"the sunflawer is tall\"",
|
||||
"\"sunflowering is not a verb\"",
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -170,8 +235,17 @@ fn test_split_words() {
|
||||
s.query("sunflower ");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
|
||||
// all the documents with either `sunflower` or `sun flower`
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 2, 3]");
|
||||
// all the documents with either `sunflower` or `sun flower` + possible typo
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 2, 3, 4]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the sun flower is tall\"",
|
||||
"\"the sunflowers are pretty\"",
|
||||
"\"the sunflower is tall\"",
|
||||
"\"the sunflawer is tall\"",
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -191,6 +265,12 @@ fn test_disable_split_words() {
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
// no document containing `sun flower`
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the sunflower is tall\"",
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -203,8 +283,18 @@ fn test_2gram_split_words() {
|
||||
s.query("sunf lower");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
|
||||
// all the documents with "sunflower", "sun flower", or (sunflower + 1 typo)
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 2, 3]");
|
||||
// all the documents with "sunflower", "sun flower", (sunflower + 1 typo), or (sunflower as prefix)
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 2, 3, 4, 5]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the sun flower is tall\"",
|
||||
"\"the sunflowers are pretty\"",
|
||||
"\"the sunflower is tall\"",
|
||||
"\"the sunflawer is tall\"",
|
||||
"\"sunflowering is not a verb\"",
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -218,7 +308,15 @@ fn test_3gram_no_split_words() {
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
|
||||
// no document with `sun flower`
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3]");
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 3, 5]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the sunflowers are pretty\"",
|
||||
"\"the sunflower is tall\"",
|
||||
"\"sunflowering is not a verb\"",
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -231,7 +329,13 @@ fn test_3gram_no_typos() {
|
||||
s.query("sunf la wer");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]");
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the sunflawer is tall\"",
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -245,6 +349,13 @@ fn test_no_ngram_phrases() {
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 1]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the sun flowers are pretty\"",
|
||||
"\"the sun flower is tall\"",
|
||||
]
|
||||
"###);
|
||||
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
@ -252,4 +363,10 @@ fn test_no_ngram_phrases() {
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the sun flower is tall\"",
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
@ -0,0 +1,317 @@
|
||||
/*!
|
||||
This module tests the Proximity ranking rule:
|
||||
|
||||
1. A proximity of >7 always has the same cost.
|
||||
|
||||
2. Phrase terms can be in proximity to other terms via their start and end words,
|
||||
but we need to make sure that the phrase exists in the document that meets this
|
||||
proximity condition. This is especially relevant with split words and synonyms.
|
||||
|
||||
3. An ngram has the same proximity cost as its component words being consecutive.
|
||||
e.g. `sunflower` equivalent to `sun flower`.
|
||||
|
||||
4. The prefix databases can be used to find the proximity between two words, but
|
||||
they store fewer proximities than the regular word proximity DB.
|
||||
|
||||
*/
|
||||
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::{
|
||||
index::tests::TempIndex, search::new::tests::collect_field_values, Criterion, Search,
|
||||
SearchResult, TermsMatchingStrategy,
|
||||
};
|
||||
|
||||
fn create_simple_index() -> TempIndex {
|
||||
let index = TempIndex::new();
|
||||
|
||||
index
|
||||
.update_settings(|s| {
|
||||
s.set_primary_key("id".to_owned());
|
||||
s.set_searchable_fields(vec!["text".to_owned()]);
|
||||
s.set_criteria(vec![Criterion::Words, Criterion::Proximity]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
index
|
||||
.add_documents(documents!([
|
||||
{
|
||||
"id": 0,
|
||||
"text": "the very quick dark brown and smart fox did jump over the terribly lazy and small dog"
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"text": "the. quick brown fox jumps over the lazy. dog"
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"text": "the quick brown fox jumps over the lazy. dog"
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"text": "dog the quick brown fox jumps over the lazy"
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"text": "the quickbrown fox jumps over the lazy dog"
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"text": "brown quick fox jumps over the lazy dog"
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"text": "the really quick brown fox jumps over the very lazy dog"
|
||||
},
|
||||
{
|
||||
"id": 7,
|
||||
"text": "the really quick brown fox jumps over the lazy dog"
|
||||
},
|
||||
{
|
||||
"id": 8,
|
||||
"text": "the quick brown fox jumps over the lazy"
|
||||
},
|
||||
{
|
||||
"id": 9,
|
||||
"text": "the quack brown fox jumps over the lazy"
|
||||
},
|
||||
{
|
||||
"id": 9,
|
||||
"text": "the quack brown fox jumps over the lazy dog"
|
||||
},
|
||||
{
|
||||
"id": 10,
|
||||
"text": "the quick brown fox jumps over the lazy dog"
|
||||
}
|
||||
]))
|
||||
.unwrap();
|
||||
index
|
||||
}
|
||||
|
||||
fn create_edge_cases_index() -> TempIndex {
|
||||
let index = TempIndex::new();
|
||||
|
||||
index
|
||||
.update_settings(|s| {
|
||||
s.set_primary_key("id".to_owned());
|
||||
s.set_searchable_fields(vec!["text".to_owned()]);
|
||||
s.set_criteria(vec![Criterion::Words, Criterion::Proximity]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
index.add_documents(documents!([
|
||||
{
|
||||
// This document will insert "s" in the prefix database
|
||||
"id": 0,
|
||||
"text": "
|
||||
saa sab sac sae saf sag sah sai saj sak sal sam san sao sap saq sar sasa sat sau sav saw sax say saz
|
||||
sba sbb sbc sbe sbf sbg sbh sbi sbj sbk sbl sbm sbn sbo sbp sbq sbr sbsb sbt sbu sbv sbw sbx sby sbz
|
||||
sca scb scc sce scf scg sch sci scj sck scl scm scn sco scp scq scr scsc sct scu scv scw scx scy scz
|
||||
sda sdb sdc sde sdf sdg sdh sdi sdj sdk sdl sdm sdn sdo sdp sdq sdr sdsd sdt sdu sdv sdw sdx sdy sdz
|
||||
sea seb sec see sef seg seh sei sej sek sel sem sen seo sep seq ser sese set seu sev sew sex sey sez
|
||||
sfa sfb sfc sfe sff sfg sfh sfi sfj sfk sfl sfm sfn sfo sfp sfq sfr sfsf sft sfu sfv sfw sfx sfy sfz
|
||||
sga sgb sgc sge sgf sgg sgh sgi sgj sgk sgl sgm sgn sgo sgp sgq sgr sgsg sgt sgu sgv sgw sgx sgy sgz
|
||||
ska skb skc ske skf skg skh ski skj skk skl skm skn sko skp skq skr sksk skt sku skv skw skx sky skz
|
||||
sla slb slc sle slf slg slh sli slj slk sll slm sln slo slp slq slr slsl slt slu slv slw slx sly slz
|
||||
sma smb smc sme smf smg smh smi smj smk sml smm smn smo smp smq smr smsm smt smu smv smw smx smy smz
|
||||
sna snb snc sne snf sng snh sni snj snk snl snm snn sno snp snq snr snsn snt snu snv snw snx sny snz
|
||||
soa sob soc soe sof sog soh soi soj sok sol som son soo sop soq sor soso sot sou sov sow sox soy soz
|
||||
spa spb spc spe spf spg sph spi spj spk spl spm spn spo spp spq spr spsp spt spu spv spw spx spy spz
|
||||
sqa sqb sqc sqe sqf sqg sqh sqi sqj sqk sql sqm sqn sqo sqp sqq sqr sqsq sqt squ sqv sqw sqx sqy sqz
|
||||
sra srb src sre srf srg srh sri srj srk srl srm srn sro srp srq srr srsr srt sru srv srw srx sry srz
|
||||
ssa ssb ssc sse ssf ssg ssh ssi ssj ssk ssl ssm ssn sso ssp ssq ssr ssss sst ssu ssv ssw ssx ssy ssz
|
||||
sta stb stc ste stf stg sth sti stj stk stl stm stn sto stp stq str stst stt stu stv stw stx sty stz
|
||||
"
|
||||
},
|
||||
// The next 5 documents lay out a trap with the split word, phrase search, or synonym `sun flower`.
|
||||
// If the search query is "sunflower", the split word "Sun Flower" will match some documents.
|
||||
// If the query is `sunflower wilting`, then we should make sure that
|
||||
// the proximity condition `flower wilting: prox N` also comes with the condition
|
||||
// `sun wilting: prox N+1`. TODO: this is not the exact condition we use for now.
|
||||
// We only check that the phrase `sun flower` exists and `flower wilting: prox N`, which
|
||||
// is better than nothing but not the best.
|
||||
{
|
||||
"id": 1,
|
||||
"text": "Sun Flower sounds like the title of a painting, maybe about a plant wilting under the heat."
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"text": "Sun Flower sounds like the title of a painting, maybe about a flower wilting under the heat."
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
// This document matches the query `sunflower wilting`, but the proximity condition
|
||||
// between `sunflower` and `wilting` cannot be through the split-word `Sun Flower`
|
||||
// which would reduce to only `flower` and `wilting` being in proximity.
|
||||
"text": "A flower wilting under the sun, unlike a sunflower"
|
||||
},
|
||||
{
|
||||
// This should be the best document for `sunflower wilting`
|
||||
"id": 4,
|
||||
"text": "sun flower wilting under the heat"
|
||||
},
|
||||
{
|
||||
// This is also the best document for `sunflower wilting`
|
||||
"id": 5,
|
||||
"text": "sunflower wilting under the heat"
|
||||
},
|
||||
{
|
||||
// Prox MAX between `best` and `s` prefix
|
||||
"id": 6,
|
||||
"text": "this is the best meal I have ever had in such a beautiful summer day"
|
||||
},
|
||||
{
|
||||
// Prox 5 between `best` and `s` prefix
|
||||
"id": 7,
|
||||
"text": "this is the best cooked meal of the summer"
|
||||
},
|
||||
{
|
||||
// Prox 4 between `best` and `s` prefix
|
||||
"id": 8,
|
||||
"text": "this is the best meal of the summer"
|
||||
},
|
||||
{
|
||||
// Prox 3 between `best` and `s` prefix
|
||||
"id": 9,
|
||||
"text": "this is the best meal of summer"
|
||||
},
|
||||
{
|
||||
// Prox 1 between `best` and `s` prefix
|
||||
"id": 10,
|
||||
"text": "this is the best summer meal"
|
||||
},
|
||||
{
|
||||
// Reverse Prox 3 between `best` and `s` prefix
|
||||
"id": 11,
|
||||
"text": "summer x y best"
|
||||
},
|
||||
{
|
||||
// Reverse Prox 2 between `best` and `s` prefix
|
||||
"id": 12,
|
||||
"text": "summer x best"
|
||||
},
|
||||
{
|
||||
// Reverse Prox 1 between `best` and `s` prefix
|
||||
"id": 13,
|
||||
"text": "summer best"
|
||||
},
|
||||
])).unwrap();
|
||||
index
|
||||
}
|
||||
|
||||
/// Searches the simple index for the full sentence with every term required,
/// then snapshots the returned document ids and their `text` values.
#[test]
fn test_proximity_simple() {
    let index = create_simple_index();
    let rtxn = index.read_txn().unwrap();

    let mut search = Search::new(&rtxn, &index);
    search.terms_matching_strategy(TermsMatchingStrategy::All);
    search.query("the quick brown fox jumps over the lazy dog");
    let documents_ids = search.execute().unwrap().documents_ids;

    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 9, 10, 7, 6, 5, 2, 3, 0, 1]");
    let texts = collect_field_values(&index, &rtxn, "text", &documents_ids);
    insta::assert_debug_snapshot!(texts, @r###"
    [
        "\"the quickbrown fox jumps over the lazy dog\"",
        "\"the quack brown fox jumps over the lazy dog\"",
        "\"the quick brown fox jumps over the lazy dog\"",
        "\"the really quick brown fox jumps over the lazy dog\"",
        "\"the really quick brown fox jumps over the very lazy dog\"",
        "\"brown quick fox jumps over the lazy dog\"",
        "\"the quick brown fox jumps over the lazy. dog\"",
        "\"dog the quick brown fox jumps over the lazy\"",
        "\"the very quick dark brown and smart fox did jump over the terribly lazy and small dog\"",
        "\"the. quick brown fox jumps over the lazy. dog\"",
    ]
    "###);
}
|
||||
|
||||
/// Runs three queries against the edge-case index and snapshots each result:
/// the plain word `sunflower`, the phrase `"sun flower"`, and `xyz` once it has
/// been declared as a synonym of `sun flower`. Each is followed by `wilting`.
#[test]
fn test_proximity_split_word() {
    let index = create_edge_cases_index();
    let rtxn = index.read_txn().unwrap();

    // 1. Single word that can be split into `sun flower`.
    let mut search = Search::new(&rtxn, &index);
    search.terms_matching_strategy(TermsMatchingStrategy::All);
    search.query("sunflower wilting");
    let documents_ids = search.execute().unwrap().documents_ids;
    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 4, 5, 1, 3]");
    let texts = collect_field_values(&index, &rtxn, "text", &documents_ids);
    // TODO: "2" and "4" should be swapped ideally
    insta::assert_debug_snapshot!(texts, @r###"
    [
        "\"Sun Flower sounds like the title of a painting, maybe about a flower wilting under the heat.\"",
        "\"sun flower wilting under the heat\"",
        "\"sunflower wilting under the heat\"",
        "\"Sun Flower sounds like the title of a painting, maybe about a plant wilting under the heat.\"",
        "\"A flower wilting under the sun, unlike a sunflower\"",
    ]
    "###);

    // 2. Explicit phrase.
    let mut search = Search::new(&rtxn, &index);
    search.terms_matching_strategy(TermsMatchingStrategy::All);
    search.query("\"sun flower\" wilting");
    let documents_ids = search.execute().unwrap().documents_ids;
    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 4, 1]");
    let texts = collect_field_values(&index, &rtxn, "text", &documents_ids);
    // TODO: "2" and "4" should be swapped ideally
    insta::assert_debug_snapshot!(texts, @r###"
    [
        "\"Sun Flower sounds like the title of a painting, maybe about a flower wilting under the heat.\"",
        "\"sun flower wilting under the heat\"",
        "\"Sun Flower sounds like the title of a painting, maybe about a plant wilting under the heat.\"",
    ]
    "###);
    // The read transaction is released before the settings are updated.
    drop(rtxn);

    // 3. Synonym `xyz` -> `sun flower`.
    index
        .update_settings(|settings| {
            let synonyms =
                HashMap::from([("xyz".to_owned(), vec!["sun flower".to_owned()])]);
            settings.set_synonyms(synonyms);
        })
        .unwrap();
    let rtxn = index.read_txn().unwrap();

    let mut search = Search::new(&rtxn, &index);
    search.terms_matching_strategy(TermsMatchingStrategy::All);
    search.query("xyz wilting");
    let documents_ids = search.execute().unwrap().documents_ids;
    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[2, 4, 1]");
    let texts = collect_field_values(&index, &rtxn, "text", &documents_ids);
    // TODO: "2" and "4" should be swapped ideally
    insta::assert_debug_snapshot!(texts, @r###"
    [
        "\"Sun Flower sounds like the title of a painting, maybe about a flower wilting under the heat.\"",
        "\"sun flower wilting under the heat\"",
        "\"Sun Flower sounds like the title of a painting, maybe about a plant wilting under the heat.\"",
    ]
    "###);
}
|
||||
|
||||
/// Queries `best s` on the edge-case index with every term required and
/// snapshots the returned document ids and their `text` values.
#[test]
fn test_proximity_prefix_db() {
    let index = create_edge_cases_index();
    let rtxn = index.read_txn().unwrap();

    let mut search = Search::new(&rtxn, &index);
    search.terms_matching_strategy(TermsMatchingStrategy::All);
    search.query("best s");
    let documents_ids = search.execute().unwrap().documents_ids;

    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 13, 9, 12, 8, 6, 7, 11]");
    let texts = collect_field_values(&index, &rtxn, "text", &documents_ids);

    // The expected order below illustrates the loss of precision from using the prefix DB
    insta::assert_debug_snapshot!(texts, @r###"
    [
        "\"this is the best summer meal\"",
        "\"summer best\"",
        "\"this is the best meal of summer\"",
        "\"summer x best\"",
        "\"this is the best meal of the summer\"",
        "\"this is the best meal I have ever had in such a beautiful summer day\"",
        "\"this is the best cooked meal of the summer\"",
        "\"summer x y best\"",
    ]
    "###);
}
|
@ -21,8 +21,8 @@ if `words` doesn't exist before it.
|
||||
use std::collections::HashMap;
|
||||
|
||||
use crate::{
|
||||
index::tests::TempIndex, Criterion,
|
||||
Search, SearchResult, TermsMatchingStrategy,
|
||||
index::tests::TempIndex, search::new::tests::collect_field_values, Criterion, Search,
|
||||
SearchResult, TermsMatchingStrategy,
|
||||
};
|
||||
|
||||
fn create_index() -> TempIndex {
|
||||
@ -130,6 +130,10 @@ fn create_index() -> TempIndex {
|
||||
"id": 22,
|
||||
"text": "the quick brown fox jumps over the lackadaisical dog"
|
||||
},
|
||||
{
|
||||
"id": 23,
|
||||
"text": "the quivk brown fox jumps over the lazy dog"
|
||||
},
|
||||
]))
|
||||
.unwrap();
|
||||
index
|
||||
@ -151,6 +155,12 @@ fn test_no_typo() {
|
||||
s.query("the quick brown fox jumps over the lazy dog");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the quick brown fox jumps over the lazy dog\"",
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -168,7 +178,14 @@ fn test_default_typo() {
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
s.query("the quick brown fox jumps over the lazy dog");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]");
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 23]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the quick brown fox jumps over the lazy dog\"",
|
||||
"\"the quivk brown fox jumps over the lazy dog\"",
|
||||
]
|
||||
"###);
|
||||
|
||||
// 1 typo on one word, replaced letter
|
||||
let mut s = Search::new(&txn, &index);
|
||||
@ -176,6 +193,12 @@ fn test_default_typo() {
|
||||
s.query("the quack brown fox jumps over the lazy dog");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the quick brown fox jumps over the lazy dog\"",
|
||||
]
|
||||
"###);
|
||||
|
||||
// 1 typo on one word, missing letter, extra letter
|
||||
let mut s = Search::new(&txn, &index);
|
||||
@ -183,6 +206,12 @@ fn test_default_typo() {
|
||||
s.query("the quicest brownest fox jummps over the laziest dog");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the quickest brownest fox jumps over the laziest dog\"",
|
||||
]
|
||||
"###);
|
||||
|
||||
// 1 typo on one word, swapped letters
|
||||
let mut s = Search::new(&txn, &index);
|
||||
@ -190,6 +219,12 @@ fn test_default_typo() {
|
||||
s.query("the quikc borwn fox jupms over the lazy dog");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the quick brown fox jumps over the lazy dog\"",
|
||||
]
|
||||
"###);
|
||||
|
||||
// 1 first letter typo on a word <5 bytes, replaced letter
|
||||
let mut s = Search::new(&txn, &index);
|
||||
@ -211,6 +246,12 @@ fn test_default_typo() {
|
||||
s.query("the quack brawn fox junps over the lazy dog");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the quick brown fox jumps over the lazy dog\"",
|
||||
]
|
||||
"###);
|
||||
|
||||
// 2 typos on words < 9 bytes
|
||||
let mut s = Search::new(&txn, &index);
|
||||
@ -225,6 +266,12 @@ fn test_default_typo() {
|
||||
s.query("the extravant fox kyrocketed over the lamguorout dog");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the extravagant fox skyrocketed over the languorous dog\"",
|
||||
]
|
||||
"###);
|
||||
|
||||
// 2 typos on words >= 9 bytes: 2 extra letters in a single word, swapped letters + extra letter, replaced letters
|
||||
let mut s = Search::new(&txn, &index);
|
||||
@ -232,6 +279,12 @@ fn test_default_typo() {
|
||||
s.query("the extravaganttt fox sktyrocnketed over the lagnuorrous dog");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the extravagant fox skyrocketed over the languorous dog\"",
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -244,6 +297,8 @@ fn test_phrase_no_typo_allowed() {
|
||||
s.query("the \"quick brewn\" fox jumps over the lazy dog");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @"[]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -256,12 +311,20 @@ fn test_ngram_typos() {
|
||||
s.query("the extra lagant fox skyrocketed over the languorous dog");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[6]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the extravagant fox skyrocketed over the languorous dog\"",
|
||||
]
|
||||
"###);
|
||||
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
s.query("the ex tra lagant fox skyrocketed over the languorous dog");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @"[]");
|
||||
}
|
||||
#[test]
|
||||
fn test_typo_ranking_rule_not_preceded_by_words_ranking_rule() {
|
||||
@ -278,7 +341,29 @@ fn test_typo_ranking_rule_not_preceded_by_words_ranking_rule() {
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
||||
s.query("the quick brown fox jumps over the lazy dog");
|
||||
let SearchResult { documents_ids: ids_1, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{ids_1:?}"), @"[0, 7, 8, 9, 10, 11, 1, 2, 12, 13, 4, 3, 5, 6, 21]");
|
||||
insta::assert_snapshot!(format!("{ids_1:?}"), @"[0, 23, 7, 8, 9, 22, 10, 11, 1, 2, 12, 13, 4, 3, 5, 6, 21]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &ids_1);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the quick brown fox jumps over the lazy dog\"",
|
||||
"\"the quivk brown fox jumps over the lazy dog\"",
|
||||
"\"the quick brown fox jumps over the lazy\"",
|
||||
"\"the quick brown fox jumps over the\"",
|
||||
"\"the quick brown fox jumps over\"",
|
||||
"\"the quick brown fox jumps over the lackadaisical dog\"",
|
||||
"\"the quick brown fox jumps\"",
|
||||
"\"the quick brown fox\"",
|
||||
"\"the quick brown foxes jump over the lazy dog\"",
|
||||
"\"the quick brown fax sends a letter to the dog\"",
|
||||
"\"the quick brown\"",
|
||||
"\"the quick\"",
|
||||
"\"a fox doesn't quack, that crown goes to the duck.\"",
|
||||
"\"the quickest brownest fox jumps over the laziest dog\"",
|
||||
"\"the quicker browner fox jumped over the lazier dog\"",
|
||||
"\"the extravagant fox skyrocketed over the languorous dog\"",
|
||||
"\"the fast brownish fox jumps over the lackadaisical dog\"",
|
||||
]
|
||||
"###);
|
||||
|
||||
index
|
||||
.update_settings(|s| {
|
||||
@ -290,7 +375,7 @@ fn test_typo_ranking_rule_not_preceded_by_words_ranking_rule() {
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
||||
s.query("the quick brown fox jumps over the lazy dog");
|
||||
let SearchResult { documents_ids: ids_2, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{ids_2:?}"), @"[0, 7, 8, 9, 10, 11, 1, 2, 12, 13, 4, 3, 5, 6, 21]");
|
||||
insta::assert_snapshot!(format!("{ids_2:?}"), @"[0, 23, 7, 8, 9, 22, 10, 11, 1, 2, 12, 13, 4, 3, 5, 6, 21]");
|
||||
|
||||
assert_eq!(ids_1, ids_2);
|
||||
}
|
||||
@ -307,6 +392,17 @@ fn test_typo_bucketing() {
|
||||
s.query("network interconnection sunflower");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[14, 15, 16, 17, 18, 20]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"netwolk interconections sunflawar\"",
|
||||
"\"network interconnections sunflawer\"",
|
||||
"\"network interconnection sunflower\"",
|
||||
"\"network interconnection sun flower\"",
|
||||
"\"network interconnection sunflowering\"",
|
||||
"\"network interconnection sunflowar\"",
|
||||
]
|
||||
"###);
|
||||
|
||||
// Then with the typo ranking rule
|
||||
drop(txn);
|
||||
@ -322,12 +418,34 @@ fn test_typo_bucketing() {
|
||||
s.query("network interconnection sunflower");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[16, 18, 17, 20, 15, 14]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"network interconnection sunflower\"",
|
||||
"\"network interconnection sunflowering\"",
|
||||
"\"network interconnection sun flower\"",
|
||||
"\"network interconnection sunflowar\"",
|
||||
"\"network interconnections sunflawer\"",
|
||||
"\"netwolk interconections sunflawar\"",
|
||||
]
|
||||
"###);
|
||||
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
s.query("network interconnection sun flower");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[17, 19, 16, 18, 20, 15]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"network interconnection sun flower\"",
|
||||
"\"network interconnection sun flowering\"",
|
||||
"\"network interconnection sunflower\"",
|
||||
"\"network interconnection sunflowering\"",
|
||||
"\"network interconnection sunflowar\"",
|
||||
"\"network interconnections sunflawer\"",
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
@ -350,7 +468,15 @@ fn test_typo_synonyms() {
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
s.query("the quick brown fox jumps over the lackadaisical dog");
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[21, 0]");
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 22, 23]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the quick brown fox jumps over the lazy dog\"",
|
||||
"\"the quick brown fox jumps over the lackadaisical dog\"",
|
||||
"\"the quivk brown fox jumps over the lazy dog\"",
|
||||
]
|
||||
"###);
|
||||
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||
@ -359,5 +485,13 @@ fn test_typo_synonyms() {
|
||||
// TODO: is this correct? The interaction of ngrams + synonyms means that the
|
||||
// multi-word synonyms end up having a typo cost. This is probably not what we want.
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[21, 0]");
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[21, 0, 22]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the fast brownish fox jumps over the lackadaisical dog\"",
|
||||
"\"the quick brown fox jumps over the lazy dog\"",
|
||||
"\"the quick brown fox jumps over the lackadaisical dog\"",
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
@ -12,9 +12,12 @@ account by the proximity ranking rule.
|
||||
7. The search is capable of returning no results if no documents match the query
|
||||
*/
|
||||
|
||||
use crate::{index::tests::TempIndex, Criterion, Search, SearchResult, TermsMatchingStrategy};
|
||||
use crate::{
|
||||
index::tests::TempIndex, search::new::tests::collect_field_values, Criterion, Search,
|
||||
SearchResult, TermsMatchingStrategy,
|
||||
};
|
||||
|
||||
fn create_quick_brown_fox_trivial_index() -> TempIndex {
|
||||
fn create_index() -> TempIndex {
|
||||
let index = TempIndex::new();
|
||||
|
||||
index
|
||||
@ -126,7 +129,7 @@ fn create_quick_brown_fox_trivial_index() -> TempIndex {
|
||||
|
||||
#[test]
|
||||
fn test_words_tms_last_simple() {
|
||||
let index = create_quick_brown_fox_trivial_index();
|
||||
let index = create_index();
|
||||
|
||||
let txn = index.read_txn().unwrap();
|
||||
let mut s = Search::new(&txn, &index);
|
||||
@ -136,6 +139,31 @@ fn test_words_tms_last_simple() {
|
||||
|
||||
// 6 and 7 have the same score because "the" appears twice
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 10, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 8, 6, 7, 5, 4, 11, 12, 3]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the quick brown fox jumps over the lazy dog\"",
|
||||
"\"the brown quick fox jumps over the lazy dog\"",
|
||||
"\"the mighty and quick brown fox jumps over the lazy dog\"",
|
||||
"\"the great quick brown fox jumps over the lazy dog\"",
|
||||
"\"this quick brown and very scary fox jumps over the lazy dog\"",
|
||||
"\"this quick brown and scary fox jumps over the lazy dog\"",
|
||||
"\"the quick brown fox jumps over the really lazy dog\"",
|
||||
"\"the brown quick fox jumps over the really lazy dog\"",
|
||||
"\"the brown quick fox immediately jumps over the really lazy dog\"",
|
||||
"\"the brown quick fox immediately jumps over the really lazy blue dog\"",
|
||||
"\"the quick brown. quick brown fox. brown fox jumps. fox jumps over. over the lazy. the lazy dog.\"",
|
||||
"\"the, quick, brown, fox, jumps, over, the, lazy, dog\"",
|
||||
"\"the quick brown fox jumps over the lazy\"",
|
||||
"\"the quick brown fox jumps over\"",
|
||||
"\"the quick brown fox jumps over the\"",
|
||||
"\"the quick brown fox jumps\"",
|
||||
"\"the quick brown fox\"",
|
||||
"\"the quick brown fox talks to the lazy and slow dog\"",
|
||||
"\"the quick brown fox talks to the lazy dog\"",
|
||||
"\"the quick brown\"",
|
||||
]
|
||||
"###);
|
||||
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.query("extravagant the quick brown fox jumps over the lazy dog");
|
||||
@ -146,7 +174,7 @@ fn test_words_tms_last_simple() {
|
||||
|
||||
#[test]
|
||||
fn test_words_tms_last_phrase() {
|
||||
let index = create_quick_brown_fox_trivial_index();
|
||||
let index = create_index();
|
||||
|
||||
let txn = index.read_txn().unwrap();
|
||||
let mut s = Search::new(&txn, &index);
|
||||
@ -156,6 +184,21 @@ fn test_words_tms_last_phrase() {
|
||||
|
||||
// "The quick brown fox" is a phrase, not deleted by this term matching strategy
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 17, 21, 8, 6, 7, 5, 4, 11, 12]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the quick brown fox jumps over the lazy dog\"",
|
||||
"\"the quick brown fox jumps over the really lazy dog\"",
|
||||
"\"the quick brown. quick brown fox. brown fox jumps. fox jumps over. over the lazy. the lazy dog.\"",
|
||||
"\"the quick brown fox jumps over the lazy\"",
|
||||
"\"the quick brown fox jumps over\"",
|
||||
"\"the quick brown fox jumps over the\"",
|
||||
"\"the quick brown fox jumps\"",
|
||||
"\"the quick brown fox\"",
|
||||
"\"the quick brown fox talks to the lazy and slow dog\"",
|
||||
"\"the quick brown fox talks to the lazy dog\"",
|
||||
]
|
||||
"###);
|
||||
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.query("\"the quick brown fox\" jumps over the \"lazy\" dog");
|
||||
@ -165,6 +208,17 @@ fn test_words_tms_last_phrase() {
|
||||
// "lazy" is a phrase, not deleted by this term matching strategy
|
||||
// but words before it can be deleted
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 17, 21, 8, 11, 12]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the quick brown fox jumps over the lazy dog\"",
|
||||
"\"the quick brown fox jumps over the really lazy dog\"",
|
||||
"\"the quick brown. quick brown fox. brown fox jumps. fox jumps over. over the lazy. the lazy dog.\"",
|
||||
"\"the quick brown fox jumps over the lazy\"",
|
||||
"\"the quick brown fox talks to the lazy and slow dog\"",
|
||||
"\"the quick brown fox talks to the lazy dog\"",
|
||||
]
|
||||
"###);
|
||||
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.query("\"the quick brown fox jumps over the lazy dog\"");
|
||||
@ -173,6 +227,12 @@ fn test_words_tms_last_phrase() {
|
||||
|
||||
// The whole query is a phrase, no terms are removed
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the quick brown fox jumps over the lazy dog\"",
|
||||
]
|
||||
"###);
|
||||
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.query("\"the quick brown fox jumps over the lazy dog");
|
||||
@ -181,11 +241,17 @@ fn test_words_tms_last_phrase() {
|
||||
|
||||
// The whole query is still a phrase, even without closing quotes, so no terms are removed
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the quick brown fox jumps over the lazy dog\"",
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_words_proximity_tms_last_simple() {
|
||||
let index = create_quick_brown_fox_trivial_index();
|
||||
let index = create_index();
|
||||
index
|
||||
.update_settings(|s| {
|
||||
s.set_criteria(vec![Criterion::Words, Criterion::Proximity]);
|
||||
@ -200,6 +266,31 @@ fn test_words_proximity_tms_last_simple() {
|
||||
|
||||
// 7 is better than 6 because of the proximity between "the" and its surrounding terms
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 19, 20, 16, 15, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the quick brown fox jumps over the lazy dog\"",
|
||||
"\"the quick brown. quick brown fox. brown fox jumps. fox jumps over. over the lazy. the lazy dog.\"",
|
||||
"\"the great quick brown fox jumps over the lazy dog\"",
|
||||
"\"the quick brown fox jumps over the really lazy dog\"",
|
||||
"\"the mighty and quick brown fox jumps over the lazy dog\"",
|
||||
"\"the brown quick fox jumps over the lazy dog\"",
|
||||
"\"the brown quick fox jumps over the really lazy dog\"",
|
||||
"\"the brown quick fox immediately jumps over the really lazy dog\"",
|
||||
"\"the brown quick fox immediately jumps over the really lazy blue dog\"",
|
||||
"\"this quick brown and scary fox jumps over the lazy dog\"",
|
||||
"\"this quick brown and very scary fox jumps over the lazy dog\"",
|
||||
"\"the, quick, brown, fox, jumps, over, the, lazy, dog\"",
|
||||
"\"the quick brown fox jumps over the lazy\"",
|
||||
"\"the quick brown fox jumps over the\"",
|
||||
"\"the quick brown fox jumps over\"",
|
||||
"\"the quick brown fox jumps\"",
|
||||
"\"the quick brown fox\"",
|
||||
"\"the quick brown fox talks to the lazy and slow dog\"",
|
||||
"\"the quick brown fox talks to the lazy dog\"",
|
||||
"\"the quick brown\"",
|
||||
]
|
||||
"###);
|
||||
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.query("the brown quick fox jumps over the lazy dog");
|
||||
@ -208,11 +299,36 @@ fn test_words_proximity_tms_last_simple() {
|
||||
|
||||
// 10 is better than 9 because of the proximity between "quick" and "brown"
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[10, 18, 19, 9, 20, 21, 14, 17, 13, 16, 15, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the brown quick fox jumps over the lazy dog\"",
|
||||
"\"the brown quick fox jumps over the really lazy dog\"",
|
||||
"\"the brown quick fox immediately jumps over the really lazy dog\"",
|
||||
"\"the quick brown fox jumps over the lazy dog\"",
|
||||
"\"the brown quick fox immediately jumps over the really lazy blue dog\"",
|
||||
"\"the quick brown. quick brown fox. brown fox jumps. fox jumps over. over the lazy. the lazy dog.\"",
|
||||
"\"the great quick brown fox jumps over the lazy dog\"",
|
||||
"\"the quick brown fox jumps over the really lazy dog\"",
|
||||
"\"the mighty and quick brown fox jumps over the lazy dog\"",
|
||||
"\"this quick brown and scary fox jumps over the lazy dog\"",
|
||||
"\"this quick brown and very scary fox jumps over the lazy dog\"",
|
||||
"\"the, quick, brown, fox, jumps, over, the, lazy, dog\"",
|
||||
"\"the quick brown fox jumps over the lazy\"",
|
||||
"\"the quick brown fox jumps over the\"",
|
||||
"\"the quick brown fox jumps over\"",
|
||||
"\"the quick brown fox jumps\"",
|
||||
"\"the quick brown fox\"",
|
||||
"\"the quick brown fox talks to the lazy and slow dog\"",
|
||||
"\"the quick brown fox talks to the lazy dog\"",
|
||||
"\"the quick brown\"",
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_words_proximity_tms_last_phrase() {
|
||||
let index = create_quick_brown_fox_trivial_index();
|
||||
let index = create_index();
|
||||
index
|
||||
.update_settings(|s| {
|
||||
s.set_criteria(vec![Criterion::Words, Criterion::Proximity]);
|
||||
@ -228,6 +344,26 @@ fn test_words_proximity_tms_last_phrase() {
|
||||
// "quick brown" is a phrase. The proximity of its first and last words
|
||||
// to their adjacent query words should be taken into account
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 16, 15, 8, 7, 6, 5, 4, 11, 12, 3]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the quick brown fox jumps over the lazy dog\"",
|
||||
"\"the quick brown. quick brown fox. brown fox jumps. fox jumps over. over the lazy. the lazy dog.\"",
|
||||
"\"the great quick brown fox jumps over the lazy dog\"",
|
||||
"\"the quick brown fox jumps over the really lazy dog\"",
|
||||
"\"the mighty and quick brown fox jumps over the lazy dog\"",
|
||||
"\"this quick brown and scary fox jumps over the lazy dog\"",
|
||||
"\"this quick brown and very scary fox jumps over the lazy dog\"",
|
||||
"\"the quick brown fox jumps over the lazy\"",
|
||||
"\"the quick brown fox jumps over the\"",
|
||||
"\"the quick brown fox jumps over\"",
|
||||
"\"the quick brown fox jumps\"",
|
||||
"\"the quick brown fox\"",
|
||||
"\"the quick brown fox talks to the lazy and slow dog\"",
|
||||
"\"the quick brown fox talks to the lazy dog\"",
|
||||
"\"the quick brown\"",
|
||||
]
|
||||
"###);
|
||||
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.query("the \"quick brown\" \"fox jumps\" over the lazy dog");
|
||||
@ -238,11 +374,27 @@ fn test_words_proximity_tms_last_phrase() {
|
||||
// to their adjacent query words should be taken into account.
|
||||
// The same applies to `fox jumps`.
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 16, 15, 8, 7, 6, 5]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the quick brown fox jumps over the lazy dog\"",
|
||||
"\"the quick brown. quick brown fox. brown fox jumps. fox jumps over. over the lazy. the lazy dog.\"",
|
||||
"\"the great quick brown fox jumps over the lazy dog\"",
|
||||
"\"the quick brown fox jumps over the really lazy dog\"",
|
||||
"\"the mighty and quick brown fox jumps over the lazy dog\"",
|
||||
"\"this quick brown and scary fox jumps over the lazy dog\"",
|
||||
"\"this quick brown and very scary fox jumps over the lazy dog\"",
|
||||
"\"the quick brown fox jumps over the lazy\"",
|
||||
"\"the quick brown fox jumps over the\"",
|
||||
"\"the quick brown fox jumps over\"",
|
||||
"\"the quick brown fox jumps\"",
|
||||
]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn test_words_tms_all() {
|
||||
let index = create_quick_brown_fox_trivial_index();
|
||||
let index = create_index();
|
||||
index
|
||||
.update_settings(|s| {
|
||||
s.set_criteria(vec![Criterion::Words, Criterion::Proximity]);
|
||||
@ -256,6 +408,23 @@ fn test_words_tms_all() {
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[9, 21, 14, 17, 13, 10, 18, 19, 20, 16, 15, 22]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @r###"
|
||||
[
|
||||
"\"the quick brown fox jumps over the lazy dog\"",
|
||||
"\"the quick brown. quick brown fox. brown fox jumps. fox jumps over. over the lazy. the lazy dog.\"",
|
||||
"\"the great quick brown fox jumps over the lazy dog\"",
|
||||
"\"the quick brown fox jumps over the really lazy dog\"",
|
||||
"\"the mighty and quick brown fox jumps over the lazy dog\"",
|
||||
"\"the brown quick fox jumps over the lazy dog\"",
|
||||
"\"the brown quick fox jumps over the really lazy dog\"",
|
||||
"\"the brown quick fox immediately jumps over the really lazy dog\"",
|
||||
"\"the brown quick fox immediately jumps over the really lazy blue dog\"",
|
||||
"\"this quick brown and scary fox jumps over the lazy dog\"",
|
||||
"\"this quick brown and very scary fox jumps over the lazy dog\"",
|
||||
"\"the, quick, brown, fox, jumps, over, the, lazy, dog\"",
|
||||
]
|
||||
"###);
|
||||
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.query("extravagant");
|
||||
@ -263,4 +432,6 @@ fn test_words_tms_all() {
|
||||
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||
|
||||
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[]");
|
||||
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||
insta::assert_debug_snapshot!(texts, @"[]");
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user