mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-24 21:50:07 +01:00
Add more search tests
This commit is contained in:
parent
4c8a0179ba
commit
6e50f23896
@ -3,8 +3,10 @@ pub mod distinct;
|
|||||||
pub mod language;
|
pub mod language;
|
||||||
pub mod ngram_split_words;
|
pub mod ngram_split_words;
|
||||||
pub mod proximity;
|
pub mod proximity;
|
||||||
|
pub mod proximity_typo;
|
||||||
pub mod sort;
|
pub mod sort;
|
||||||
pub mod typo;
|
pub mod typo;
|
||||||
|
pub mod typo_proximity;
|
||||||
pub mod words_tms;
|
pub mod words_tms;
|
||||||
|
|
||||||
fn collect_field_values(
|
fn collect_field_values(
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
/*!
|
/*!
|
||||||
This module tests the Proximity ranking rule:
|
This module tests the Proximity ranking rule:
|
||||||
|
|
||||||
1. A sprximity of >7 always has the same cost.
|
1. A proximity of >7 always has the same cost.
|
||||||
|
|
||||||
2. Phrase terms can be in proximity to other terms via their start and end words,
|
2. Phrase terms can be in proximity to other terms via their start and end words,
|
||||||
but we need to make sure that the phrase exists in the document that meets this
|
but we need to make sure that the phrase exists in the document that meets this
|
||||||
|
68
milli/src/search/new/tests/proximity_typo.rs
Normal file
68
milli/src/search/new/tests/proximity_typo.rs
Normal file
@ -0,0 +1,68 @@
|
|||||||
|
/*!
|
||||||
|
This module tests the interactions between the proximity and typo ranking rules.
|
||||||
|
|
||||||
|
The proximity ranking rule should transform the query graph such that it
|
||||||
|
only contains the word pairs that it used to compute its bucket.
|
||||||
|
*/
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
index::tests::TempIndex, search::new::tests::collect_field_values, Criterion, Search,
|
||||||
|
SearchResult, TermsMatchingStrategy,
|
||||||
|
};
|
||||||
|
|
||||||
|
fn create_index() -> TempIndex {
|
||||||
|
let index = TempIndex::new();
|
||||||
|
|
||||||
|
index
|
||||||
|
.update_settings(|s| {
|
||||||
|
s.set_primary_key("id".to_owned());
|
||||||
|
s.set_searchable_fields(vec!["text".to_owned()]);
|
||||||
|
s.set_criteria(vec![Criterion::Words, Criterion::Proximity, Criterion::Typo]);
|
||||||
|
})
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
index
|
||||||
|
.add_documents(documents!([
|
||||||
|
// Basic trap.
|
||||||
|
//
|
||||||
|
// We have one document with the perfect word pair: `sommer - holiday`
|
||||||
|
// and another with the perfect word pair: `sommer holidty`.
|
||||||
|
//
|
||||||
|
// The proximity ranking rule will put them both in the same bucket, and it
|
||||||
|
// should minify the query graph to make it represent:
|
||||||
|
// EITHER:
|
||||||
|
// sommer + holiday
|
||||||
|
// OR:
|
||||||
|
// sommer + holidty
|
||||||
|
//
|
||||||
|
// Such that the child typo ranking rule does not find any match
|
||||||
|
// for its zero-typo bucket `summer + holiday`, even though both documents
|
||||||
|
// contain these two exact words.
|
||||||
|
{
|
||||||
|
"id": 0,
|
||||||
|
"text": "summer. holiday. sommer holidty"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"text": "summer. holiday. sommer holiday"
|
||||||
|
},
|
||||||
|
|
||||||
|
]))
|
||||||
|
.unwrap();
|
||||||
|
index
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_trap_basic() {
|
||||||
|
let index = create_index();
|
||||||
|
let txn = index.read_txn().unwrap();
|
||||||
|
|
||||||
|
let mut s = Search::new(&txn, &index);
|
||||||
|
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||||
|
s.query("summer holiday");
|
||||||
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 0, 3, 2]");
|
||||||
|
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||||
|
insta::assert_debug_snapshot!(texts, @r###"
|
||||||
|
"###);
|
||||||
|
}
|
126
milli/src/search/new/tests/typo_proximity.rs
Normal file
126
milli/src/search/new/tests/typo_proximity.rs
Normal file
@ -0,0 +1,126 @@
|
|||||||
|
/*!
|
||||||
|
This module tests the interactions between the typo and proximity ranking rules.
|
||||||
|
|
||||||
|
The typo ranking rule should transform the query graph such that it only contains
|
||||||
|
the combinations of word derivations that it used to compute its bucket.
|
||||||
|
|
||||||
|
The proximity ranking rule should then look for proximities only between those specific derivations.
|
||||||
|
For example, given the search query `beautiful summer` and the dataset:
|
||||||
|
```text
|
||||||
|
{ "id": 0, "text": "beautigul summer...... beautiful day in the summer" }
|
||||||
|
{ "id": 1, "text": "beautiful summer" }
|
||||||
|
```
|
||||||
|
Then the document with id `1` should be returned before `0`.
|
||||||
|
The proximity ranking rule is not allowed to look for the proximity between `beautigul` and `summer`
|
||||||
|
because the typo ranking rule before it only used the derivation `beautiful`.
|
||||||
|
*/
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
index::tests::TempIndex, search::new::tests::collect_field_values, Criterion, Search,
|
||||||
|
SearchResult, TermsMatchingStrategy,
|
||||||
|
};
|
||||||
|
|
||||||
|
fn create_index() -> TempIndex {
|
||||||
|
let index = TempIndex::new();
|
||||||
|
|
||||||
|
index
|
||||||
|
.update_settings(|s| {
|
||||||
|
s.set_primary_key("id".to_owned());
|
||||||
|
s.set_searchable_fields(vec!["text".to_owned()]);
|
||||||
|
s.set_criteria(vec![Criterion::Words, Criterion::Typo, Criterion::Proximity]);
|
||||||
|
})
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
index
|
||||||
|
.add_documents(documents!([
|
||||||
|
// trap explained in the module documentation
|
||||||
|
{
|
||||||
|
"id": 0,
|
||||||
|
"text": "beautigul summer. beautiful x y z summer"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"text": "beautiful summer"
|
||||||
|
},
|
||||||
|
// the next 2 documents set up a more complicated trap
|
||||||
|
// with the query `beautiful summer`, we will have:
|
||||||
|
// 1. documents with no typos, id 0 and 1
|
||||||
|
// 2. documents with 1 typo: id 2 and 3, those are interpreted as EITHER
|
||||||
|
// - id 2: "beautigul + summer" ; OR
|
||||||
|
// - id 3: "beautiful + sommer"
|
||||||
|
// To sort these two documents, the proximity ranking rule must use only the
|
||||||
|
// word pairs: `beautigul -- summer` and `beautiful -- sommer` even though
|
||||||
|
// all variations of `beautiful` and `summer` were used by the typo ranking rule.
|
||||||
|
{
|
||||||
|
"id": 2,
|
||||||
|
"text": "beautigul sommer. beautigul x summer"
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 3,
|
||||||
|
"text": "beautiful sommer"
|
||||||
|
},
|
||||||
|
// The next two documents lay out an even more complex trap, which the current implementation
|
||||||
|
// fails to handle properly.
|
||||||
|
// With the user query `delicious sweet dessert`, the typo ranking rule will return one bucket of:
|
||||||
|
// - id 4: delicitous + sweet + dessert
|
||||||
|
// - id 5: delicious + sweet + desgert
|
||||||
|
// The word pairs that the proximity ranking rule is allowed to use are
|
||||||
|
// EITHER:
|
||||||
|
// delicitous -- sweet AND sweet -- dessert
|
||||||
|
// OR
|
||||||
|
// delicious -- sweet AND sweet -- desgert
|
||||||
|
// So the word pair to use for the terms `sweet` and `dessert` depends on the
|
||||||
|
// word pairs explored before them.
|
||||||
|
{
|
||||||
|
"id": 4,
|
||||||
|
"text": "delicitous. sweet. dessert. delicitous sweet desgert",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 5,
|
||||||
|
"text": "delicious. sweet desgert. delicious sweet desgert",
|
||||||
|
},
|
||||||
|
]))
|
||||||
|
.unwrap();
|
||||||
|
index
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_trap_basic_and_complex1() {
|
||||||
|
let index = create_index();
|
||||||
|
let txn = index.read_txn().unwrap();
|
||||||
|
|
||||||
|
let mut s = Search::new(&txn, &index);
|
||||||
|
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||||
|
s.query("beautiful summer");
|
||||||
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[1, 0, 3, 2]");
|
||||||
|
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||||
|
insta::assert_debug_snapshot!(texts, @r###"
|
||||||
|
[
|
||||||
|
"\"beautiful summer\"",
|
||||||
|
"\"beautigul summer. beautiful x y z summer\"",
|
||||||
|
"\"beautiful sommer\"",
|
||||||
|
"\"beautigul sommer. beautigul x summer\"",
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
fn test_trap_complex2() {
|
||||||
|
let index = create_index();
|
||||||
|
let txn = index.read_txn().unwrap();
|
||||||
|
|
||||||
|
let mut s = Search::new(&txn, &index);
|
||||||
|
s.terms_matching_strategy(TermsMatchingStrategy::All);
|
||||||
|
s.query("delicious sweet dessert");
|
||||||
|
let SearchResult { documents_ids, .. } = s.execute().unwrap();
|
||||||
|
insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 5]");
|
||||||
|
let texts = collect_field_values(&index, &txn, "text", &documents_ids);
|
||||||
|
// TODO: this is incorrect. 5 should appear before 4
|
||||||
|
insta::assert_debug_snapshot!(texts, @r###"
|
||||||
|
[
|
||||||
|
"\"delicitous. sweet. dessert. delicitous sweet desgert\"",
|
||||||
|
"\"delicious. sweet desgert. delicious sweet desgert\"",
|
||||||
|
]
|
||||||
|
"###);
|
||||||
|
}
|
Loading…
x
Reference in New Issue
Block a user