mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-11 15:38:55 +01:00
Add more search tests
This commit is contained in:
parent
62b9c6fbee
commit
959e4607bb
@ -41,7 +41,7 @@ pub fn apply_distinct_rule(
|
|||||||
}
|
}
|
||||||
|
|
||||||
/// Apply the distinct rule defined by [`apply_distinct_rule`] for a single document id.
|
/// Apply the distinct rule defined by [`apply_distinct_rule`] for a single document id.
|
||||||
fn distinct_single_docid(
|
pub fn distinct_single_docid(
|
||||||
index: &Index,
|
index: &Index,
|
||||||
txn: &RoTxn,
|
txn: &RoTxn,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
|
@ -1,3 +1,4 @@
|
|||||||
|
mod bucket_sort;
|
||||||
mod db_cache;
|
mod db_cache;
|
||||||
mod distinct;
|
mod distinct;
|
||||||
mod graph_based_ranking_rule;
|
mod graph_based_ranking_rule;
|
||||||
@ -31,7 +32,8 @@ pub use logger::detailed::DetailedSearchLogger;
|
|||||||
pub use logger::{DefaultSearchLogger, SearchLogger};
|
pub use logger::{DefaultSearchLogger, SearchLogger};
|
||||||
use query_graph::{QueryGraph, QueryNode};
|
use query_graph::{QueryGraph, QueryNode};
|
||||||
use query_term::{located_query_terms_from_string, Phrase, QueryTerm};
|
use query_term::{located_query_terms_from_string, Phrase, QueryTerm};
|
||||||
use ranking_rules::{bucket_sort, PlaceholderQuery, RankingRuleOutput, RankingRuleQueryTrait};
|
use ranking_rules::{PlaceholderQuery, RankingRuleOutput, RankingRuleQueryTrait};
|
||||||
|
use bucket_sort::bucket_sort;
|
||||||
use resolve_query_graph::PhraseDocIdsCache;
|
use resolve_query_graph::PhraseDocIdsCache;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use words::Words;
|
use words::Words;
|
||||||
|
590
milli/src/search/new/tests/distinct.rs
Normal file
590
milli/src/search/new/tests/distinct.rs
Normal file
@ -0,0 +1,590 @@
|
|||||||
|
/*!
|
||||||
|
This module tests the "distinct attribute" feature, and its
|
||||||
|
interaction with other ranking rules. It checks that:
|
||||||
|
|
||||||
|
1. no duplicate distinct attributes are ever returned
|
||||||
|
2. only the best document (according to the search rules) for each distinct value appears in the result
|
||||||
|
3. if a document does not have a distinct attribute, then the distinct rule does not apply to it
|
||||||
|
|
||||||
|
It doesn't test properly:
|
||||||
|
- combination of distinct + exhaustive_nbr_hits (because we know it's incorrect)
|
||||||
|
- distinct attributes with arrays (because we know it's incorrect as well)
|
||||||
|
*/
|
||||||
|
|
||||||
|
use std::collections::HashSet;
|
||||||
|
|
||||||
|
use big_s::S;
|
||||||
|
use heed::RoTxn;
|
||||||
|
use maplit::hashset;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
index::tests::TempIndex, AscDesc, Criterion, Index, Member, Search, SearchResult,
|
||||||
|
TermsMatchingStrategy,
|
||||||
|
};
|
||||||
|
|
||||||
|
use super::collect_field_values;
|
||||||
|
|
||||||
|
fn create_index() -> TempIndex {
|
||||||
|
let index = TempIndex::new();
|
||||||
|
|
||||||
|
index
|
||||||
|
.update_settings(|s| {
|
||||||
|
s.set_primary_key("id".to_owned());
|
||||||
|
s.set_searchable_fields(vec!["text".to_owned()]);
|
||||||
|
s.set_sortable_fields(hashset! { S("rank1"), S("letter") });
|
||||||
|
s.set_distinct_field("letter".to_owned());
|
||||||
|
s.set_criteria(vec![Criterion::Words]);
|
||||||
|
})
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
index
|
||||||
|
.add_documents(documents!([
|
||||||
|
{
|
||||||
|
"id": 0,
|
||||||
|
"letter": "A",
|
||||||
|
"rank1": 0,
|
||||||
|
"text": "the quick brown fox jamps over the lazy dog",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"letter": "A",
|
||||||
|
"rank1": 1,
|
||||||
|
"text": "the quick brown fox jumpes over the lazy dog",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 2,
|
||||||
|
"letter": "B",
|
||||||
|
"rank1": 0,
|
||||||
|
"text": "the quick brown foxjumps over the lazy dog",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 3,
|
||||||
|
"letter": "B",
|
||||||
|
"rank1": 1,
|
||||||
|
"text": "the quick brown fox jumps over the lazy dog",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 4,
|
||||||
|
"letter": "B",
|
||||||
|
"rank1": 2,
|
||||||
|
"text": "the quick brown fox jumps over the lazy",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 5,
|
||||||
|
"letter": "C",
|
||||||
|
"rank1": 0,
|
||||||
|
"text": "the quickbrownfox jumps over the lazy",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 6,
|
||||||
|
"letter": "C",
|
||||||
|
"rank1": 1,
|
||||||
|
"text": "the quick brown fox jumpss over the lazy",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 7,
|
||||||
|
"letter": "C",
|
||||||
|
"rank1": 2,
|
||||||
|
"text": "the quick brown fox jumps over the lazy",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 8,
|
||||||
|
"letter": "D",
|
||||||
|
"rank1": 0,
|
||||||
|
"text": "the quick brown fox jumps over the lazy",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 9,
|
||||||
|
"letter": "E",
|
||||||
|
"rank1": 0,
|
||||||
|
"text": "the quick brown fox jumps over the lazy",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 10,
|
||||||
|
"letter": "E",
|
||||||
|
"rank1": 1,
|
||||||
|
"text": "the quackbrown foxjunps over",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 11,
|
||||||
|
"letter": "E",
|
||||||
|
"rank1": 2,
|
||||||
|
"text": "the quicko browno fox junps over",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 12,
|
||||||
|
"letter": "E",
|
||||||
|
"rank1": 3,
|
||||||
|
"text": "the quicko browno fox jumps over",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 13,
|
||||||
|
"letter": "E",
|
||||||
|
"rank1": 4,
|
||||||
|
"text": "the quick brewn fox jumps over",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 14,
|
||||||
|
"letter": "E",
|
||||||
|
"rank1": 5,
|
||||||
|
"text": "the quick brown fox jumps over",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 15,
|
||||||
|
"letter": "F",
|
||||||
|
"rank1": 0,
|
||||||
|
"text": "the quick brownf fox jumps over",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 16,
|
||||||
|
"letter": "F",
|
||||||
|
"rank1": 1,
|
||||||
|
"text": "the quic brown fox jamps over",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 17,
|
||||||
|
"letter": "F",
|
||||||
|
"rank1": 2,
|
||||||
|
"text": "thequick browns fox jimps",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 18,
|
||||||
|
"letter": "G",
|
||||||
|
"rank1": 0,
|
||||||
|
"text": "the qick brown fox jumps",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 19,
|
||||||
|
"letter": "G",
|
||||||
|
"rank1": 1,
|
||||||
|
"text": "the quick brownfoxjumps",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 20,
|
||||||
|
"letter": "H",
|
||||||
|
"rank1": 0,
|
||||||
|
"text": "the quick brow fox jumps",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 21,
|
||||||
|
"letter": "I",
|
||||||
|
"rank1": 0,
|
||||||
|
"text": "the quick brown fox jpmps",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 22,
|
||||||
|
"letter": "I",
|
||||||
|
"rank1": 1,
|
||||||
|
"text": "the quick brown fox jumps",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 23,
|
||||||
|
"letter": "I",
|
||||||
|
"rank1": 2,
|
||||||
|
"text": "the quick",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 24,
|
||||||
|
"rank1": 0,
|
||||||
|
"text": "the quick",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 25,
|
||||||
|
"rank1": 1,
|
||||||
|
"text": "the quick brown",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 26,
|
||||||
|
"rank1": 2,
|
||||||
|
"text": "the quick brown fox",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 26,
|
||||||
|
"rank1": 3,
|
||||||
|
"text": "the quick brown fox jumps over the lazy dog",
|
||||||
|
},
|
||||||
|
]))
|
||||||
|
.unwrap();
|
||||||
|
index
|
||||||
|
}
|
||||||
|
|
||||||
|
fn verify_distinct(index: &Index, txn: &RoTxn, docids: &[u32]) -> Vec<String> {
|
||||||
|
let vs = collect_field_values(index, txn, index.distinct_field(txn).unwrap().unwrap(), docids);
|
||||||
|
|
||||||
|
let mut unique = HashSet::new();
|
||||||
|
for v in vs.iter() {
|
||||||
|
if v == "__does_not_exist__" {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
assert!(unique.insert(v.clone()));
|
||||||
|
}
|
||||||
|
|
||||||
|
vs
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Placeholder search (no query, no sort) on the index built by
/// `create_index`: checks the returned document ids and that no distinct
/// value is returned twice.
#[test]
fn test_distinct_placeholder_no_ranking_rules() {
    let index = create_index();
    let txn = index.read_txn().unwrap();

    let search = Search::new(&txn, &index);
    let SearchResult { documents_ids, .. } = search.execute().unwrap();
    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 5, 8, 9, 15, 18, 20, 21, 24, 25, 26]");

    let distinct_values = verify_distinct(&index, &txn, &documents_ids);
    insta::assert_debug_snapshot!(distinct_values, @r###"
    [
        "\"A\"",
        "\"B\"",
        "\"C\"",
        "\"D\"",
        "\"E\"",
        "\"F\"",
        "\"G\"",
        "\"H\"",
        "\"I\"",
        "__does_not_exist__",
        "__does_not_exist__",
        "__does_not_exist__",
    ]
    "###);
}
|
||||||
|
|
||||||
|
/// Placeholder search combined with the `sort` ranking rule and the distinct
/// attribute. Runs three searches with different sort criteria and, for each,
/// snapshots the document ids, the distinct values and the `rank1` values.
#[test]
fn test_distinct_placeholder_sort() {
    let index = create_index();
    index
        .update_settings(|settings| {
            settings.set_criteria(vec![Criterion::Sort]);
        })
        .unwrap();

    let txn = index.read_txn().unwrap();

    // 1. Sort by `rank1`, descending.
    let mut search = Search::new(&txn, &index);
    search.sort_criteria(vec![AscDesc::Desc(Member::Field(S("rank1")))]);

    let SearchResult { documents_ids, .. } = search.execute().unwrap();
    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[14, 26, 4, 7, 17, 23, 1, 19, 25, 8, 20, 24]");

    let distinct_values = verify_distinct(&index, &txn, &documents_ids);
    insta::assert_debug_snapshot!(distinct_values, @r###"
    [
        "\"E\"",
        "__does_not_exist__",
        "\"B\"",
        "\"C\"",
        "\"F\"",
        "\"I\"",
        "\"A\"",
        "\"G\"",
        "__does_not_exist__",
        "\"D\"",
        "\"H\"",
        "__does_not_exist__",
    ]
    "###);

    let rank_values = collect_field_values(&index, &txn, "rank1", &documents_ids);
    insta::assert_debug_snapshot!(rank_values, @r###"
    [
        "5",
        "3",
        "2",
        "2",
        "2",
        "2",
        "1",
        "1",
        "1",
        "0",
        "0",
        "0",
    ]
    "###);

    // 2. Sort by `letter` (the distinct attribute itself), descending.
    let mut search = Search::new(&txn, &index);
    search.sort_criteria(vec![AscDesc::Desc(Member::Field(S("letter")))]);

    let SearchResult { documents_ids, .. } = search.execute().unwrap();
    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[21, 20, 18, 15, 9, 8, 5, 2, 0, 24, 25, 26]");

    let distinct_values = verify_distinct(&index, &txn, &documents_ids);
    insta::assert_debug_snapshot!(distinct_values, @r###"
    [
        "\"I\"",
        "\"H\"",
        "\"G\"",
        "\"F\"",
        "\"E\"",
        "\"D\"",
        "\"C\"",
        "\"B\"",
        "\"A\"",
        "__does_not_exist__",
        "__does_not_exist__",
        "__does_not_exist__",
    ]
    "###);

    let rank_values = collect_field_values(&index, &txn, "rank1", &documents_ids);
    insta::assert_debug_snapshot!(rank_values, @r###"
    [
        "0",
        "0",
        "0",
        "0",
        "0",
        "0",
        "0",
        "0",
        "0",
        "0",
        "1",
        "3",
    ]
    "###);

    // 3. Sort by `letter` descending, then by `rank1` descending.
    let mut search = Search::new(&txn, &index);
    search.sort_criteria(vec![
        AscDesc::Desc(Member::Field(S("letter"))),
        AscDesc::Desc(Member::Field(S("rank1"))),
    ]);

    let SearchResult { documents_ids, .. } = search.execute().unwrap();
    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[23, 20, 19, 17, 14, 8, 7, 4, 1, 26, 25, 24]");

    let distinct_values = verify_distinct(&index, &txn, &documents_ids);
    insta::assert_debug_snapshot!(distinct_values, @r###"
    [
        "\"I\"",
        "\"H\"",
        "\"G\"",
        "\"F\"",
        "\"E\"",
        "\"D\"",
        "\"C\"",
        "\"B\"",
        "\"A\"",
        "__does_not_exist__",
        "__does_not_exist__",
        "__does_not_exist__",
    ]
    "###);

    let rank_values = collect_field_values(&index, &txn, "rank1", &documents_ids);
    insta::assert_debug_snapshot!(rank_values, @r###"
    [
        "2",
        "0",
        "1",
        "2",
        "5",
        "0",
        "2",
        "2",
        "1",
        "3",
        "1",
        "0",
    ]
    "###);
}
|
||||||
|
|
||||||
|
/// Distinct attribute combined with the `words` ranking rule: runs a full
/// sentence query with `TermsMatchingStrategy::Last` and snapshots the
/// returned ids, their distinct values and their `text` values.
#[test]
fn test_distinct_words() {
    let index = create_index();
    index
        .update_settings(|settings| {
            settings.set_criteria(vec![Criterion::Words]);
        })
        .unwrap();

    let txn = index.read_txn().unwrap();

    let mut search = Search::new(&txn, &index);
    search.terms_matching_strategy(TermsMatchingStrategy::Last);
    search.query("the quick brown fox jumps over the lazy dog");

    let SearchResult { documents_ids, .. } = search.execute().unwrap();
    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 26, 5, 8, 9, 15, 18, 20, 21, 25, 24]");

    let distinct_values = verify_distinct(&index, &txn, &documents_ids);
    insta::assert_debug_snapshot!(distinct_values, @r###"
    [
        "\"A\"",
        "\"B\"",
        "__does_not_exist__",
        "\"C\"",
        "\"D\"",
        "\"E\"",
        "\"F\"",
        "\"G\"",
        "\"H\"",
        "\"I\"",
        "__does_not_exist__",
        "__does_not_exist__",
    ]
    "###);

    let text_values = collect_field_values(&index, &txn, "text", &documents_ids);
    insta::assert_debug_snapshot!(text_values, @r###"
    [
        "\"the quick brown fox jamps over the lazy dog\"",
        "\"the quick brown foxjumps over the lazy dog\"",
        "\"the quick brown fox jumps over the lazy dog\"",
        "\"the quickbrownfox jumps over the lazy\"",
        "\"the quick brown fox jumps over the lazy\"",
        "\"the quick brown fox jumps over the lazy\"",
        "\"the quick brownf fox jumps over\"",
        "\"the qick brown fox jumps\"",
        "\"the quick brow fox jumps\"",
        "\"the quick brown fox jpmps\"",
        "\"the quick brown\"",
        "\"the quick\"",
    ]
    "###);
}
|
||||||
|
|
||||||
|
/// Distinct attribute with the ranking rules `sort`, `words` and
/// `rank1:desc`, sorting on `letter` (descending) at search time. Snapshots
/// the returned ids along with their distinct, `rank1` and `text` values.
#[test]
fn test_distinct_sort_words() {
    let index = create_index();
    index
        .update_settings(|settings| {
            settings.set_criteria(vec![
                Criterion::Sort,
                Criterion::Words,
                Criterion::Desc(S("rank1")),
            ]);
        })
        .unwrap();

    let txn = index.read_txn().unwrap();

    let mut search = Search::new(&txn, &index);
    search.terms_matching_strategy(TermsMatchingStrategy::Last);
    search.query("the quick brown fox jumps over the lazy dog");
    search.sort_criteria(vec![AscDesc::Desc(Member::Field(S("letter")))]);

    let SearchResult { documents_ids, .. } = search.execute().unwrap();
    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[22, 20, 19, 16, 9, 8, 7, 3, 1, 26, 25, 24]");

    let distinct_values = verify_distinct(&index, &txn, &documents_ids);
    insta::assert_debug_snapshot!(distinct_values, @r###"
    [
        "\"I\"",
        "\"H\"",
        "\"G\"",
        "\"F\"",
        "\"E\"",
        "\"D\"",
        "\"C\"",
        "\"B\"",
        "\"A\"",
        "__does_not_exist__",
        "__does_not_exist__",
        "__does_not_exist__",
    ]
    "###);

    let rank_values = collect_field_values(&index, &txn, "rank1", &documents_ids);
    insta::assert_debug_snapshot!(rank_values, @r###"
    [
        "1",
        "0",
        "1",
        "1",
        "0",
        "0",
        "2",
        "1",
        "1",
        "3",
        "1",
        "0",
    ]
    "###);

    let text_values = collect_field_values(&index, &txn, "text", &documents_ids);
    insta::assert_debug_snapshot!(text_values, @r###"
    [
        "\"the quick brown fox jumps\"",
        "\"the quick brow fox jumps\"",
        "\"the quick brownfoxjumps\"",
        "\"the quic brown fox jamps over\"",
        "\"the quick brown fox jumps over the lazy\"",
        "\"the quick brown fox jumps over the lazy\"",
        "\"the quick brown fox jumps over the lazy\"",
        "\"the quick brown fox jumps over the lazy dog\"",
        "\"the quick brown fox jumpes over the lazy dog\"",
        "\"the quick brown fox jumps over the lazy dog\"",
        "\"the quick brown\"",
        "\"the quick\"",
    ]
    "###);
}
|
||||||
|
|
||||||
|
/// Distinct attribute combined with `exhaustive_number_hits(true)`: snapshots
/// both the returned document ids and the full candidate set.
///
/// The candidates snapshot pins the current behaviour, which is known to be
/// incorrect (see the TODO below and the module-level docs).
#[test]
fn test_distinct_all_candidates() {
    let index = create_index();
    index
        .update_settings(|settings| {
            settings.set_criteria(vec![Criterion::Sort]);
        })
        .unwrap();

    let txn = index.read_txn().unwrap();

    let mut search = Search::new(&txn, &index);
    search.terms_matching_strategy(TermsMatchingStrategy::Last);
    search.sort_criteria(vec![AscDesc::Desc(Member::Field(S("rank1")))]);
    search.exhaustive_number_hits(true);

    let SearchResult { documents_ids, candidates, .. } = search.execute().unwrap();
    let candidates: Vec<_> = candidates.iter().collect();

    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[14, 26, 4, 7, 17, 23, 1, 19, 25, 8, 20, 24]");
    // TODO: this is incorrect!
    insta::assert_snapshot!(format!("{candidates:?}"), @"[0, 2, 5, 8, 9, 15, 18, 20, 21, 24, 25, 26]");
}
|
||||||
|
|
||||||
|
/// Distinct attribute with the ranking rules `words` and `typo`: runs a full
/// sentence query with `TermsMatchingStrategy::Last` and snapshots the
/// returned ids, their distinct values and their `text` values.
#[test]
fn test_distinct_typo() {
    let index = create_index();
    index
        .update_settings(|settings| {
            settings.set_criteria(vec![Criterion::Words, Criterion::Typo]);
        })
        .unwrap();

    let txn = index.read_txn().unwrap();

    let mut search = Search::new(&txn, &index);
    search.query("the quick brown fox jumps over the lazy dog");
    search.terms_matching_strategy(TermsMatchingStrategy::Last);

    let SearchResult { documents_ids, .. } = search.execute().unwrap();
    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3, 26, 0, 7, 8, 9, 15, 22, 18, 20, 25, 24]");

    let distinct_values = verify_distinct(&index, &txn, &documents_ids);
    insta::assert_debug_snapshot!(distinct_values, @r###"
    [
        "\"B\"",
        "__does_not_exist__",
        "\"A\"",
        "\"C\"",
        "\"D\"",
        "\"E\"",
        "\"F\"",
        "\"I\"",
        "\"G\"",
        "\"H\"",
        "__does_not_exist__",
        "__does_not_exist__",
    ]
    "###);

    let text_values = collect_field_values(&index, &txn, "text", &documents_ids);
    insta::assert_debug_snapshot!(text_values, @r###"
    [
        "\"the quick brown fox jumps over the lazy dog\"",
        "\"the quick brown fox jumps over the lazy dog\"",
        "\"the quick brown fox jamps over the lazy dog\"",
        "\"the quick brown fox jumps over the lazy\"",
        "\"the quick brown fox jumps over the lazy\"",
        "\"the quick brown fox jumps over the lazy\"",
        "\"the quick brownf fox jumps over\"",
        "\"the quick brown fox jumps\"",
        "\"the qick brown fox jumps\"",
        "\"the quick brow fox jumps\"",
        "\"the quick brown\"",
        "\"the quick\"",
    ]
    "###);
}
|
22
milli/src/search/new/tests/language.rs
Normal file
22
milli/src/search/new/tests/language.rs
Normal file
@ -0,0 +1,22 @@
|
|||||||
|
use crate::{index::tests::TempIndex, Search, SearchResult};
|
||||||
|
|
||||||
|
/// Indexes one English, one Japanese and one Hebrew document, then checks
/// that searching for the kanji word "東京" returns only the Japanese
/// document (id 1).
#[test]
fn test_kanji_language_detection() {
    let index = TempIndex::new();

    index
        .add_documents(documents!([
            { "id": 0, "title": "The quick (\"brown\") fox can't jump 32.3 feet, right? Brr, it's 29.3°F!" },
            { "id": 1, "title": "東京のお寿司。" },
            { "id": 2, "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" }
        ]))
        .unwrap();

    // The search only reads from the index, so a read transaction is enough;
    // there is no need to hold a write transaction here.
    let txn = index.read_txn().unwrap();
    let mut search = Search::new(&txn, &index);

    search.query("東京");
    let SearchResult { documents_ids, .. } = search.execute().unwrap();

    assert_eq!(documents_ids, vec![1]);
}
|
@ -1,3 +1,28 @@
|
|||||||
|
pub mod distinct;
|
||||||
|
#[cfg(feature = "default")]
|
||||||
|
pub mod language;
|
||||||
pub mod ngram_split_words;
|
pub mod ngram_split_words;
|
||||||
|
pub mod proximity;
|
||||||
|
pub mod sort;
|
||||||
pub mod typo;
|
pub mod typo;
|
||||||
pub mod words_tms;
|
pub mod words_tms;
|
||||||
|
|
||||||
|
fn collect_field_values(
|
||||||
|
index: &crate::Index,
|
||||||
|
txn: &heed::RoTxn,
|
||||||
|
fid: &str,
|
||||||
|
docids: &[u32],
|
||||||
|
) -> Vec<String> {
|
||||||
|
let mut values = vec![];
|
||||||
|
let fid = index.fields_ids_map(txn).unwrap().id(fid).unwrap();
|
||||||
|
for doc in index.documents(txn, docids.iter().copied()).unwrap() {
|
||||||
|
if let Some(v) = doc.1.get(fid) {
|
||||||
|
let v: serde_json::Value = serde_json::from_slice(v).unwrap();
|
||||||
|
let v = v.to_string();
|
||||||
|
values.push(v);
|
||||||
|
} else {
|
||||||
|
values.push("__does_not_exist__".to_owned());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
values
|
||||||
|
}
|
||||||
|
0
milli/src/search/new/tests/proximity.rs
Normal file
0
milli/src/search/new/tests/proximity.rs
Normal file
316
milli/src/search/new/tests/sort.rs
Normal file
316
milli/src/search/new/tests/sort.rs
Normal file
@ -0,0 +1,316 @@
|
|||||||
|
/*!
|
||||||
|
This module tests the `sort` ranking rule:
|
||||||
|
|
||||||
|
1. an error is returned if the sort ranking rule exists but no fields-to-sort were given at search time
|
||||||
|
2. an error is returned if the fields-to-sort are not sortable
|
||||||
|
3. it is possible to add multiple fields-to-sort at search time
|
||||||
|
4. custom sort ranking rules can be added to the settings, they interact with the generic `sort` ranking rule as expected
|
||||||
|
5. numbers appear before strings
|
||||||
|
6. documents with either: (1) no value, (2) null, or (3) an object for the field-to-sort appear at the end of the bucket
|
||||||
|
7. boolean values are translated to strings
|
||||||
|
8. if a field contains an array, it is sorted by the best value in the array according to the sort rule
|
||||||
|
*/
|
||||||
|
|
||||||
|
use big_s::S;
|
||||||
|
use maplit::hashset;
|
||||||
|
|
||||||
|
use crate::{
|
||||||
|
index::tests::TempIndex, search::new::tests::collect_field_values, AscDesc, Criterion, Member,
|
||||||
|
Search, SearchResult, TermsMatchingStrategy,
|
||||||
|
};
|
||||||
|
|
||||||
|
fn create_index() -> TempIndex {
|
||||||
|
let index = TempIndex::new();
|
||||||
|
|
||||||
|
index
|
||||||
|
.update_settings(|s| {
|
||||||
|
s.set_primary_key("id".to_owned());
|
||||||
|
s.set_searchable_fields(vec!["text".to_owned()]);
|
||||||
|
s.set_sortable_fields(hashset! { S("rank"), S("vague"), S("letter") });
|
||||||
|
s.set_criteria(vec![Criterion::Sort]);
|
||||||
|
})
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
index
|
||||||
|
.add_documents(documents!([
|
||||||
|
{
|
||||||
|
"id": 0,
|
||||||
|
"letter": "A",
|
||||||
|
"rank": 0,
|
||||||
|
"vague": 0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 1,
|
||||||
|
"letter": "A",
|
||||||
|
"rank": 1,
|
||||||
|
"vague": "0",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 2,
|
||||||
|
"letter": "B",
|
||||||
|
"rank": 0,
|
||||||
|
"vague": 1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 3,
|
||||||
|
"letter": "B",
|
||||||
|
"rank": 1,
|
||||||
|
"vague": "1",
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 4,
|
||||||
|
"letter": "B",
|
||||||
|
"rank": 2,
|
||||||
|
"vague": [1, 2],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 5,
|
||||||
|
"letter": "C",
|
||||||
|
"rank": 0,
|
||||||
|
"vague": [1, "2"],
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 6,
|
||||||
|
"letter": "C",
|
||||||
|
"rank": 1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 7,
|
||||||
|
"letter": "C",
|
||||||
|
"rank": 2,
|
||||||
|
"vague": null,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 8,
|
||||||
|
"letter": "D",
|
||||||
|
"rank": 0,
|
||||||
|
"vague": [null, null, ""]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 9,
|
||||||
|
"letter": "E",
|
||||||
|
"rank": 0,
|
||||||
|
"vague": ""
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 10,
|
||||||
|
"letter": "E",
|
||||||
|
"rank": 1,
|
||||||
|
"vague": {
|
||||||
|
"sub": 0,
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 11,
|
||||||
|
"letter": "E",
|
||||||
|
"rank": 2,
|
||||||
|
"vague": true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 12,
|
||||||
|
"letter": "E",
|
||||||
|
"rank": 3,
|
||||||
|
"vague": false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 13,
|
||||||
|
"letter": "E",
|
||||||
|
"rank": 4,
|
||||||
|
"vague": 1.5673,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 14,
|
||||||
|
"letter": "E",
|
||||||
|
"rank": 5,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 15,
|
||||||
|
"letter": "F",
|
||||||
|
"rank": 0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 16,
|
||||||
|
"letter": "F",
|
||||||
|
"rank": 1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 17,
|
||||||
|
"letter": "F",
|
||||||
|
"rank": 2,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 18,
|
||||||
|
"letter": "G",
|
||||||
|
"rank": 0,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 19,
|
||||||
|
"letter": "G",
|
||||||
|
"rank": 1,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 20,
|
||||||
|
"letter": "H",
|
||||||
|
"rank": 0,
|
||||||
|
"vague": true,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 21,
|
||||||
|
"letter": "I",
|
||||||
|
"rank": 0,
|
||||||
|
"vague": false,
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 22,
|
||||||
|
"letter": "I",
|
||||||
|
"rank": 1,
|
||||||
|
"vague": [1.1367, "help", null]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"id": 23,
|
||||||
|
"letter": "I",
|
||||||
|
"rank": 2,
|
||||||
|
"vague": [1.2367, "hello"]
|
||||||
|
},
|
||||||
|
]))
|
||||||
|
.unwrap();
|
||||||
|
index
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Exercises the `sort` ranking rule on the index built by `create_index`.
///
/// Runs four placeholder searches (`letter` desc, `rank` desc, `vague` asc,
/// `vague` desc) and snapshots the returned ids together with the values of
/// the sorted field.
///
/// Documents that lack the field are reported by `collect_field_values` as
/// `__does_not_exist__`; the `vague` snapshots must use exactly that
/// spelling to match.
#[test]
fn test_sort() {
    let index = create_index();
    let txn = index.read_txn().unwrap();

    // 1. Sort by `letter`, descending.
    let mut s = Search::new(&txn, &index);
    s.terms_matching_strategy(TermsMatchingStrategy::Last);
    s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("letter")))]);

    let SearchResult { documents_ids, .. } = s.execute().unwrap();
    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[21, 22, 23, 20, 18, 19, 15, 16, 17, 9, 10, 11, 12, 13, 14, 8, 5, 6, 7, 2]");

    let letter_values = collect_field_values(&index, &txn, "letter", &documents_ids);
    insta::assert_debug_snapshot!(letter_values, @r###"
    [
        "\"I\"",
        "\"I\"",
        "\"I\"",
        "\"H\"",
        "\"G\"",
        "\"G\"",
        "\"F\"",
        "\"F\"",
        "\"F\"",
        "\"E\"",
        "\"E\"",
        "\"E\"",
        "\"E\"",
        "\"E\"",
        "\"E\"",
        "\"D\"",
        "\"C\"",
        "\"C\"",
        "\"C\"",
        "\"B\"",
    ]
    "###);

    // 2. Sort by `rank`, descending.
    let mut s = Search::new(&txn, &index);
    s.terms_matching_strategy(TermsMatchingStrategy::Last);
    s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("rank")))]);

    let SearchResult { documents_ids, .. } = s.execute().unwrap();
    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[14, 13, 12, 4, 7, 11, 17, 23, 1, 3, 6, 10, 16, 19, 22, 0, 2, 5, 8, 9]");

    let rank_values = collect_field_values(&index, &txn, "rank", &documents_ids);
    insta::assert_debug_snapshot!(rank_values, @r###"
    [
        "5",
        "4",
        "3",
        "2",
        "2",
        "2",
        "2",
        "2",
        "1",
        "1",
        "1",
        "1",
        "1",
        "1",
        "1",
        "0",
        "0",
        "0",
        "0",
        "0",
    ]
    "###);

    // 3. Sort by `vague` (mixed JSON types), ascending.
    let mut s = Search::new(&txn, &index);
    s.terms_matching_strategy(TermsMatchingStrategy::Last);
    s.sort_criteria(vec![AscDesc::Asc(Member::Field(S("vague")))]);

    let SearchResult { documents_ids, .. } = s.execute().unwrap();
    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 4, 5, 22, 23, 13, 1, 3, 12, 21, 11, 20, 6, 7, 8, 9, 10, 14, 15]");

    let vague_values = collect_field_values(&index, &txn, "vague", &documents_ids);
    insta::assert_debug_snapshot!(vague_values, @r###"
    [
        "0",
        "1",
        "[1,2]",
        "[1,\"2\"]",
        "[1.1367,\"help\",null]",
        "[1.2367,\"hello\"]",
        "1.5673",
        "\"0\"",
        "\"1\"",
        "false",
        "false",
        "true",
        "true",
        "__does_not_exist__",
        "null",
        "[null,null,\"\"]",
        "\"\"",
        "{\"sub\":0}",
        "__does_not_exist__",
        "__does_not_exist__",
    ]
    "###);

    // 4. Sort by `vague`, descending.
    let mut s = Search::new(&txn, &index);
    s.terms_matching_strategy(TermsMatchingStrategy::Last);
    s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("vague")))]);

    let SearchResult { documents_ids, .. } = s.execute().unwrap();
    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 13, 23, 22, 2, 5, 0, 11, 20, 12, 21, 3, 1, 6, 7, 8, 9, 10, 14, 15]");

    let vague_values = collect_field_values(&index, &txn, "vague", &documents_ids);
    insta::assert_debug_snapshot!(vague_values, @r###"
    [
        "[1,2]",
        "1.5673",
        "[1.2367,\"hello\"]",
        "[1.1367,\"help\",null]",
        "1",
        "[1,\"2\"]",
        "0",
        "true",
        "true",
        "false",
        "false",
        "\"1\"",
        "\"0\"",
        "__does_not_exist__",
        "null",
        "[null,null,\"\"]",
        "\"\"",
        "{\"sub\":0}",
        "__does_not_exist__",
        "__does_not_exist__",
    ]
    "###);
}
|
Loading…
Reference in New Issue
Block a user