mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-11 14:04:31 +01:00
Add more search tests
This commit is contained in:
parent
62b9c6fbee
commit
959e4607bb
@ -41,7 +41,7 @@ pub fn apply_distinct_rule(
|
||||
}
|
||||
|
||||
/// Apply the distinct rule defined by [`apply_distinct_rule`] for a single document id.
|
||||
fn distinct_single_docid(
|
||||
pub fn distinct_single_docid(
|
||||
index: &Index,
|
||||
txn: &RoTxn,
|
||||
field_id: u16,
|
||||
|
@ -1,3 +1,4 @@
|
||||
mod bucket_sort;
|
||||
mod db_cache;
|
||||
mod distinct;
|
||||
mod graph_based_ranking_rule;
|
||||
@ -31,7 +32,8 @@ pub use logger::detailed::DetailedSearchLogger;
|
||||
pub use logger::{DefaultSearchLogger, SearchLogger};
|
||||
use query_graph::{QueryGraph, QueryNode};
|
||||
use query_term::{located_query_terms_from_string, Phrase, QueryTerm};
|
||||
use ranking_rules::{bucket_sort, PlaceholderQuery, RankingRuleOutput, RankingRuleQueryTrait};
|
||||
use ranking_rules::{PlaceholderQuery, RankingRuleOutput, RankingRuleQueryTrait};
|
||||
use bucket_sort::bucket_sort;
|
||||
use resolve_query_graph::PhraseDocIdsCache;
|
||||
use roaring::RoaringBitmap;
|
||||
use words::Words;
|
||||
|
590
milli/src/search/new/tests/distinct.rs
Normal file
590
milli/src/search/new/tests/distinct.rs
Normal file
@ -0,0 +1,590 @@
|
||||
/*!
|
||||
This module tests the "distinct attribute" feature, and its
|
||||
interaction with other ranking rules.
|
||||
|
||||
1. no duplicate distinct attributes are ever returned
|
||||
2. only the best document (according to the search rules) for each distinct value appears in the result
|
||||
3. if a document does not have a distinct attribute, then the distinct rule does not apply to it
|
||||
|
||||
It does not properly test:
|
||||
- combination of distinct + exhaustive_nbr_hits (because we know it's incorrect)
|
||||
- distinct attributes with arrays (because we know it's incorrect as well)
|
||||
*/
|
||||
|
||||
use std::collections::HashSet;
|
||||
|
||||
use big_s::S;
|
||||
use heed::RoTxn;
|
||||
use maplit::hashset;
|
||||
|
||||
use crate::{
|
||||
index::tests::TempIndex, AscDesc, Criterion, Index, Member, Search, SearchResult,
|
||||
TermsMatchingStrategy,
|
||||
};
|
||||
|
||||
use super::collect_field_values;
|
||||
|
||||
fn create_index() -> TempIndex {
|
||||
let index = TempIndex::new();
|
||||
|
||||
index
|
||||
.update_settings(|s| {
|
||||
s.set_primary_key("id".to_owned());
|
||||
s.set_searchable_fields(vec!["text".to_owned()]);
|
||||
s.set_sortable_fields(hashset! { S("rank1"), S("letter") });
|
||||
s.set_distinct_field("letter".to_owned());
|
||||
s.set_criteria(vec![Criterion::Words]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
index
|
||||
.add_documents(documents!([
|
||||
{
|
||||
"id": 0,
|
||||
"letter": "A",
|
||||
"rank1": 0,
|
||||
"text": "the quick brown fox jamps over the lazy dog",
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"letter": "A",
|
||||
"rank1": 1,
|
||||
"text": "the quick brown fox jumpes over the lazy dog",
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"letter": "B",
|
||||
"rank1": 0,
|
||||
"text": "the quick brown foxjumps over the lazy dog",
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"letter": "B",
|
||||
"rank1": 1,
|
||||
"text": "the quick brown fox jumps over the lazy dog",
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"letter": "B",
|
||||
"rank1": 2,
|
||||
"text": "the quick brown fox jumps over the lazy",
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"letter": "C",
|
||||
"rank1": 0,
|
||||
"text": "the quickbrownfox jumps over the lazy",
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"letter": "C",
|
||||
"rank1": 1,
|
||||
"text": "the quick brown fox jumpss over the lazy",
|
||||
},
|
||||
{
|
||||
"id": 7,
|
||||
"letter": "C",
|
||||
"rank1": 2,
|
||||
"text": "the quick brown fox jumps over the lazy",
|
||||
},
|
||||
{
|
||||
"id": 8,
|
||||
"letter": "D",
|
||||
"rank1": 0,
|
||||
"text": "the quick brown fox jumps over the lazy",
|
||||
},
|
||||
{
|
||||
"id": 9,
|
||||
"letter": "E",
|
||||
"rank1": 0,
|
||||
"text": "the quick brown fox jumps over the lazy",
|
||||
},
|
||||
{
|
||||
"id": 10,
|
||||
"letter": "E",
|
||||
"rank1": 1,
|
||||
"text": "the quackbrown foxjunps over",
|
||||
},
|
||||
{
|
||||
"id": 11,
|
||||
"letter": "E",
|
||||
"rank1": 2,
|
||||
"text": "the quicko browno fox junps over",
|
||||
},
|
||||
{
|
||||
"id": 12,
|
||||
"letter": "E",
|
||||
"rank1": 3,
|
||||
"text": "the quicko browno fox jumps over",
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"letter": "E",
|
||||
"rank1": 4,
|
||||
"text": "the quick brewn fox jumps over",
|
||||
},
|
||||
{
|
||||
"id": 14,
|
||||
"letter": "E",
|
||||
"rank1": 5,
|
||||
"text": "the quick brown fox jumps over",
|
||||
},
|
||||
{
|
||||
"id": 15,
|
||||
"letter": "F",
|
||||
"rank1": 0,
|
||||
"text": "the quick brownf fox jumps over",
|
||||
},
|
||||
{
|
||||
"id": 16,
|
||||
"letter": "F",
|
||||
"rank1": 1,
|
||||
"text": "the quic brown fox jamps over",
|
||||
},
|
||||
{
|
||||
"id": 17,
|
||||
"letter": "F",
|
||||
"rank1": 2,
|
||||
"text": "thequick browns fox jimps",
|
||||
},
|
||||
{
|
||||
"id": 18,
|
||||
"letter": "G",
|
||||
"rank1": 0,
|
||||
"text": "the qick brown fox jumps",
|
||||
},
|
||||
{
|
||||
"id": 19,
|
||||
"letter": "G",
|
||||
"rank1": 1,
|
||||
"text": "the quick brownfoxjumps",
|
||||
},
|
||||
{
|
||||
"id": 20,
|
||||
"letter": "H",
|
||||
"rank1": 0,
|
||||
"text": "the quick brow fox jumps",
|
||||
},
|
||||
{
|
||||
"id": 21,
|
||||
"letter": "I",
|
||||
"rank1": 0,
|
||||
"text": "the quick brown fox jpmps",
|
||||
},
|
||||
{
|
||||
"id": 22,
|
||||
"letter": "I",
|
||||
"rank1": 1,
|
||||
"text": "the quick brown fox jumps",
|
||||
},
|
||||
{
|
||||
"id": 23,
|
||||
"letter": "I",
|
||||
"rank1": 2,
|
||||
"text": "the quick",
|
||||
},
|
||||
{
|
||||
"id": 24,
|
||||
"rank1": 0,
|
||||
"text": "the quick",
|
||||
},
|
||||
{
|
||||
"id": 25,
|
||||
"rank1": 1,
|
||||
"text": "the quick brown",
|
||||
},
|
||||
{
|
||||
"id": 26,
|
||||
"rank1": 2,
|
||||
"text": "the quick brown fox",
|
||||
},
|
||||
{
|
||||
"id": 26,
|
||||
"rank1": 3,
|
||||
"text": "the quick brown fox jumps over the lazy dog",
|
||||
},
|
||||
]))
|
||||
.unwrap();
|
||||
index
|
||||
}
|
||||
|
||||
fn verify_distinct(index: &Index, txn: &RoTxn, docids: &[u32]) -> Vec<String> {
|
||||
let vs = collect_field_values(index, txn, index.distinct_field(txn).unwrap().unwrap(), docids);
|
||||
|
||||
let mut unique = HashSet::new();
|
||||
for v in vs.iter() {
|
||||
if v == "__does_not_exist__" {
|
||||
continue;
|
||||
}
|
||||
assert!(unique.insert(v.clone()));
|
||||
}
|
||||
|
||||
vs
|
||||
}
|
||||
|
||||
/// Placeholder search (no query, no sort) with the default settings of
/// `create_index`: one document per letter, followed by the documents that
/// have no `letter` field.
#[test]
fn test_distinct_placeholder_no_ranking_rules() {
    let index = create_index();
    let rtxn = index.read_txn().unwrap();

    let search = Search::new(&rtxn, &index);
    let hits = search.execute().unwrap().documents_ids;
    insta::assert_snapshot!(format!("{hits:?}"), @"[0, 2, 5, 8, 9, 15, 18, 20, 21, 24, 25, 26]");

    let letters = verify_distinct(&index, &rtxn, &hits);
    insta::assert_debug_snapshot!(letters, @r###"
    [
        "\"A\"",
        "\"B\"",
        "\"C\"",
        "\"D\"",
        "\"E\"",
        "\"F\"",
        "\"G\"",
        "\"H\"",
        "\"I\"",
        "__does_not_exist__",
        "__does_not_exist__",
        "__does_not_exist__",
    ]
    "###);
}
|
||||
|
||||
/// Placeholder search combined with the `sort` ranking rule, for three
/// different sort criteria: `rank1:desc`, `letter:desc`, and
/// `letter:desc` followed by `rank1:desc`.
#[test]
fn test_distinct_placeholder_sort() {
    let index = create_index();
    index
        .update_settings(|settings| {
            settings.set_criteria(vec![Criterion::Sort]);
        })
        .unwrap();

    let rtxn = index.read_txn().unwrap();

    // --- sort by rank1, descending ---
    let mut by_rank = Search::new(&rtxn, &index);
    by_rank.sort_criteria(vec![AscDesc::Desc(Member::Field(S("rank1")))]);

    let hits = by_rank.execute().unwrap().documents_ids;
    insta::assert_snapshot!(format!("{hits:?}"), @"[14, 26, 4, 7, 17, 23, 1, 19, 25, 8, 20, 24]");
    let letters = verify_distinct(&index, &rtxn, &hits);
    insta::assert_debug_snapshot!(letters, @r###"
    [
        "\"E\"",
        "__does_not_exist__",
        "\"B\"",
        "\"C\"",
        "\"F\"",
        "\"I\"",
        "\"A\"",
        "\"G\"",
        "__does_not_exist__",
        "\"D\"",
        "\"H\"",
        "__does_not_exist__",
    ]
    "###);
    let ranks = collect_field_values(&index, &rtxn, "rank1", &hits);
    insta::assert_debug_snapshot!(ranks, @r###"
    [
        "5",
        "3",
        "2",
        "2",
        "2",
        "2",
        "1",
        "1",
        "1",
        "0",
        "0",
        "0",
    ]
    "###);

    // --- sort by letter, descending ---
    let mut by_letter = Search::new(&rtxn, &index);
    by_letter.sort_criteria(vec![AscDesc::Desc(Member::Field(S("letter")))]);

    let hits = by_letter.execute().unwrap().documents_ids;
    insta::assert_snapshot!(format!("{hits:?}"), @"[21, 20, 18, 15, 9, 8, 5, 2, 0, 24, 25, 26]");
    let letters = verify_distinct(&index, &rtxn, &hits);
    insta::assert_debug_snapshot!(letters, @r###"
    [
        "\"I\"",
        "\"H\"",
        "\"G\"",
        "\"F\"",
        "\"E\"",
        "\"D\"",
        "\"C\"",
        "\"B\"",
        "\"A\"",
        "__does_not_exist__",
        "__does_not_exist__",
        "__does_not_exist__",
    ]
    "###);
    let ranks = collect_field_values(&index, &rtxn, "rank1", &hits);
    insta::assert_debug_snapshot!(ranks, @r###"
    [
        "0",
        "0",
        "0",
        "0",
        "0",
        "0",
        "0",
        "0",
        "0",
        "0",
        "1",
        "3",
    ]
    "###);

    // --- sort by letter, then rank1, both descending ---
    let mut by_letter_then_rank = Search::new(&rtxn, &index);
    by_letter_then_rank.sort_criteria(vec![
        AscDesc::Desc(Member::Field(S("letter"))),
        AscDesc::Desc(Member::Field(S("rank1"))),
    ]);

    let hits = by_letter_then_rank.execute().unwrap().documents_ids;
    insta::assert_snapshot!(format!("{hits:?}"), @"[23, 20, 19, 17, 14, 8, 7, 4, 1, 26, 25, 24]");
    let letters = verify_distinct(&index, &rtxn, &hits);
    insta::assert_debug_snapshot!(letters, @r###"
    [
        "\"I\"",
        "\"H\"",
        "\"G\"",
        "\"F\"",
        "\"E\"",
        "\"D\"",
        "\"C\"",
        "\"B\"",
        "\"A\"",
        "__does_not_exist__",
        "__does_not_exist__",
        "__does_not_exist__",
    ]
    "###);
    let ranks = collect_field_values(&index, &rtxn, "rank1", &hits);
    insta::assert_debug_snapshot!(ranks, @r###"
    [
        "2",
        "0",
        "1",
        "2",
        "5",
        "0",
        "2",
        "2",
        "1",
        "3",
        "1",
        "0",
    ]
    "###);
}
|
||||
|
||||
/// Full-sentence query with the `words` ranking rule and the `Last` terms
/// matching strategy; checks both the distinct values and the matched texts.
#[test]
fn test_distinct_words() {
    let index = create_index();
    index
        .update_settings(|settings| {
            settings.set_criteria(vec![Criterion::Words]);
        })
        .unwrap();

    let rtxn = index.read_txn().unwrap();

    let mut search = Search::new(&rtxn, &index);
    search.terms_matching_strategy(TermsMatchingStrategy::Last);
    search.query("the quick brown fox jumps over the lazy dog");

    let hits = search.execute().unwrap().documents_ids;
    insta::assert_snapshot!(format!("{hits:?}"), @"[0, 2, 26, 5, 8, 9, 15, 18, 20, 21, 25, 24]");

    let letters = verify_distinct(&index, &rtxn, &hits);
    insta::assert_debug_snapshot!(letters, @r###"
    [
        "\"A\"",
        "\"B\"",
        "__does_not_exist__",
        "\"C\"",
        "\"D\"",
        "\"E\"",
        "\"F\"",
        "\"G\"",
        "\"H\"",
        "\"I\"",
        "__does_not_exist__",
        "__does_not_exist__",
    ]
    "###);

    let texts = collect_field_values(&index, &rtxn, "text", &hits);
    insta::assert_debug_snapshot!(texts, @r###"
    [
        "\"the quick brown fox jamps over the lazy dog\"",
        "\"the quick brown foxjumps over the lazy dog\"",
        "\"the quick brown fox jumps over the lazy dog\"",
        "\"the quickbrownfox jumps over the lazy\"",
        "\"the quick brown fox jumps over the lazy\"",
        "\"the quick brown fox jumps over the lazy\"",
        "\"the quick brownf fox jumps over\"",
        "\"the qick brown fox jumps\"",
        "\"the quick brow fox jumps\"",
        "\"the quick brown fox jpmps\"",
        "\"the quick brown\"",
        "\"the quick\"",
    ]
    "###);
}
|
||||
|
||||
/// Ranking rules `sort`, `words`, `rank1:desc` with a full-sentence query
/// sorted by `letter:desc`; checks distinct values, ranks and matched texts.
#[test]
fn test_distinct_sort_words() {
    let index = create_index();
    index
        .update_settings(|settings| {
            settings.set_criteria(vec![
                Criterion::Sort,
                Criterion::Words,
                Criterion::Desc(S("rank1")),
            ]);
        })
        .unwrap();

    let rtxn = index.read_txn().unwrap();

    let mut search = Search::new(&rtxn, &index);
    search.terms_matching_strategy(TermsMatchingStrategy::Last);
    search.query("the quick brown fox jumps over the lazy dog");
    search.sort_criteria(vec![AscDesc::Desc(Member::Field(S("letter")))]);

    let hits = search.execute().unwrap().documents_ids;
    insta::assert_snapshot!(format!("{hits:?}"), @"[22, 20, 19, 16, 9, 8, 7, 3, 1, 26, 25, 24]");

    let letters = verify_distinct(&index, &rtxn, &hits);
    insta::assert_debug_snapshot!(letters, @r###"
    [
        "\"I\"",
        "\"H\"",
        "\"G\"",
        "\"F\"",
        "\"E\"",
        "\"D\"",
        "\"C\"",
        "\"B\"",
        "\"A\"",
        "__does_not_exist__",
        "__does_not_exist__",
        "__does_not_exist__",
    ]
    "###);

    let ranks = collect_field_values(&index, &rtxn, "rank1", &hits);
    insta::assert_debug_snapshot!(ranks, @r###"
    [
        "1",
        "0",
        "1",
        "1",
        "0",
        "0",
        "2",
        "1",
        "1",
        "3",
        "1",
        "0",
    ]
    "###);

    let texts = collect_field_values(&index, &rtxn, "text", &hits);
    insta::assert_debug_snapshot!(texts, @r###"
    [
        "\"the quick brown fox jumps\"",
        "\"the quick brow fox jumps\"",
        "\"the quick brownfoxjumps\"",
        "\"the quic brown fox jamps over\"",
        "\"the quick brown fox jumps over the lazy\"",
        "\"the quick brown fox jumps over the lazy\"",
        "\"the quick brown fox jumps over the lazy\"",
        "\"the quick brown fox jumps over the lazy dog\"",
        "\"the quick brown fox jumpes over the lazy dog\"",
        "\"the quick brown fox jumps over the lazy dog\"",
        "\"the quick brown\"",
        "\"the quick\"",
    ]
    "###);
}
|
||||
|
||||
/// Placeholder search sorted by `rank1:desc` with exhaustive number of hits
/// enabled; snapshots both the returned documents and the candidate set.
#[test]
fn test_distinct_all_candidates() {
    let index = create_index();
    index
        .update_settings(|settings| {
            settings.set_criteria(vec![Criterion::Sort]);
        })
        .unwrap();

    let rtxn = index.read_txn().unwrap();

    let mut search = Search::new(&rtxn, &index);
    search.terms_matching_strategy(TermsMatchingStrategy::Last);
    search.sort_criteria(vec![AscDesc::Desc(Member::Field(S("rank1")))]);
    search.exhaustive_number_hits(true);

    let result = search.execute().unwrap();
    let hits = result.documents_ids;
    let candidate_ids = result.candidates.iter().collect::<Vec<_>>();

    insta::assert_snapshot!(format!("{hits:?}"), @"[14, 26, 4, 7, 17, 23, 1, 19, 25, 8, 20, 24]");
    // TODO: this is incorrect!
    insta::assert_snapshot!(format!("{candidate_ids:?}"), @"[0, 2, 5, 8, 9, 15, 18, 20, 21, 24, 25, 26]");
}
|
||||
|
||||
/// Ranking rules `words` then `typo` with a full-sentence query; checks the
/// distinct values and the matched texts of the returned documents.
#[test]
fn test_distinct_typo() {
    let index = create_index();
    index
        .update_settings(|settings| {
            settings.set_criteria(vec![Criterion::Words, Criterion::Typo]);
        })
        .unwrap();

    let rtxn = index.read_txn().unwrap();

    let mut search = Search::new(&rtxn, &index);
    search.query("the quick brown fox jumps over the lazy dog");
    search.terms_matching_strategy(TermsMatchingStrategy::Last);

    let hits = search.execute().unwrap().documents_ids;
    insta::assert_snapshot!(format!("{hits:?}"), @"[3, 26, 0, 7, 8, 9, 15, 22, 18, 20, 25, 24]");

    let letters = verify_distinct(&index, &rtxn, &hits);
    insta::assert_debug_snapshot!(letters, @r###"
    [
        "\"B\"",
        "__does_not_exist__",
        "\"A\"",
        "\"C\"",
        "\"D\"",
        "\"E\"",
        "\"F\"",
        "\"I\"",
        "\"G\"",
        "\"H\"",
        "__does_not_exist__",
        "__does_not_exist__",
    ]
    "###);

    let texts = collect_field_values(&index, &rtxn, "text", &hits);
    insta::assert_debug_snapshot!(texts, @r###"
    [
        "\"the quick brown fox jumps over the lazy dog\"",
        "\"the quick brown fox jumps over the lazy dog\"",
        "\"the quick brown fox jamps over the lazy dog\"",
        "\"the quick brown fox jumps over the lazy\"",
        "\"the quick brown fox jumps over the lazy\"",
        "\"the quick brown fox jumps over the lazy\"",
        "\"the quick brownf fox jumps over\"",
        "\"the quick brown fox jumps\"",
        "\"the qick brown fox jumps\"",
        "\"the quick brow fox jumps\"",
        "\"the quick brown\"",
        "\"the quick\"",
    ]
    "###);
}
|
22
milli/src/search/new/tests/language.rs
Normal file
22
milli/src/search/new/tests/language.rs
Normal file
@ -0,0 +1,22 @@
|
||||
use crate::{index::tests::TempIndex, Search, SearchResult};
|
||||
|
||||
/// Indexes one English, one Japanese and one Hebrew document, then checks
/// that a query written in kanji returns only the Japanese document.
#[test]
fn test_kanji_language_detection() {
    let index = TempIndex::new();

    index
        .add_documents(documents!([
            { "id": 0, "title": "The quick (\"brown\") fox can't jump 32.3 feet, right? Brr, it's 29.3°F!" },
            { "id": 1, "title": "東京のお寿司。" },
            { "id": 2, "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" }
        ]))
        .unwrap();

    // Searching never writes to the index, so a read transaction is enough
    // (and matches what the other search tests do).
    let txn = index.read_txn().unwrap();
    let mut search = Search::new(&txn, &index);

    search.query("東京");
    let SearchResult { documents_ids, .. } = search.execute().unwrap();

    assert_eq!(documents_ids, vec![1]);
}
|
@ -1,3 +1,28 @@
|
||||
pub mod distinct;
|
||||
#[cfg(feature = "default")]
|
||||
pub mod language;
|
||||
pub mod ngram_split_words;
|
||||
pub mod proximity;
|
||||
pub mod sort;
|
||||
pub mod typo;
|
||||
pub mod words_tms;
|
||||
|
||||
fn collect_field_values(
|
||||
index: &crate::Index,
|
||||
txn: &heed::RoTxn,
|
||||
fid: &str,
|
||||
docids: &[u32],
|
||||
) -> Vec<String> {
|
||||
let mut values = vec![];
|
||||
let fid = index.fields_ids_map(txn).unwrap().id(fid).unwrap();
|
||||
for doc in index.documents(txn, docids.iter().copied()).unwrap() {
|
||||
if let Some(v) = doc.1.get(fid) {
|
||||
let v: serde_json::Value = serde_json::from_slice(v).unwrap();
|
||||
let v = v.to_string();
|
||||
values.push(v);
|
||||
} else {
|
||||
values.push("__does_not_exist__".to_owned());
|
||||
}
|
||||
}
|
||||
values
|
||||
}
|
||||
|
0
milli/src/search/new/tests/proximity.rs
Normal file
0
milli/src/search/new/tests/proximity.rs
Normal file
316
milli/src/search/new/tests/sort.rs
Normal file
316
milli/src/search/new/tests/sort.rs
Normal file
@ -0,0 +1,316 @@
|
||||
/*!
|
||||
This module tests the `sort` ranking rule:
|
||||
|
||||
1. an error is returned if the sort ranking rule exists but no fields-to-sort were given at search time
|
||||
2. an error is returned if the fields-to-sort are not sortable
|
||||
3. it is possible to add multiple fields-to-sort at search time
|
||||
4. custom sort ranking rules can be added to the settings, they interact with the generic `sort` ranking rule as expected
|
||||
5. numbers appear before strings
|
||||
6. documents with either: (1) no value, (2) null, or (3) an object for the field-to-sort appear at the end of the bucket
|
||||
7. boolean values are translated to strings
|
||||
8. if a field contains an array, it is sorted by the best value in the array according to the sort rule
|
||||
*/
|
||||
|
||||
use big_s::S;
|
||||
use maplit::hashset;
|
||||
|
||||
use crate::{
|
||||
index::tests::TempIndex, search::new::tests::collect_field_values, AscDesc, Criterion, Member,
|
||||
Search, SearchResult, TermsMatchingStrategy,
|
||||
};
|
||||
|
||||
fn create_index() -> TempIndex {
|
||||
let index = TempIndex::new();
|
||||
|
||||
index
|
||||
.update_settings(|s| {
|
||||
s.set_primary_key("id".to_owned());
|
||||
s.set_searchable_fields(vec!["text".to_owned()]);
|
||||
s.set_sortable_fields(hashset! { S("rank"), S("vague"), S("letter") });
|
||||
s.set_criteria(vec![Criterion::Sort]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
index
|
||||
.add_documents(documents!([
|
||||
{
|
||||
"id": 0,
|
||||
"letter": "A",
|
||||
"rank": 0,
|
||||
"vague": 0,
|
||||
},
|
||||
{
|
||||
"id": 1,
|
||||
"letter": "A",
|
||||
"rank": 1,
|
||||
"vague": "0",
|
||||
},
|
||||
{
|
||||
"id": 2,
|
||||
"letter": "B",
|
||||
"rank": 0,
|
||||
"vague": 1,
|
||||
},
|
||||
{
|
||||
"id": 3,
|
||||
"letter": "B",
|
||||
"rank": 1,
|
||||
"vague": "1",
|
||||
},
|
||||
{
|
||||
"id": 4,
|
||||
"letter": "B",
|
||||
"rank": 2,
|
||||
"vague": [1, 2],
|
||||
},
|
||||
{
|
||||
"id": 5,
|
||||
"letter": "C",
|
||||
"rank": 0,
|
||||
"vague": [1, "2"],
|
||||
},
|
||||
{
|
||||
"id": 6,
|
||||
"letter": "C",
|
||||
"rank": 1,
|
||||
},
|
||||
{
|
||||
"id": 7,
|
||||
"letter": "C",
|
||||
"rank": 2,
|
||||
"vague": null,
|
||||
},
|
||||
{
|
||||
"id": 8,
|
||||
"letter": "D",
|
||||
"rank": 0,
|
||||
"vague": [null, null, ""]
|
||||
},
|
||||
{
|
||||
"id": 9,
|
||||
"letter": "E",
|
||||
"rank": 0,
|
||||
"vague": ""
|
||||
},
|
||||
{
|
||||
"id": 10,
|
||||
"letter": "E",
|
||||
"rank": 1,
|
||||
"vague": {
|
||||
"sub": 0,
|
||||
}
|
||||
},
|
||||
{
|
||||
"id": 11,
|
||||
"letter": "E",
|
||||
"rank": 2,
|
||||
"vague": true,
|
||||
},
|
||||
{
|
||||
"id": 12,
|
||||
"letter": "E",
|
||||
"rank": 3,
|
||||
"vague": false,
|
||||
},
|
||||
{
|
||||
"id": 13,
|
||||
"letter": "E",
|
||||
"rank": 4,
|
||||
"vague": 1.5673,
|
||||
},
|
||||
{
|
||||
"id": 14,
|
||||
"letter": "E",
|
||||
"rank": 5,
|
||||
},
|
||||
{
|
||||
"id": 15,
|
||||
"letter": "F",
|
||||
"rank": 0,
|
||||
},
|
||||
{
|
||||
"id": 16,
|
||||
"letter": "F",
|
||||
"rank": 1,
|
||||
},
|
||||
{
|
||||
"id": 17,
|
||||
"letter": "F",
|
||||
"rank": 2,
|
||||
},
|
||||
{
|
||||
"id": 18,
|
||||
"letter": "G",
|
||||
"rank": 0,
|
||||
},
|
||||
{
|
||||
"id": 19,
|
||||
"letter": "G",
|
||||
"rank": 1,
|
||||
},
|
||||
{
|
||||
"id": 20,
|
||||
"letter": "H",
|
||||
"rank": 0,
|
||||
"vague": true,
|
||||
},
|
||||
{
|
||||
"id": 21,
|
||||
"letter": "I",
|
||||
"rank": 0,
|
||||
"vague": false,
|
||||
},
|
||||
{
|
||||
"id": 22,
|
||||
"letter": "I",
|
||||
"rank": 1,
|
||||
"vague": [1.1367, "help", null]
|
||||
},
|
||||
{
|
||||
"id": 23,
|
||||
"letter": "I",
|
||||
"rank": 2,
|
||||
"vague": [1.2367, "hello"]
|
||||
},
|
||||
]))
|
||||
.unwrap();
|
||||
index
|
||||
}
|
||||
|
||||
/// Runs four placeholder searches against the index from `create_index`,
/// sorted by `letter:desc`, `rank:desc`, `vague:asc` and `vague:desc`, and
/// snapshots the returned document ids together with the sorted field values.
///
/// Missing fields are rendered by `collect_field_values` as
/// `"__does_not_exist__"`, which is the spelling the snapshots must use.
#[test]
fn test_sort() {
    let index = create_index();
    let txn = index.read_txn().unwrap();

    // --- letter, descending ---
    let mut s = Search::new(&txn, &index);
    s.terms_matching_strategy(TermsMatchingStrategy::Last);
    s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("letter")))]);

    let SearchResult { documents_ids, .. } = s.execute().unwrap();
    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[21, 22, 23, 20, 18, 19, 15, 16, 17, 9, 10, 11, 12, 13, 14, 8, 5, 6, 7, 2]");

    let letter_values = collect_field_values(&index, &txn, "letter", &documents_ids);
    insta::assert_debug_snapshot!(letter_values, @r###"
    [
        "\"I\"",
        "\"I\"",
        "\"I\"",
        "\"H\"",
        "\"G\"",
        "\"G\"",
        "\"F\"",
        "\"F\"",
        "\"F\"",
        "\"E\"",
        "\"E\"",
        "\"E\"",
        "\"E\"",
        "\"E\"",
        "\"E\"",
        "\"D\"",
        "\"C\"",
        "\"C\"",
        "\"C\"",
        "\"B\"",
    ]
    "###);

    // --- rank, descending ---
    let mut s = Search::new(&txn, &index);
    s.terms_matching_strategy(TermsMatchingStrategy::Last);
    s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("rank")))]);

    let SearchResult { documents_ids, .. } = s.execute().unwrap();
    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[14, 13, 12, 4, 7, 11, 17, 23, 1, 3, 6, 10, 16, 19, 22, 0, 2, 5, 8, 9]");

    let rank_values = collect_field_values(&index, &txn, "rank", &documents_ids);
    insta::assert_debug_snapshot!(rank_values, @r###"
    [
        "5",
        "4",
        "3",
        "2",
        "2",
        "2",
        "2",
        "2",
        "1",
        "1",
        "1",
        "1",
        "1",
        "1",
        "1",
        "0",
        "0",
        "0",
        "0",
        "0",
    ]
    "###);

    // --- vague, ascending ---
    let mut s = Search::new(&txn, &index);
    s.terms_matching_strategy(TermsMatchingStrategy::Last);
    s.sort_criteria(vec![AscDesc::Asc(Member::Field(S("vague")))]);

    let SearchResult { documents_ids, .. } = s.execute().unwrap();
    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 4, 5, 22, 23, 13, 1, 3, 12, 21, 11, 20, 6, 7, 8, 9, 10, 14, 15]");

    let vague_values = collect_field_values(&index, &txn, "vague", &documents_ids);
    insta::assert_debug_snapshot!(vague_values, @r###"
    [
        "0",
        "1",
        "[1,2]",
        "[1,\"2\"]",
        "[1.1367,\"help\",null]",
        "[1.2367,\"hello\"]",
        "1.5673",
        "\"0\"",
        "\"1\"",
        "false",
        "false",
        "true",
        "true",
        "__does_not_exist__",
        "null",
        "[null,null,\"\"]",
        "\"\"",
        "{\"sub\":0}",
        "__does_not_exist__",
        "__does_not_exist__",
    ]
    "###);

    // --- vague, descending ---
    let mut s = Search::new(&txn, &index);
    s.terms_matching_strategy(TermsMatchingStrategy::Last);
    s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("vague")))]);

    let SearchResult { documents_ids, .. } = s.execute().unwrap();
    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 13, 23, 22, 2, 5, 0, 11, 20, 12, 21, 3, 1, 6, 7, 8, 9, 10, 14, 15]");

    let vague_values = collect_field_values(&index, &txn, "vague", &documents_ids);
    insta::assert_debug_snapshot!(vague_values, @r###"
    [
        "[1,2]",
        "1.5673",
        "[1.2367,\"hello\"]",
        "[1.1367,\"help\",null]",
        "1",
        "[1,\"2\"]",
        "0",
        "true",
        "true",
        "false",
        "false",
        "\"1\"",
        "\"0\"",
        "__does_not_exist__",
        "null",
        "[null,null,\"\"]",
        "\"\"",
        "{\"sub\":0}",
        "__does_not_exist__",
        "__does_not_exist__",
    ]
    "###);
}
|
Loading…
x
Reference in New Issue
Block a user