Move crates under a sub folder to clean up the code

parent 30f3c30389
commit 9c1e54a2c8

1062 changed files with 19 additions and 20 deletions
236  crates/milli/tests/search/distinct.rs  Normal file
@@ -0,0 +1,236 @@
use std::collections::HashSet;

use big_s::S;
use milli::update::Settings;
use milli::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use Criterion::*;

use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};

macro_rules! test_distinct {
    ($func:ident, $distinct:ident, $exhaustive:ident, $limit:expr, $offset:expr, $criteria:expr, $n_res:expr) => {
        #[test]
        fn $func() {
            let criteria = $criteria;
            let index = search::setup_search_index_with_criteria(&criteria);

            // update distinct attribute
            let mut wtxn = index.write_txn().unwrap();
            let config = milli::update::IndexerConfig::default();
            let mut builder = Settings::new(&mut wtxn, &index, &config);
            builder.set_distinct_field(S(stringify!($distinct)));
            builder.execute(|_| (), || false).unwrap();
            wtxn.commit().unwrap();

            let rtxn = index.read_txn().unwrap();

            let mut search = Search::new(&rtxn, &index);
            search.query(search::TEST_QUERY);
            search.limit($limit);
            search.offset($offset);
            search.exhaustive_number_hits($exhaustive);

            search.terms_matching_strategy(TermsMatchingStrategy::default());

            let SearchResult { documents_ids, candidates, .. } = search.execute().unwrap();

            assert_eq!(candidates.len(), $n_res);

            let mut distinct_values = HashSet::new();
            let expected_external_ids: Vec<_> =
                search::expected_order(&criteria, TermsMatchingStrategy::default(), &[])
                    .into_iter()
                    .filter_map(|d| {
                        if distinct_values.contains(&d.$distinct) {
                            None
                        } else {
                            distinct_values.insert(d.$distinct.to_owned());
                            Some(d.id)
                        }
                    })
                    .skip($offset)
                    .take($limit)
                    .collect();

            let documents_ids = search::internal_to_external_ids(&index, &documents_ids);
            assert_eq!(documents_ids, expected_external_ids);
        }
    };
}

test_distinct!(
    exhaustive_distinct_string_default_criteria,
    tag,
    true,
    1,
    0,
    vec![Words, Typo, Proximity, Attribute, Exactness],
    3
);
test_distinct!(
    exhaustive_distinct_number_default_criteria,
    asc_desc_rank,
    true,
    1,
    0,
    vec![Words, Typo, Proximity, Attribute, Exactness],
    7
);
test_distinct!(
    exhaustive_distinct_number_weird_order_criteria,
    asc_desc_rank,
    true,
    0,
    0,
    vec![Desc(S("attribute_rank")), Desc(S("exactness_rank")), Exactness, Typo],
    7
);

test_distinct!(
    distinct_string_default_criteria,
    tag,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
    0,
    vec![Words, Typo, Proximity, Attribute, Exactness],
    3
);
test_distinct!(
    distinct_number_default_criteria,
    asc_desc_rank,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
    0,
    vec![Words, Typo, Proximity, Attribute, Exactness],
    7
);
test_distinct!(
    distinct_string_criterion_words,
    tag,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
    0,
    vec![Words],
    3
);
test_distinct!(
    distinct_number_criterion_words,
    asc_desc_rank,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
    0,
    vec![Words],
    7
);
test_distinct!(
    distinct_string_criterion_words_typo,
    tag,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
    0,
    vec![Words, Typo],
    3
);
test_distinct!(
    distinct_number_criterion_words_typo,
    asc_desc_rank,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
    0,
    vec![Words, Typo],
    7
);
test_distinct!(
    distinct_string_criterion_words_proximity,
    tag,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
    0,
    vec![Words, Proximity],
    3
);
test_distinct!(
    distinct_number_criterion_words_proximity,
    asc_desc_rank,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
    0,
    vec![Words, Proximity],
    7
);
test_distinct!(
    distinct_string_criterion_words_attribute,
    tag,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
    0,
    vec![Words, Attribute],
    3
);
test_distinct!(
    distinct_number_criterion_words_attribute,
    asc_desc_rank,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
    0,
    vec![Words, Attribute],
    7
);
test_distinct!(
    distinct_string_criterion_words_exactness,
    tag,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
    0,
    vec![Words, Exactness],
    3
);
test_distinct!(
    distinct_number_criterion_words_exactness,
    asc_desc_rank,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
    0,
    vec![Words, Exactness],
    7
);
test_distinct!(
    // testing: https://github.com/meilisearch/meilisearch/issues/4078
    distinct_string_limit_and_offset,
    tag,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
    1,
    vec![],
    3
);
test_distinct!(
    // testing: https://github.com/meilisearch/meilisearch/issues/4078
    exhaustive_distinct_string_limit_and_offset,
    tag,
    true,
    1,
    2,
    vec![],
    3
);
test_distinct!(
    // testing: https://github.com/meilisearch/meilisearch/issues/4078
    distinct_number_limit_and_offset,
    asc_desc_rank,
    false,
    EXTERNAL_DOCUMENTS_IDS.len(),
    2,
    vec![],
    7
);
test_distinct!(
    // testing: https://github.com/meilisearch/meilisearch/issues/4078
    exhaustive_distinct_number_limit_and_offset,
    asc_desc_rank,
    true,
    2,
    4,
    vec![],
    7
);
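The heart of `test_distinct!` is the `HashSet`-based dedup inside `filter_map`: only the first document carrying each distinct value is kept, and `skip`/`take` pagination is applied after deduplication, which is what the `limit_and_offset` cases above (issue 4078) exercise. A minimal, self-contained sketch of that pattern, using plain tuples instead of milli's `TestDocument` (the data below is made up for illustration):

use std::collections::HashSet;

fn main() {
    // (id, tag) pairs already in ranking order; hypothetical data for illustration only.
    let ranked = vec![("A", "red"), ("B", "red"), ("C", "green"), ("D", "blue"), ("E", "green")];

    let offset = 1;
    let limit = 2;

    let mut seen = HashSet::new();
    let expected: Vec<&str> = ranked
        .into_iter()
        .filter_map(|(id, tag)| {
            if seen.contains(tag) {
                None
            } else {
                // The first occurrence of each tag wins; later ones are dropped.
                seen.insert(tag);
                Some(id)
            }
        })
        // Pagination happens on the deduplicated sequence, not on the raw hits.
        .skip(offset)
        .take(limit)
        .collect();

    // First occurrences are A (red), C (green), D (blue); skip 1, take 2.
    assert_eq!(expected, vec!["C", "D"]);
}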
74  crates/milli/tests/search/facet_distribution.rs  Normal file
@@ -0,0 +1,74 @@
use std::io::Cursor;

use big_s::S;
use heed::EnvOpenOptions;
use maplit::hashset;
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use milli::{FacetDistribution, Index, Object, OrderBy};
use serde_json::Deserializer;

#[test]
fn test_facet_distribution_with_no_facet_values() {
    let path = tempfile::tempdir().unwrap();
    let mut options = EnvOpenOptions::new();
    options.map_size(10 * 1024 * 1024); // 10 MB
    let index = Index::new(options, &path).unwrap();

    let mut wtxn = index.write_txn().unwrap();
    let config = IndexerConfig::default();
    let mut builder = Settings::new(&mut wtxn, &index, &config);

    builder.set_filterable_fields(hashset! {
        S("genres"),
        S("tags"),
    });
    builder.execute(|_| (), || false).unwrap();

    // index documents
    let config = IndexerConfig { max_memory: Some(10 * 1024 * 1024), ..Default::default() };
    let indexing_config = IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };

    let builder =
        IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| (), || false).unwrap();
    let mut documents_builder = DocumentsBatchBuilder::new(Vec::new());
    let reader = Cursor::new(
        r#"{
            "id": 123,
            "title": "What a week, hu...",
            "genres": [],
            "tags": ["blue"]
        }
        {
            "id": 345,
            "title": "I am the pig!",
            "tags": ["red"]
        }"#,
    );

    for result in Deserializer::from_reader(reader).into_iter::<Object>() {
        let object = result.unwrap();
        documents_builder.append_json_object(&object).unwrap();
    }

    let vector = documents_builder.into_inner().unwrap();

    // index documents
    let content = DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap();
    let (builder, user_error) = builder.add_documents(content).unwrap();
    user_error.unwrap();
    builder.execute().unwrap();

    wtxn.commit().unwrap();

    let txn = index.read_txn().unwrap();
    let mut distrib = FacetDistribution::new(&txn, &index);
    distrib.facets(vec![("genres", OrderBy::default())]);
    let result = distrib.execute().unwrap();
    assert_eq!(result["genres"].len(), 0);

    let mut distrib = FacetDistribution::new(&txn, &index);
    distrib.facets(vec![("tags", OrderBy::default())]);
    let result = distrib.execute().unwrap();
    assert_eq!(result["tags"].len(), 2);
}
104  crates/milli/tests/search/filters.rs  Normal file
@@ -0,0 +1,104 @@
use either::{Either, Left, Right};
use milli::{Criterion, Filter, Search, SearchResult, TermsMatchingStrategy};
use Criterion::*;

use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};

macro_rules! test_filter {
    ($func:ident, $filter:expr) => {
        #[test]
        fn $func() {
            let criteria = vec![Words, Typo, Proximity, Attribute, Exactness];
            let index = search::setup_search_index_with_criteria(&criteria);
            let rtxn = index.read_txn().unwrap();

            let filter_conditions =
                Filter::from_array::<Vec<Either<Vec<&str>, &str>>, _>($filter).unwrap().unwrap();

            let mut search = Search::new(&rtxn, &index);
            search.query(search::TEST_QUERY);
            search.limit(EXTERNAL_DOCUMENTS_IDS.len());

            search.terms_matching_strategy(TermsMatchingStrategy::default());
            search.filter(filter_conditions);

            let SearchResult { documents_ids, .. } = search.execute().unwrap();

            let filtered_ids = search::expected_filtered_ids($filter);
            let expected_external_ids: Vec<_> =
                search::expected_order(&criteria, TermsMatchingStrategy::default(), &[])
                    .into_iter()
                    .filter_map(|d| if filtered_ids.contains(&d.id) { Some(d.id) } else { None })
                    .collect();

            let documents_ids = search::internal_to_external_ids(&index, &documents_ids);
            assert_eq!(documents_ids, expected_external_ids);
        }
    };
}

test_filter!(eq_simple_string_filter, vec![Right("tag=red")]);
test_filter!(eq_simple_number_filter, vec![Right("asc_desc_rank=1")]);
test_filter!(eq_string_and_filter_return_empty, vec![Right("tag=red"), Right("tag=green")]);
test_filter!(eq_mix_and_filter, vec![Right("tag=red"), Right("asc_desc_rank=1")]);
test_filter!(eq_string_or_filter, vec![Left(vec!["tag=red", "tag=green"])]);
test_filter!(eq_mix_or_filter, vec![Left(vec!["tag=red", "asc_desc_rank=1"])]);
test_filter!(eq_number_or_filter, vec![Left(vec!["asc_desc_rank=3", "asc_desc_rank=1"])]);
test_filter!(neq_simple_string_filter, vec![Right("tag!=red")]);
test_filter!(neq_simple_number_filter, vec![Right("asc_desc_rank!=1")]);
test_filter!(neq_simple_string_in_number_column_filter, vec![Right("asc_desc_rank!=red")]);
test_filter!(geo_radius, vec![Right("_geoRadius(50.630010347667806, 3.086251829166809, 100000)")]);
test_filter!(
    not_geo_radius,
    vec![Right("NOT _geoRadius(50.630010347667806, 3.086251829166809, 1000000)")]
);
test_filter!(eq_complex_filter, vec![Left(vec!["tag=red", "tag=green"]), Right("asc_desc_rank=3")]);
test_filter!(
    eq_complex_filter_2,
    vec![Left(vec!["tag=red", "tag=green"]), Left(vec!["asc_desc_rank=3", "asc_desc_rank=1"])]
);
test_filter!(greater_simple_number_filter, vec![Right("asc_desc_rank>1")]);
test_filter!(greater_mix_and_filter, vec![Right("tag=red"), Right("asc_desc_rank>1")]);
test_filter!(greater_mix_or_filter, vec![Left(vec!["tag=red", "asc_desc_rank>1"])]);
test_filter!(greater_number_or_filter, vec![Left(vec!["asc_desc_rank>3", "asc_desc_rank>1"])]);
test_filter!(
    greater_complex_filter,
    vec![Left(vec!["tag=red", "tag=green"]), Right("asc_desc_rank>3")]
);
test_filter!(
    greater_complex_filter_2,
    vec![Left(vec!["tag=red", "tag=green"]), Left(vec!["asc_desc_rank>3", "asc_desc_rank>1"])]
);
test_filter!(lower_simple_number_filter, vec![Right("asc_desc_rank<1")]);
test_filter!(lower_mix_and_filter, vec![Right("tag=red"), Right("asc_desc_rank<1")]);
test_filter!(lower_mix_or_filter, vec![Left(vec!["tag=red", "asc_desc_rank<1"])]);
test_filter!(lower_number_or_filter, vec![Left(vec!["asc_desc_rank<3", "asc_desc_rank<1"])]);
test_filter!(
    lower_complex_filter,
    vec![Left(vec!["tag=red", "tag=green"]), Right("asc_desc_rank<3")]
);
test_filter!(
    lower_complex_filter_2,
    vec![Left(vec!["tag=red", "tag=green"]), Left(vec!["asc_desc_rank<3", "asc_desc_rank<1"])]
);
test_filter!(exists_filter_1, vec![Right("opt1 EXISTS")]);
test_filter!(exists_filter_2, vec![Right("opt1.opt2 EXISTS")]);
test_filter!(exists_filter_1_not, vec![Right("opt1 NOT EXISTS")]);
test_filter!(exists_filter_1_not_alt, vec![Right("NOT opt1 EXISTS")]);
test_filter!(exists_filter_1_double_not, vec![Right("NOT opt1 NOT EXISTS")]);

test_filter!(null_filter_1, vec![Right("opt1 IS NULL")]);
test_filter!(null_filter_2, vec![Right("opt1.opt2 IS NULL")]);
test_filter!(null_filter_1_not, vec![Right("opt1 IS NOT NULL")]);
test_filter!(null_filter_1_not_alt, vec![Right("NOT opt1 IS NULL")]);
test_filter!(null_filter_1_double_not, vec![Right("NOT opt1 IS NOT NULL")]);

test_filter!(empty_filter_1, vec![Right("opt1 IS EMPTY")]);
test_filter!(empty_filter_2, vec![Right("opt1.opt2 IS EMPTY")]);
test_filter!(empty_filter_1_not, vec![Right("opt1 IS NOT EMPTY")]);
test_filter!(empty_filter_1_not_alt, vec![Right("NOT opt1 IS EMPTY")]);
test_filter!(empty_filter_1_double_not, vec![Right("NOT opt1 IS NOT EMPTY")]);

test_filter!(in_filter, vec![Right("tag_in IN[1, 2, 3, four, five]")]);
test_filter!(not_in_filter, vec![Right("tag_in NOT IN[1, 2, 3, four, five]")]);
test_filter!(not_not_in_filter, vec![Right("NOT tag_in NOT IN[1, 2, 3, four, five]")]);
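Each `$filter` above is the `Either`-based array form accepted by `Filter::from_array`; judging from the test names (`eq_mix_and_filter` vs `eq_mix_or_filter`), the outer elements are combined with AND while a `Left(vec![...])` groups conditions that are ORed together. A minimal sketch of building and applying such a filter outside the macro; the `index` and `rtxn` arguments are assumed to come from a setup like `setup_search_index_with_criteria` above, and the helper name is hypothetical:

use either::{Either, Left, Right};
use milli::{Filter, Index, Search, SearchResult};

// Sketch only: expresses (tag = red OR tag = green) AND asc_desc_rank = 3.
fn red_or_green_with_rank_3(index: &Index, rtxn: &heed::RoTxn) -> SearchResult {
    let conditions: Vec<Either<Vec<&str>, &str>> =
        vec![Left(vec!["tag=red", "tag=green"]), Right("asc_desc_rank=3")];
    // `from_array` returns Result<Option<Filter>>, hence the double unwrap, as in the macro.
    let filter = Filter::from_array(conditions).unwrap().unwrap();

    let mut search = Search::new(rtxn, index);
    search.filter(filter);
    search.execute().unwrap()
}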
352  crates/milli/tests/search/mod.rs  Normal file
@@ -0,0 +1,352 @@
use std::cmp::Reverse;
use std::collections::HashSet;
use std::io::Cursor;

use big_s::S;
use either::{Either, Left, Right};
use heed::EnvOpenOptions;
use maplit::{btreemap, hashset};
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use milli::{AscDesc, Criterion, DocumentId, Index, Member, Object, TermsMatchingStrategy};
use serde::{Deserialize, Deserializer};
use slice_group_by::GroupBy;

mod distinct;
mod facet_distribution;
mod filters;
mod phrase_search;
mod query_criteria;
mod sort;
mod typo_tolerance;

pub const TEST_QUERY: &str = "hello world america";

pub const EXTERNAL_DOCUMENTS_IDS: &[&str; 17] =
    &["A", "B", "C", "D", "E", "F", "G", "H", "I", "J", "K", "L", "M", "N", "O", "P", "Q"];

pub const CONTENT: &str = include_str!("../assets/test_set.ndjson");

pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
    let path = tempfile::tempdir().unwrap();
    let mut options = EnvOpenOptions::new();
    options.map_size(10 * 1024 * 1024); // 10 MB
    let index = Index::new(options, &path).unwrap();

    let mut wtxn = index.write_txn().unwrap();
    let config = IndexerConfig::default();

    let mut builder = Settings::new(&mut wtxn, &index, &config);

    builder.set_criteria(criteria.to_vec());
    builder.set_filterable_fields(hashset! {
        S("tag"),
        S("asc_desc_rank"),
        S("_geo"),
        S("opt1"),
        S("opt1.opt2"),
        S("tag_in")
    });
    builder.set_sortable_fields(hashset! {
        S("tag"),
        S("asc_desc_rank"),
    });
    builder.set_synonyms(btreemap! {
        S("hello") => vec![S("good morning")],
        S("world") => vec![S("earth")],
        S("america") => vec![S("the united states")],
    });
    builder.set_searchable_fields(vec![S("title"), S("description")]);
    builder.execute(|_| (), || false).unwrap();

    // index documents
    let config = IndexerConfig { max_memory: Some(10 * 1024 * 1024), ..Default::default() };
    let indexing_config = IndexDocumentsConfig::default();

    let builder =
        IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| (), || false).unwrap();
    let mut documents_builder = DocumentsBatchBuilder::new(Vec::new());
    let reader = Cursor::new(CONTENT.as_bytes());

    for result in serde_json::Deserializer::from_reader(reader).into_iter::<Object>() {
        let object = result.unwrap();
        documents_builder.append_json_object(&object).unwrap();
    }

    let vector = documents_builder.into_inner().unwrap();

    // index documents
    let content = DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap();
    let (builder, user_error) = builder.add_documents(content).unwrap();
    user_error.unwrap();
    builder.execute().unwrap();

    wtxn.commit().unwrap();

    index
}

pub fn internal_to_external_ids(index: &Index, internal_ids: &[DocumentId]) -> Vec<String> {
    let rtxn = index.read_txn().unwrap();
    let docid_map = index.external_documents_ids();
    let docid_map: std::collections::HashMap<_, _> = EXTERNAL_DOCUMENTS_IDS
        .iter()
        .map(|id| (docid_map.get(&rtxn, id).unwrap().unwrap(), id))
        .collect();
    internal_ids.iter().map(|id| docid_map.get(id).unwrap().to_string()).collect()
}

pub fn expected_order(
    criteria: &[Criterion],
    optional_words: TermsMatchingStrategy,
    sort_by: &[AscDesc],
) -> Vec<TestDocument> {
    let dataset =
        serde_json::Deserializer::from_str(CONTENT).into_iter().map(|r| r.unwrap()).collect();
    let mut groups: Vec<Vec<TestDocument>> = vec![dataset];

    for criterion in criteria {
        let mut new_groups = Vec::new();
        for group in groups.iter_mut() {
            match criterion {
                Criterion::Attribute => {
                    group.sort_by_key(|d| d.attribute_rank);
                    new_groups
                        .extend(group.linear_group_by_key(|d| d.attribute_rank).map(Vec::from));
                }
                Criterion::Exactness => {
                    group.sort_by_key(|d| d.exact_rank);
                    new_groups.extend(group.linear_group_by_key(|d| d.exact_rank).map(Vec::from));
                }
                Criterion::Proximity => {
                    group.sort_by_key(|d| d.proximity_rank);
                    new_groups
                        .extend(group.linear_group_by_key(|d| d.proximity_rank).map(Vec::from));
                }
                Criterion::Sort if sort_by == [AscDesc::Asc(Member::Field(S("tag")))] => {
                    group.sort_by_key(|d| d.sort_by_rank);
                    new_groups.extend(group.linear_group_by_key(|d| d.sort_by_rank).map(Vec::from));
                }
                Criterion::Sort if sort_by == [AscDesc::Desc(Member::Field(S("tag")))] => {
                    group.sort_by_key(|d| Reverse(d.sort_by_rank));
                    new_groups.extend(group.linear_group_by_key(|d| d.sort_by_rank).map(Vec::from));
                }
                Criterion::Typo => {
                    group.sort_by_key(|d| d.typo_rank);
                    new_groups.extend(group.linear_group_by_key(|d| d.typo_rank).map(Vec::from));
                }
                Criterion::Words => {
                    group.sort_by_key(|d| d.word_rank);
                    new_groups.extend(group.linear_group_by_key(|d| d.word_rank).map(Vec::from));
                }
                Criterion::Asc(field_name) if field_name == "asc_desc_rank" => {
                    group.sort_by_key(|d| d.asc_desc_rank);
                    new_groups
                        .extend(group.linear_group_by_key(|d| d.asc_desc_rank).map(Vec::from));
                }
                Criterion::Desc(field_name) if field_name == "asc_desc_rank" => {
                    group.sort_by_key(|d| Reverse(d.asc_desc_rank));
                    new_groups
                        .extend(group.linear_group_by_key(|d| d.asc_desc_rank).map(Vec::from));
                }
                Criterion::Asc(_) | Criterion::Desc(_) | Criterion::Sort => {
                    new_groups.push(group.clone())
                }
            }
        }
        groups = std::mem::take(&mut new_groups);
    }

    match optional_words {
        TermsMatchingStrategy::Last => groups.into_iter().flatten().collect(),
        TermsMatchingStrategy::Frequency => groups.into_iter().flatten().collect(),
        TermsMatchingStrategy::All => {
            groups.into_iter().flatten().filter(|d| d.word_rank == 0).collect()
        }
    }
}

fn execute_filter(filter: &str, document: &TestDocument) -> Option<String> {
    let mut id = None;
    if let Some((field, filter)) = filter.split_once("!=") {
        if field == "tag" && document.tag != filter
            || (field == "asc_desc_rank"
                && Ok(&document.asc_desc_rank) != filter.parse::<u32>().as_ref())
        {
            id = Some(document.id.clone())
        }
    } else if let Some((field, filter)) = filter.split_once('=') {
        if field == "tag" && document.tag == filter
            || (field == "asc_desc_rank"
                && document.asc_desc_rank == filter.parse::<u32>().unwrap())
        {
            id = Some(document.id.clone())
        }
    } else if let Some(("asc_desc_rank", filter)) = filter.split_once('<') {
        if document.asc_desc_rank < filter.parse().unwrap() {
            id = Some(document.id.clone())
        }
    } else if let Some(("asc_desc_rank", filter)) = filter.split_once('>') {
        if document.asc_desc_rank > filter.parse().unwrap() {
            id = Some(document.id.clone())
        }
    } else if filter.starts_with("_geoRadius") {
        id = (document.geo_rank < 100000).then(|| document.id.clone());
    } else if filter.starts_with("NOT _geoRadius") {
        id = (document.geo_rank > 1000000).then(|| document.id.clone());
    } else if matches!(filter, "opt1 EXISTS" | "NOT opt1 NOT EXISTS") {
        id = document.opt1.is_some().then(|| document.id.clone());
    } else if matches!(filter, "NOT opt1 EXISTS" | "opt1 NOT EXISTS") {
        id = document.opt1.is_none().then(|| document.id.clone());
    } else if matches!(filter, "opt1.opt2 EXISTS") {
        if document.opt1opt2.is_some() {
            id = Some(document.id.clone());
        } else if let Some(opt1) = &document.opt1 {
            id = contains_key_rec(opt1, "opt2").then(|| document.id.clone());
        }
    } else if matches!(filter, "opt1 IS NULL" | "NOT opt1 IS NOT NULL") {
        id = document.opt1.as_ref().map_or(false, |v| v.is_null()).then(|| document.id.clone());
    } else if matches!(filter, "NOT opt1 IS NULL" | "opt1 IS NOT NULL") {
        id = document.opt1.as_ref().map_or(true, |v| !v.is_null()).then(|| document.id.clone());
    } else if matches!(filter, "opt1.opt2 IS NULL") {
        if document.opt1opt2.as_ref().map_or(false, |v| v.is_null()) {
            id = Some(document.id.clone());
        } else if let Some(opt1) = &document.opt1 {
            if !opt1.is_null() {
                id = contains_null_rec(opt1, "opt2").then(|| document.id.clone());
            }
        }
    } else if matches!(filter, "opt1 IS EMPTY" | "NOT opt1 IS NOT EMPTY") {
        id = document.opt1.as_ref().map_or(false, is_empty_value).then(|| document.id.clone());
    } else if matches!(filter, "NOT opt1 IS EMPTY" | "opt1 IS NOT EMPTY") {
        id = document
            .opt1
            .as_ref()
            .map_or(true, |v| !is_empty_value(v))
            .then(|| document.id.clone());
    } else if matches!(filter, "opt1.opt2 IS EMPTY") {
        if document.opt1opt2.as_ref().map_or(false, is_empty_value) {
            id = Some(document.id.clone());
        }
    } else if matches!(
        filter,
        "tag_in IN[1, 2, 3, four, five]" | "NOT tag_in NOT IN[1, 2, 3, four, five]"
    ) {
        id = matches!(document.id.as_str(), "A" | "B" | "C" | "D" | "E")
            .then(|| document.id.clone());
    } else if matches!(filter, "tag_in NOT IN[1, 2, 3, four, five]") {
        id = (!matches!(document.id.as_str(), "A" | "B" | "C" | "D" | "E"))
            .then(|| document.id.clone());
    }
    id
}

pub fn is_empty_value(v: &serde_json::Value) -> bool {
    match v {
        serde_json::Value::String(s) => s.is_empty(),
        serde_json::Value::Array(a) => a.is_empty(),
        serde_json::Value::Object(o) => o.is_empty(),
        _ => false,
    }
}

pub fn contains_key_rec(v: &serde_json::Value, key: &str) -> bool {
    match v {
        serde_json::Value::Array(v) => {
            for v in v.iter() {
                if contains_key_rec(v, key) {
                    return true;
                }
            }
            false
        }
        serde_json::Value::Object(v) => {
            for (k, v) in v.iter() {
                if k == key || contains_key_rec(v, key) {
                    return true;
                }
            }
            false
        }
        _ => false,
    }
}

pub fn contains_null_rec(v: &serde_json::Value, key: &str) -> bool {
    match v {
        serde_json::Value::Object(v) => {
            for (k, v) in v.iter() {
                if k == key && v.is_null() || contains_null_rec(v, key) {
                    return true;
                }
            }
            false
        }
        serde_json::Value::Array(v) => {
            for v in v.iter() {
                if contains_null_rec(v, key) {
                    return true;
                }
            }
            false
        }
        _ => false,
    }
}

pub fn expected_filtered_ids(filters: Vec<Either<Vec<&str>, &str>>) -> HashSet<String> {
    let dataset: Vec<TestDocument> =
        serde_json::Deserializer::from_str(CONTENT).into_iter().map(|r| r.unwrap()).collect();

    let mut filtered_ids: HashSet<_> = dataset.iter().map(|d| d.id.clone()).collect();
    for either in filters {
        let ids = match either {
            Left(array) => array
                .into_iter()
                .map(|f| {
                    let ids: HashSet<String> =
                        dataset.iter().filter_map(|d| execute_filter(f, d)).collect();
                    ids
                })
                .reduce(|a, b| a.union(&b).cloned().collect())
                .unwrap(),
            Right(filter) => {
                let ids: HashSet<String> =
                    dataset.iter().filter_map(|d| execute_filter(filter, d)).collect();
                ids
            }
        };

        filtered_ids = filtered_ids.intersection(&ids).cloned().collect();
    }

    filtered_ids
}

#[derive(Debug, Clone, Deserialize, PartialEq, Eq)]
pub struct TestDocument {
    pub id: String,
    pub word_rank: u32,
    pub typo_rank: u32,
    pub proximity_rank: u32,
    pub attribute_rank: u32,
    pub exact_rank: u32,
    pub asc_desc_rank: u32,
    pub sort_by_rank: u32,
    pub geo_rank: u32,
    pub title: String,
    pub description: String,
    pub tag: String,
    #[serde(default, deserialize_with = "some_option")]
    pub opt1: Option<serde_json::Value>,
    #[serde(default, deserialize_with = "some_option", rename = "opt1.opt2")]
    pub opt1opt2: Option<serde_json::Value>,
}

fn some_option<'de, D>(deserializer: D) -> Result<Option<serde_json::Value>, D::Error>
where
    D: Deserializer<'de>,
{
    let result = serde_json::Value::deserialize(deserializer)?;
    Ok(Some(result))
}
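`expected_order` computes the expected ranking by successive refinement: it starts with one group holding the whole dataset, and each criterion sorts every group by that rule's rank and then splits it into sub-groups of equal rank with `linear_group_by_key`, so later rules only reorder the ties left by earlier ones. A small self-contained example of that refinement step, using the same `slice_group_by` crate on made-up (id, rank) pairs:

use slice_group_by::GroupBy;

fn main() {
    // Documents reduced to (id, rank) for a single criterion; the data is illustrative only.
    let mut group = vec![("D", 2), ("A", 0), ("C", 1), ("B", 0)];

    // Sort by the rank, then split into runs of equal rank,
    // just like the per-criterion arms in `expected_order` above.
    group.sort_by_key(|(_, rank)| *rank);
    let new_groups: Vec<Vec<(&str, u32)>> =
        group.linear_group_by_key(|(_, rank)| *rank).map(Vec::from).collect();

    // The tie between A and B (rank 0) stays in one sub-group for the next rule to break.
    assert_eq!(
        new_groups,
        vec![vec![("A", 0), ("B", 0)], vec![("C", 1)], vec![("D", 2)]]
    );
}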
46  crates/milli/tests/search/phrase_search.rs  Normal file
@@ -0,0 +1,46 @@
use milli::update::{IndexerConfig, Settings};
use milli::{Criterion, Index, Search, TermsMatchingStrategy};

use crate::search::Criterion::{Attribute, Exactness, Proximity};

fn set_stop_words(index: &Index, stop_words: &[&str]) {
    let mut wtxn = index.write_txn().unwrap();
    let config = IndexerConfig::default();

    let mut builder = Settings::new(&mut wtxn, index, &config);
    let stop_words = stop_words.iter().map(|s| s.to_string()).collect();
    builder.set_stop_words(stop_words);
    builder.execute(|_| (), || false).unwrap();
    wtxn.commit().unwrap();
}

fn test_phrase_search_with_stop_words_given_criteria(criteria: &[Criterion]) {
    let index = super::setup_search_index_with_criteria(criteria);

    // Add stop_words
    set_stop_words(&index, &["a", "an", "the", "of"]);

    // Phrase search containing stop words
    let txn = index.read_txn().unwrap();

    let mut search = Search::new(&txn, &index);
    search.query("\"the use of force\"");
    search.limit(10);
    search.terms_matching_strategy(TermsMatchingStrategy::All);

    let result = search.execute().unwrap();
    // 1 document should match
    assert_eq!(result.documents_ids.len(), 1);
}

#[test]
fn test_phrase_search_with_stop_words_no_criteria() {
    let criteria = [];
    test_phrase_search_with_stop_words_given_criteria(&criteria);
}

#[test]
fn test_phrase_search_with_stop_words_all_criteria() {
    let criteria = [Proximity, Attribute, Exactness];
    test_phrase_search_with_stop_words_given_criteria(&criteria);
}
358  crates/milli/tests/search/query_criteria.rs  Normal file
@@ -0,0 +1,358 @@
use std::cmp::Reverse;
use std::io::Cursor;

use big_s::S;
use heed::EnvOpenOptions;
use itertools::Itertools;
use maplit::hashset;
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use milli::{AscDesc, Criterion, Index, Member, Search, SearchResult, TermsMatchingStrategy};
use rand::Rng;
use Criterion::*;

use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};

const ALLOW_OPTIONAL_WORDS: TermsMatchingStrategy = TermsMatchingStrategy::Last;
const DISALLOW_OPTIONAL_WORDS: TermsMatchingStrategy = TermsMatchingStrategy::All;
const ASC_DESC_CANDIDATES_THRESHOLD: usize = 1000;

macro_rules! test_criterion {
    ($func:ident, $optional_word:ident, $criteria:expr, $sort_criteria:expr) => {
        #[test]
        fn $func() {
            let criteria = $criteria;
            let index = search::setup_search_index_with_criteria(&criteria);
            let rtxn = index.read_txn().unwrap();

            let mut search = Search::new(&rtxn, &index);
            search.query(search::TEST_QUERY);
            search.limit(EXTERNAL_DOCUMENTS_IDS.len());
            search.terms_matching_strategy($optional_word);
            search.sort_criteria($sort_criteria);

            let SearchResult { documents_ids, .. } = search.execute().unwrap();

            let expected_external_ids: Vec<_> =
                search::expected_order(&criteria, $optional_word, &$sort_criteria[..])
                    .into_iter()
                    .map(|d| d.id)
                    .collect();
            let documents_ids = search::internal_to_external_ids(&index, &documents_ids);
            assert_eq!(documents_ids, expected_external_ids);
        }
    };
}

test_criterion!(none, DISALLOW_OPTIONAL_WORDS, vec![], vec![]);
test_criterion!(words, ALLOW_OPTIONAL_WORDS, vec![Words], vec![]);
test_criterion!(attribute, DISALLOW_OPTIONAL_WORDS, vec![Attribute], vec![]);
test_criterion!(typo, DISALLOW_OPTIONAL_WORDS, vec![Typo], vec![]);
test_criterion!(exactness, DISALLOW_OPTIONAL_WORDS, vec![Exactness], vec![]);
test_criterion!(proximity, DISALLOW_OPTIONAL_WORDS, vec![Proximity], vec![]);
test_criterion!(asc, DISALLOW_OPTIONAL_WORDS, vec![Asc(S("asc_desc_rank"))], vec![]);
test_criterion!(desc, DISALLOW_OPTIONAL_WORDS, vec![Desc(S("asc_desc_rank"))], vec![]);
test_criterion!(
    asc_unexisting_field,
    DISALLOW_OPTIONAL_WORDS,
    vec![Asc(S("unexisting_field"))],
    vec![]
);

test_criterion!(
    desc_unexisting_field,
    DISALLOW_OPTIONAL_WORDS,
    vec![Desc(S("unexisting_field"))],
    vec![]
);

test_criterion!(empty_sort_by, DISALLOW_OPTIONAL_WORDS, vec![Sort], vec![]);
test_criterion!(
    sort_by_asc,
    DISALLOW_OPTIONAL_WORDS,
    vec![Sort],
    vec![AscDesc::Asc(Member::Field(S("tag")))]
);
test_criterion!(
    sort_by_desc,
    DISALLOW_OPTIONAL_WORDS,
    vec![Sort],
    vec![AscDesc::Desc(Member::Field(S("tag")))]
);
test_criterion!(
    default_criteria_order,
    ALLOW_OPTIONAL_WORDS,
    vec![Words, Typo, Proximity, Attribute, Exactness],
    vec![]
);

#[test]
fn criteria_mixup() {
    use Criterion::*;
    let index = search::setup_search_index_with_criteria(&[
        Words,
        Attribute,
        Desc(S("asc_desc_rank")),
        Exactness,
        Proximity,
        Typo,
    ]);

    #[rustfmt::skip]
    let criteria_mix = {
        // Criterion doesn't implement Copy, we create a new Criterion using a closure
        let desc = || Desc(S("asc_desc_rank"));
        // all possible criteria order
        vec![
            vec![Words, Attribute, desc(), Exactness, Proximity, Typo],
            vec![Words, Attribute, desc(), Exactness, Typo, Proximity],
            vec![Words, Attribute, desc(), Proximity, Exactness, Typo],
            vec![Words, Attribute, desc(), Proximity, Typo, Exactness],
            vec![Words, Attribute, desc(), Typo, Exactness, Proximity],
            vec![Words, Attribute, desc(), Typo, Proximity, Exactness],
            vec![Words, Attribute, Exactness, desc(), Proximity, Typo],
            vec![Words, Attribute, Exactness, desc(), Typo, Proximity],
            vec![Words, Attribute, Exactness, Proximity, desc(), Typo],
            vec![Words, Attribute, Exactness, Proximity, Typo, desc()],
            vec![Words, Attribute, Exactness, Typo, desc(), Proximity],
            vec![Words, Attribute, Exactness, Typo, Proximity, desc()],
            vec![Words, Attribute, Proximity, desc(), Exactness, Typo],
            vec![Words, Attribute, Proximity, desc(), Typo, Exactness],
            vec![Words, Attribute, Proximity, Exactness, desc(), Typo],
            vec![Words, Attribute, Proximity, Exactness, Typo, desc()],
            vec![Words, Attribute, Proximity, Typo, desc(), Exactness],
            vec![Words, Attribute, Proximity, Typo, Exactness, desc()],
            vec![Words, Attribute, Typo, desc(), Exactness, Proximity],
            vec![Words, Attribute, Typo, desc(), Proximity, Exactness],
            vec![Words, Attribute, Typo, Exactness, desc(), Proximity],
            vec![Words, Attribute, Typo, Exactness, Proximity, desc()],
            vec![Words, Attribute, Typo, Proximity, desc(), Exactness],
            vec![Words, Attribute, Typo, Proximity, Exactness, desc()],
            vec![Words, desc(), Attribute, Exactness, Proximity, Typo],
            vec![Words, desc(), Attribute, Exactness, Typo, Proximity],
            vec![Words, desc(), Attribute, Proximity, Exactness, Typo],
            vec![Words, desc(), Attribute, Proximity, Typo, Exactness],
            vec![Words, desc(), Attribute, Typo, Exactness, Proximity],
            vec![Words, desc(), Attribute, Typo, Proximity, Exactness],
            vec![Words, desc(), Exactness, Attribute, Proximity, Typo],
            vec![Words, desc(), Exactness, Attribute, Typo, Proximity],
            vec![Words, desc(), Exactness, Proximity, Attribute, Typo],
            vec![Words, desc(), Exactness, Proximity, Typo, Attribute],
            vec![Words, desc(), Exactness, Typo, Attribute, Proximity],
            vec![Words, desc(), Exactness, Typo, Proximity, Attribute],
            vec![Words, desc(), Proximity, Attribute, Exactness, Typo],
            vec![Words, desc(), Proximity, Attribute, Typo, Exactness],
            vec![Words, desc(), Proximity, Exactness, Attribute, Typo],
            vec![Words, desc(), Proximity, Exactness, Typo, Attribute],
            vec![Words, desc(), Proximity, Typo, Attribute, Exactness],
            vec![Words, desc(), Proximity, Typo, Exactness, Attribute],
            vec![Words, desc(), Typo, Attribute, Exactness, Proximity],
            vec![Words, desc(), Typo, Attribute, Proximity, Exactness],
            vec![Words, desc(), Typo, Exactness, Attribute, Proximity],
            vec![Words, desc(), Typo, Exactness, Proximity, Attribute],
            vec![Words, desc(), Typo, Proximity, Attribute, Exactness],
            vec![Words, desc(), Typo, Proximity, Exactness, Attribute],
            vec![Words, Exactness, Attribute, desc(), Proximity, Typo],
            vec![Words, Exactness, Attribute, desc(), Typo, Proximity],
            vec![Words, Exactness, Attribute, Proximity, desc(), Typo],
            vec![Words, Exactness, Attribute, Proximity, Typo, desc()],
            vec![Words, Exactness, Attribute, Typo, desc(), Proximity],
            vec![Words, Exactness, Attribute, Typo, Proximity, desc()],
            vec![Words, Exactness, desc(), Attribute, Proximity, Typo],
            vec![Words, Exactness, desc(), Attribute, Typo, Proximity],
            vec![Words, Exactness, desc(), Proximity, Attribute, Typo],
            vec![Words, Exactness, desc(), Proximity, Typo, Attribute],
            vec![Words, Exactness, desc(), Typo, Attribute, Proximity],
            vec![Words, Exactness, desc(), Typo, Proximity, Attribute],
            vec![Words, Exactness, Proximity, Attribute, desc(), Typo],
            vec![Words, Exactness, Proximity, Attribute, Typo, desc()],
            vec![Words, Exactness, Proximity, desc(), Attribute, Typo],
            vec![Words, Exactness, Proximity, desc(), Typo, Attribute],
            vec![Words, Exactness, Proximity, Typo, Attribute, desc()],
            vec![Words, Exactness, Proximity, Typo, desc(), Attribute],
            vec![Words, Exactness, Typo, Attribute, desc(), Proximity],
            vec![Words, Exactness, Typo, Attribute, Proximity, desc()],
            vec![Words, Exactness, Typo, desc(), Attribute, Proximity],
            vec![Words, Exactness, Typo, desc(), Proximity, Attribute],
            vec![Words, Exactness, Typo, Proximity, Attribute, desc()],
            vec![Words, Exactness, Typo, Proximity, desc(), Attribute],
            vec![Words, Proximity, Attribute, desc(), Exactness, Typo],
            vec![Words, Proximity, Attribute, desc(), Typo, Exactness],
            vec![Words, Proximity, Attribute, Exactness, desc(), Typo],
            vec![Words, Proximity, Attribute, Exactness, Typo, desc()],
            vec![Words, Proximity, Attribute, Typo, desc(), Exactness],
            vec![Words, Proximity, Attribute, Typo, Exactness, desc()],
            vec![Words, Proximity, desc(), Attribute, Exactness, Typo],
            vec![Words, Proximity, desc(), Attribute, Typo, Exactness],
            vec![Words, Proximity, desc(), Exactness, Attribute, Typo],
            vec![Words, Proximity, desc(), Exactness, Typo, Attribute],
            vec![Words, Proximity, desc(), Typo, Attribute, Exactness],
            vec![Words, Proximity, desc(), Typo, Exactness, Attribute],
            vec![Words, Proximity, Exactness, Attribute, desc(), Typo],
            vec![Words, Proximity, Exactness, Attribute, Typo, desc()],
            vec![Words, Proximity, Exactness, desc(), Attribute, Typo],
            vec![Words, Proximity, Exactness, desc(), Typo, Attribute],
            vec![Words, Proximity, Exactness, Typo, Attribute, desc()],
            vec![Words, Proximity, Exactness, Typo, desc(), Attribute],
            vec![Words, Proximity, Typo, Attribute, desc(), Exactness],
            vec![Words, Proximity, Typo, Attribute, Exactness, desc()],
            vec![Words, Proximity, Typo, desc(), Attribute, Exactness],
            vec![Words, Proximity, Typo, desc(), Exactness, Attribute],
            vec![Words, Proximity, Typo, Exactness, Attribute, desc()],
            vec![Words, Proximity, Typo, Exactness, desc(), Attribute],
            vec![Words, Typo, Attribute, desc(), Exactness, Proximity],
            vec![Words, Typo, Attribute, desc(), Proximity, Exactness],
            vec![Words, Typo, Attribute, Exactness, desc(), Proximity],
            vec![Words, Typo, Attribute, Exactness, Proximity, desc()],
            vec![Words, Typo, Attribute, Proximity, desc(), Exactness],
            vec![Words, Typo, Attribute, Proximity, Exactness, desc()],
            vec![Words, Typo, desc(), Attribute, Proximity, Exactness],
            vec![Words, Typo, desc(), Exactness, Attribute, Proximity],
            vec![Words, Typo, desc(), Exactness, Attribute, Proximity],
            vec![Words, Typo, desc(), Exactness, Proximity, Attribute],
            vec![Words, Typo, desc(), Proximity, Attribute, Exactness],
            vec![Words, Typo, desc(), Proximity, Exactness, Attribute],
            vec![Words, Typo, Exactness, Attribute, desc(), Proximity],
            vec![Words, Typo, Exactness, Attribute, Proximity, desc()],
            vec![Words, Typo, Exactness, desc(), Attribute, Proximity],
            vec![Words, Typo, Exactness, desc(), Proximity, Attribute],
            vec![Words, Typo, Exactness, Proximity, Attribute, desc()],
            vec![Words, Typo, Exactness, Proximity, desc(), Attribute],
            vec![Words, Typo, Proximity, Attribute, desc(), Exactness],
            vec![Words, Typo, Proximity, Attribute, Exactness, desc()],
            vec![Words, Typo, Proximity, desc(), Attribute, Exactness],
            vec![Words, Typo, Proximity, desc(), Exactness, Attribute],
            vec![Words, Typo, Proximity, Exactness, Attribute, desc()],
            vec![Words, Typo, Proximity, Exactness, desc(), Attribute],
        ]
    };

    let config = IndexerConfig::default();
    for criteria in criteria_mix {
        eprintln!("Testing with criteria order: {:?}", &criteria);
        //update criteria
        let mut wtxn = index.write_txn().unwrap();
        let mut builder = Settings::new(&mut wtxn, &index, &config);
        builder.set_criteria(criteria.clone());
        builder.execute(|_| (), || false).unwrap();
        wtxn.commit().unwrap();

        let rtxn = index.read_txn().unwrap();

        let mut search = Search::new(&rtxn, &index);
        search.query(search::TEST_QUERY);
        search.limit(EXTERNAL_DOCUMENTS_IDS.len());
        search.terms_matching_strategy(ALLOW_OPTIONAL_WORDS);

        let SearchResult { documents_ids, .. } = search.execute().unwrap();

        let expected_external_ids: Vec<_> =
            search::expected_order(&criteria, ALLOW_OPTIONAL_WORDS, &[])
                .into_iter()
                .map(|d| d.id)
                .collect();
        let documents_ids = search::internal_to_external_ids(&index, &documents_ids);

        assert_eq!(documents_ids, expected_external_ids);
    }
}

#[test]
fn criteria_ascdesc() {
    let path = tempfile::tempdir().unwrap();
    let mut options = EnvOpenOptions::new();
    options.map_size(12 * 1024 * 1024); // 12 MB
    let index = Index::new(options, &path).unwrap();

    let mut wtxn = index.write_txn().unwrap();
    let config = IndexerConfig::default();

    let mut builder = Settings::new(&mut wtxn, &index, &config);

    builder.set_sortable_fields(hashset! {
        S("name"),
        S("age"),
    });
    builder.execute(|_| (), || false).unwrap();

    // index documents
    let config = IndexerConfig { max_memory: Some(10 * 1024 * 1024), ..Default::default() };
    let indexing_config = IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
    let builder =
        IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| (), || false).unwrap();

    let mut batch_builder = DocumentsBatchBuilder::new(Vec::new());

    (0..ASC_DESC_CANDIDATES_THRESHOLD + 1).for_each(|_| {
        let mut rng = rand::thread_rng();

        let age = rng.gen::<u32>().to_string();
        let name = rng
            .sample_iter(&rand::distributions::Alphanumeric)
            .map(char::from)
            .filter(|c| *c >= 'a' && *c <= 'z')
            .take(10)
            .collect::<String>();

        let json = serde_json::json!({
            "name": name,
            "age": age,
        });

        let object = match json {
            serde_json::Value::Object(object) => object,
            _ => panic!(),
        };

        batch_builder.append_json_object(&object).unwrap();
    });

    let vector = batch_builder.into_inner().unwrap();

    let reader = DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap();
    let (builder, user_error) = builder.add_documents(reader).unwrap();
    user_error.unwrap();
    builder.execute().unwrap();

    wtxn.commit().unwrap();

    let rtxn = index.read_txn().unwrap();
    let documents = index.all_documents(&rtxn).unwrap().map(|doc| doc.unwrap()).collect::<Vec<_>>();

    for criterion in [Asc(S("name")), Desc(S("name")), Asc(S("age")), Desc(S("age"))] {
        eprintln!("Testing with criterion: {:?}", &criterion);

        let mut wtxn = index.write_txn().unwrap();
        let mut builder = Settings::new(&mut wtxn, &index, &config);
        builder.set_criteria(vec![criterion.clone()]);
        builder.execute(|_| (), || false).unwrap();
        wtxn.commit().unwrap();

        let rtxn = index.read_txn().unwrap();

        let mut search = Search::new(&rtxn, &index);
        search.limit(ASC_DESC_CANDIDATES_THRESHOLD + 1);

        let SearchResult { documents_ids, .. } = search.execute().unwrap();

        let expected_document_ids = match criterion {
            Asc(field_name) if field_name == "name" => {
                documents.iter().sorted_by_key(|(_, obkv)| obkv.get(0).unwrap())
            }
            Desc(field_name) if field_name == "name" => {
                documents.iter().sorted_by_key(|(_, obkv)| Reverse(obkv.get(0).unwrap()))
            }
            Asc(field_name) if field_name == "age" => {
                documents.iter().sorted_by_key(|(_, obkv)| obkv.get(1).unwrap())
            }
            Desc(field_name) if field_name == "age" => {
                documents.iter().sorted_by_key(|(_, obkv)| Reverse(obkv.get(1).unwrap()))
            }
            _ => continue,
        }
        .map(|(id, _)| *id)
        .collect::<Vec<_>>();

        assert_eq!(documents_ids, expected_document_ids);
    }
}
23  crates/milli/tests/search/sort.rs  Normal file
@@ -0,0 +1,23 @@
use big_s::S;
use milli::Criterion::{Attribute, Exactness, Proximity, Typo, Words};
use milli::{AscDesc, Error, Member, Search, TermsMatchingStrategy, UserError};

use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};

#[test]
fn sort_ranking_rule_missing() {
    let criteria = vec![Words, Typo, Proximity, Attribute, Exactness];
    // sortables: `tag` and `asc_desc_rank`
    let index = search::setup_search_index_with_criteria(&criteria);
    let rtxn = index.read_txn().unwrap();

    let mut search = Search::new(&rtxn, &index);
    search.query(search::TEST_QUERY);
    search.limit(EXTERNAL_DOCUMENTS_IDS.len());

    search.terms_matching_strategy(TermsMatchingStrategy::default());
    search.sort_criteria(vec![AscDesc::Asc(Member::Field(S("tag")))]);

    let result = search.execute();
    assert!(matches!(result, Err(Error::UserError(UserError::SortRankingRuleMissing))));
}
208  crates/milli/tests/search/typo_tolerance.rs  Normal file
@@ -0,0 +1,208 @@
use std::collections::BTreeSet;

use heed::EnvOpenOptions;
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use milli::{Criterion, Index, Search, TermsMatchingStrategy};
use serde_json::json;
use tempfile::tempdir;
use Criterion::*;

#[test]
fn test_typo_tolerance_one_typo() {
    let criteria = [Typo];
    let index = super::setup_search_index_with_criteria(&criteria);

    // basic typo search with default typo settings
    {
        let txn = index.read_txn().unwrap();

        let mut search = Search::new(&txn, &index);
        search.query("zeal");
        search.limit(10);

        search.terms_matching_strategy(TermsMatchingStrategy::default());

        let result = search.execute().unwrap();
        assert_eq!(result.documents_ids.len(), 1);

        let mut search = Search::new(&txn, &index);
        search.query("zean");
        search.limit(10);

        search.terms_matching_strategy(TermsMatchingStrategy::default());

        let result = search.execute().unwrap();
        assert_eq!(result.documents_ids.len(), 0);
    }

    let mut txn = index.write_txn().unwrap();

    let config = IndexerConfig::default();
    let mut builder = Settings::new(&mut txn, &index, &config);
    builder.set_min_word_len_one_typo(4);
    builder.execute(|_| (), || false).unwrap();

    // typo is now supported for 4 letters words
    let mut search = Search::new(&txn, &index);
    search.query("zean");
    search.limit(10);

    search.terms_matching_strategy(TermsMatchingStrategy::default());

    let result = search.execute().unwrap();
    assert_eq!(result.documents_ids.len(), 1);
}

#[test]
fn test_typo_tolerance_two_typo() {
    let criteria = [Typo];
    let index = super::setup_search_index_with_criteria(&criteria);

    // basic typo search with default typo settings
    {
        let txn = index.read_txn().unwrap();

        let mut search = Search::new(&txn, &index);
        search.query("zealand");
        search.limit(10);

        search.terms_matching_strategy(TermsMatchingStrategy::default());

        let result = search.execute().unwrap();
        assert_eq!(result.documents_ids.len(), 1);

        let mut search = Search::new(&txn, &index);
        search.query("zealemd");
        search.limit(10);

        search.terms_matching_strategy(TermsMatchingStrategy::default());

        let result = search.execute().unwrap();
        assert_eq!(result.documents_ids.len(), 0);
    }

    let mut txn = index.write_txn().unwrap();

    let config = IndexerConfig::default();
    let mut builder = Settings::new(&mut txn, &index, &config);
    builder.set_min_word_len_two_typos(7);
    builder.execute(|_| (), || false).unwrap();

    // two typos are now supported for 7 letters words
    let mut search = Search::new(&txn, &index);
    search.query("zealemd");
    search.limit(10);

    search.terms_matching_strategy(TermsMatchingStrategy::default());

    let result = search.execute().unwrap();
    assert_eq!(result.documents_ids.len(), 1);
}

#[test]
fn test_typo_disabled_on_word() {
    let tmp = tempdir().unwrap();
    let mut options = EnvOpenOptions::new();
    options.map_size(4096 * 100);
    let index = Index::new(options, tmp.path()).unwrap();

    let mut builder = milli::documents::DocumentsBatchBuilder::new(Vec::new());
    let doc1 = json!({
        "id": 1usize,
        "data": "zealand",
    });

    let doc2 = json!({
        "id": 2usize,
        "data": "zearand",
    });

    builder.append_json_object(doc1.as_object().unwrap()).unwrap();
    builder.append_json_object(doc2.as_object().unwrap()).unwrap();
    let vector = builder.into_inner().unwrap();

    let documents =
        milli::documents::DocumentsBatchReader::from_reader(std::io::Cursor::new(vector)).unwrap();

    let mut txn = index.write_txn().unwrap();
    let config = IndexerConfig::default();
    let indexing_config = IndexDocumentsConfig::default();
    let builder =
        IndexDocuments::new(&mut txn, &index, &config, indexing_config, |_| (), || false).unwrap();

    let (builder, user_error) = builder.add_documents(documents).unwrap();
    user_error.unwrap();
    builder.execute().unwrap();
    txn.commit().unwrap();

    // basic typo search with default typo settings
    {
        let txn = index.read_txn().unwrap();

        let mut search = Search::new(&txn, &index);
        search.query("zealand");
        search.limit(10);

        search.terms_matching_strategy(TermsMatchingStrategy::default());

        let result = search.execute().unwrap();
        assert_eq!(result.documents_ids.len(), 2);
    }

    let mut txn = index.write_txn().unwrap();

    let config = IndexerConfig::default();
    let mut builder = Settings::new(&mut txn, &index, &config);
    let mut exact_words = BTreeSet::new();
    // `zealand` doesn't allow typos anymore
    exact_words.insert("zealand".to_string());
    builder.set_exact_words(exact_words);
    builder.execute(|_| (), || false).unwrap();

    let mut search = Search::new(&txn, &index);
    search.query("zealand");
    search.limit(10);

    search.terms_matching_strategy(TermsMatchingStrategy::default());

    let result = search.execute().unwrap();
    assert_eq!(result.documents_ids.len(), 1);
}

#[test]
fn test_disable_typo_on_attribute() {
    let criteria = [Typo];
    let index = super::setup_search_index_with_criteria(&criteria);

    // basic typo search with default typo settings
    {
        let txn = index.read_txn().unwrap();

        let mut search = Search::new(&txn, &index);
        // typo in `antebel(l)um`
        search.query("antebelum");
        search.limit(10);

        search.terms_matching_strategy(TermsMatchingStrategy::default());

        let result = search.execute().unwrap();
        assert_eq!(result.documents_ids.len(), 1);
    }

    let mut txn = index.write_txn().unwrap();

    let config = IndexerConfig::default();
    let mut builder = Settings::new(&mut txn, &index, &config);
    // disable typos on `description`
    builder.set_exact_attributes(vec!["description".to_string()].into_iter().collect());
    builder.execute(|_| (), || false).unwrap();

    let mut search = Search::new(&txn, &index);
    search.query("antebelum");
    search.limit(10);

    search.terms_matching_strategy(TermsMatchingStrategy::default());

    let result = search.execute().unwrap();
    assert_eq!(result.documents_ids.len(), 0);
}