Rename TermMatchingPolicies to TermsMatchingStrategy

This commit is contained in:
ManyTheFish 2022-08-18 17:36:08 +02:00
parent 60a7221827
commit 9640976c79
10 changed files with 222 additions and 125 deletions

View file

@ -2,7 +2,7 @@ use std::collections::HashSet;
use big_s::S;
use milli::update::Settings;
use milli::{Criterion, Search, SearchResult};
use milli::{Criterion, Search, SearchResult, TermsMatchingStrategy};
use Criterion::*;
use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
@ -28,24 +28,25 @@ macro_rules! test_distinct {
search.query(search::TEST_QUERY);
search.limit(EXTERNAL_DOCUMENTS_IDS.len());
search.authorize_typos(true);
search.optional_words(true);
search.optional_words(TermsMatchingStrategy::default());
let SearchResult { documents_ids, candidates, .. } = search.execute().unwrap();
assert_eq!(candidates.len(), $n_res);
let mut distinct_values = HashSet::new();
let expected_external_ids: Vec<_> = search::expected_order(&criteria, true, true, &[])
.into_iter()
.filter_map(|d| {
if distinct_values.contains(&d.$distinct) {
None
} else {
distinct_values.insert(d.$distinct.to_owned());
Some(d.id)
}
})
.collect();
let expected_external_ids: Vec<_> =
search::expected_order(&criteria, true, TermsMatchingStrategy::default(), &[])
.into_iter()
.filter_map(|d| {
if distinct_values.contains(&d.$distinct) {
None
} else {
distinct_values.insert(d.$distinct.to_owned());
Some(d.id)
}
})
.collect();
let documents_ids = search::internal_to_external_ids(&index, &documents_ids);
assert_eq!(documents_ids, expected_external_ids);

View file

@ -1,5 +1,5 @@
use either::{Either, Left, Right};
use milli::{Criterion, Filter, Search, SearchResult};
use milli::{Criterion, Filter, Search, SearchResult, TermsMatchingStrategy};
use Criterion::*;
use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
@ -19,16 +19,17 @@ macro_rules! test_filter {
search.query(search::TEST_QUERY);
search.limit(EXTERNAL_DOCUMENTS_IDS.len());
search.authorize_typos(true);
search.optional_words(true);
search.optional_words(TermsMatchingStrategy::default());
search.filter(filter_conditions);
let SearchResult { documents_ids, .. } = search.execute().unwrap();
let filtered_ids = search::expected_filtered_ids($filter);
let expected_external_ids: Vec<_> = search::expected_order(&criteria, true, true, &[])
.into_iter()
.filter_map(|d| if filtered_ids.contains(&d.id) { Some(d.id) } else { None })
.collect();
let expected_external_ids: Vec<_> =
search::expected_order(&criteria, true, TermsMatchingStrategy::default(), &[])
.into_iter()
.filter_map(|d| if filtered_ids.contains(&d.id) { Some(d.id) } else { None })
.collect();
let documents_ids = search::internal_to_external_ids(&index, &documents_ids);
assert_eq!(documents_ids, expected_external_ids);

View file

@ -8,7 +8,7 @@ use heed::EnvOpenOptions;
use maplit::{hashmap, hashset};
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use milli::{AscDesc, Criterion, DocumentId, Index, Member, Object};
use milli::{AscDesc, Criterion, DocumentId, Index, Member, Object, TermsMatchingStrategy};
use serde::{Deserialize, Deserializer};
use slice_group_by::GroupBy;
@ -96,7 +96,7 @@ pub fn internal_to_external_ids(index: &Index, internal_ids: &[DocumentId]) -> V
pub fn expected_order(
criteria: &[Criterion],
authorize_typo: bool,
optional_words: bool,
optional_words: TermsMatchingStrategy,
sort_by: &[AscDesc],
) -> Vec<TestDocument> {
let dataset =
@ -155,9 +155,9 @@ pub fn expected_order(
groups = std::mem::take(&mut new_groups);
}
if authorize_typo && optional_words {
if authorize_typo && optional_words == TermsMatchingStrategy::default() {
groups.into_iter().flatten().collect()
} else if optional_words {
} else if optional_words == TermsMatchingStrategy::default() {
groups.into_iter().flatten().filter(|d| d.typo_rank == 0).collect()
} else if authorize_typo {
groups.into_iter().flatten().filter(|d| d.word_rank == 0).collect()

View file

@ -7,7 +7,7 @@ use itertools::Itertools;
use maplit::hashset;
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use milli::{AscDesc, Criterion, Index, Member, Search, SearchResult};
use milli::{AscDesc, Criterion, Index, Member, Search, SearchResult, TermsMatchingStrategy};
use rand::Rng;
use Criterion::*;
@ -15,8 +15,8 @@ use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
const ALLOW_TYPOS: bool = true;
const DISALLOW_TYPOS: bool = false;
const ALLOW_OPTIONAL_WORDS: bool = true;
const DISALLOW_OPTIONAL_WORDS: bool = false;
const ALLOW_OPTIONAL_WORDS: TermsMatchingStrategy = TermsMatchingStrategy::Last;
const DISALLOW_OPTIONAL_WORDS: TermsMatchingStrategy = TermsMatchingStrategy::All;
const ASC_DESC_CANDIDATES_THRESHOLD: usize = 1000;
macro_rules! test_criterion {
@ -359,7 +359,7 @@ fn criteria_mixup() {
let SearchResult { documents_ids, .. } = search.execute().unwrap();
let expected_external_ids: Vec<_> =
search::expected_order(&criteria, ALLOW_OPTIONAL_WORDS, ALLOW_TYPOS, &[])
search::expected_order(&criteria, ALLOW_TYPOS, ALLOW_OPTIONAL_WORDS, &[])
.into_iter()
.map(|d| d.id)
.collect();

View file

@ -1,6 +1,6 @@
use big_s::S;
use milli::Criterion::{Attribute, Exactness, Proximity, Typo, Words};
use milli::{AscDesc, Error, Member, Search, UserError};
use milli::{AscDesc, Error, Member, Search, TermsMatchingStrategy, UserError};
use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
@ -15,7 +15,7 @@ fn sort_ranking_rule_missing() {
search.query(search::TEST_QUERY);
search.limit(EXTERNAL_DOCUMENTS_IDS.len());
search.authorize_typos(true);
search.optional_words(true);
search.optional_words(TermsMatchingStrategy::default());
search.sort_criteria(vec![AscDesc::Asc(Member::Field(S("tag")))]);
let result = search.execute();

View file

@ -2,7 +2,7 @@ use std::collections::BTreeSet;
use heed::EnvOpenOptions;
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use milli::{Criterion, Index, Search};
use milli::{Criterion, Index, Search, TermsMatchingStrategy};
use serde_json::json;
use tempfile::tempdir;
use Criterion::*;
@ -20,7 +20,7 @@ fn test_typo_tolerance_one_typo() {
search.query("zeal");
search.limit(10);
search.authorize_typos(true);
search.optional_words(true);
search.optional_words(TermsMatchingStrategy::default());
let result = search.execute().unwrap();
assert_eq!(result.documents_ids.len(), 1);
@ -29,7 +29,7 @@ fn test_typo_tolerance_one_typo() {
search.query("zean");
search.limit(10);
search.authorize_typos(true);
search.optional_words(true);
search.optional_words(TermsMatchingStrategy::default());
let result = search.execute().unwrap();
assert_eq!(result.documents_ids.len(), 0);
@ -47,7 +47,7 @@ fn test_typo_tolerance_one_typo() {
search.query("zean");
search.limit(10);
search.authorize_typos(true);
search.optional_words(true);
search.optional_words(TermsMatchingStrategy::default());
let result = search.execute().unwrap();
assert_eq!(result.documents_ids.len(), 1);
@ -66,7 +66,7 @@ fn test_typo_tolerance_two_typo() {
search.query("zealand");
search.limit(10);
search.authorize_typos(true);
search.optional_words(true);
search.optional_words(TermsMatchingStrategy::default());
let result = search.execute().unwrap();
assert_eq!(result.documents_ids.len(), 1);
@ -75,7 +75,7 @@ fn test_typo_tolerance_two_typo() {
search.query("zealemd");
search.limit(10);
search.authorize_typos(true);
search.optional_words(true);
search.optional_words(TermsMatchingStrategy::default());
let result = search.execute().unwrap();
assert_eq!(result.documents_ids.len(), 0);
@ -93,7 +93,7 @@ fn test_typo_tolerance_two_typo() {
search.query("zealemd");
search.limit(10);
search.authorize_typos(true);
search.optional_words(true);
search.optional_words(TermsMatchingStrategy::default());
let result = search.execute().unwrap();
assert_eq!(result.documents_ids.len(), 1);
@ -142,7 +142,7 @@ fn test_typo_disabled_on_word() {
search.query("zealand");
search.limit(10);
search.authorize_typos(true);
search.optional_words(true);
search.optional_words(TermsMatchingStrategy::default());
let result = search.execute().unwrap();
assert_eq!(result.documents_ids.len(), 2);
@ -162,7 +162,7 @@ fn test_typo_disabled_on_word() {
search.query("zealand");
search.limit(10);
search.authorize_typos(true);
search.optional_words(true);
search.optional_words(TermsMatchingStrategy::default());
let result = search.execute().unwrap();
assert_eq!(result.documents_ids.len(), 1);
@ -182,7 +182,7 @@ fn test_disable_typo_on_attribute() {
search.query("antebelum");
search.limit(10);
search.authorize_typos(true);
search.optional_words(true);
search.optional_words(TermsMatchingStrategy::default());
let result = search.execute().unwrap();
assert_eq!(result.documents_ids.len(), 1);
@ -200,7 +200,7 @@ fn test_disable_typo_on_attribute() {
search.query("antebelum");
search.limit(10);
search.authorize_typos(true);
search.optional_words(true);
search.optional_words(TermsMatchingStrategy::default());
let result = search.execute().unwrap();
assert_eq!(result.documents_ids.len(), 0);