replace optional_words with terms_matching_strategy

This commit is contained in:
ManyTheFish 2022-08-22 17:37:36 +02:00
parent f9029727e0
commit 5391e3842c
9 changed files with 46 additions and 39 deletions

View File

@ -119,7 +119,7 @@ pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) {
b.iter(|| { b.iter(|| {
let rtxn = index.read_txn().unwrap(); let rtxn = index.read_txn().unwrap();
let mut search = index.search(&rtxn); let mut search = index.search(&rtxn);
search.query(query).optional_words(TermsMatchingStrategy::default()); search.query(query).terms_matching_strategy(TermsMatchingStrategy::default());
if let Some(filter) = conf.filter { if let Some(filter) = conf.filter {
let filter = Filter::from_str(filter).unwrap().unwrap(); let filter = Filter::from_str(filter).unwrap().unwrap();
search.filter(filter); search.filter(filter);

View File

@ -44,7 +44,7 @@ pub struct Search<'a> {
offset: usize, offset: usize,
limit: usize, limit: usize,
sort_criteria: Option<Vec<AscDesc>>, sort_criteria: Option<Vec<AscDesc>>,
optional_words: TermsMatchingStrategy, terms_matching_strategy: TermsMatchingStrategy,
authorize_typos: bool, authorize_typos: bool,
words_limit: usize, words_limit: usize,
rtxn: &'a heed::RoTxn<'a>, rtxn: &'a heed::RoTxn<'a>,
@ -59,7 +59,7 @@ impl<'a> Search<'a> {
offset: 0, offset: 0,
limit: 20, limit: 20,
sort_criteria: None, sort_criteria: None,
optional_words: TermsMatchingStrategy::default(), terms_matching_strategy: TermsMatchingStrategy::default(),
authorize_typos: true, authorize_typos: true,
words_limit: 10, words_limit: 10,
rtxn, rtxn,
@ -87,8 +87,8 @@ impl<'a> Search<'a> {
self self
} }
pub fn optional_words(&mut self, value: TermsMatchingStrategy) -> &mut Search<'a> { pub fn terms_matching_strategy(&mut self, value: TermsMatchingStrategy) -> &mut Search<'a> {
self.optional_words = value; self.terms_matching_strategy = value;
self self
} }
@ -119,7 +119,7 @@ impl<'a> Search<'a> {
let (query_tree, primitive_query, matching_words) = match self.query.as_ref() { let (query_tree, primitive_query, matching_words) = match self.query.as_ref() {
Some(query) => { Some(query) => {
let mut builder = QueryTreeBuilder::new(self.rtxn, self.index)?; let mut builder = QueryTreeBuilder::new(self.rtxn, self.index)?;
builder.optional_words(self.optional_words); builder.terms_matching_strategy(self.terms_matching_strategy);
builder.authorize_typos(self.is_typo_authorized()?); builder.authorize_typos(self.is_typo_authorized()?);
@ -259,7 +259,7 @@ impl fmt::Debug for Search<'_> {
offset, offset,
limit, limit,
sort_criteria, sort_criteria,
optional_words, terms_matching_strategy,
authorize_typos, authorize_typos,
words_limit, words_limit,
rtxn: _, rtxn: _,
@ -271,7 +271,7 @@ impl fmt::Debug for Search<'_> {
.field("offset", offset) .field("offset", offset)
.field("limit", limit) .field("limit", limit)
.field("sort_criteria", sort_criteria) .field("sort_criteria", sort_criteria)
.field("optional_words", optional_words) .field("terms_matching_strategy", terms_matching_strategy)
.field("authorize_typos", authorize_typos) .field("authorize_typos", authorize_typos)
.field("words_limit", words_limit) .field("words_limit", words_limit)
.finish() .finish()

View File

@ -162,7 +162,7 @@ trait Context {
pub struct QueryTreeBuilder<'a> { pub struct QueryTreeBuilder<'a> {
rtxn: &'a heed::RoTxn<'a>, rtxn: &'a heed::RoTxn<'a>,
index: &'a Index, index: &'a Index,
optional_words: TermsMatchingStrategy, terms_matching_strategy: TermsMatchingStrategy,
authorize_typos: bool, authorize_typos: bool,
words_limit: Option<usize>, words_limit: Option<usize>,
exact_words: Option<fst::Set<Cow<'a, [u8]>>>, exact_words: Option<fst::Set<Cow<'a, [u8]>>>,
@ -199,19 +199,22 @@ impl<'a> QueryTreeBuilder<'a> {
Ok(Self { Ok(Self {
rtxn, rtxn,
index, index,
optional_words: TermsMatchingStrategy::default(), terms_matching_strategy: TermsMatchingStrategy::default(),
authorize_typos: true, authorize_typos: true,
words_limit: None, words_limit: None,
exact_words: index.exact_words(rtxn)?, exact_words: index.exact_words(rtxn)?,
}) })
} }
/// if `optional_words` is set to `false` the query tree will be /// if `terms_matching_strategy` is set to `All` the query tree will be
/// generated forcing all query words to be present in each matching documents /// generated forcing all query words to be present in each matching documents
/// (the criterion `words` will be ignored). /// (the criterion `words` will be ignored).
/// default value if not called: `true` /// default value if not called: `Last`
pub fn optional_words(&mut self, optional_words: TermsMatchingStrategy) -> &mut Self { pub fn terms_matching_strategy(
self.optional_words = optional_words; &mut self,
terms_matching_strategy: TermsMatchingStrategy,
) -> &mut Self {
self.terms_matching_strategy = terms_matching_strategy;
self self
} }
@ -232,7 +235,7 @@ impl<'a> QueryTreeBuilder<'a> {
} }
/// Build the query tree: /// Build the query tree:
/// - if `optional_words` is set to `false` the query tree will be /// - if `terms_matching_strategy` is set to `All` the query tree will be
/// generated forcing all query words to be present in each matching documents /// generated forcing all query words to be present in each matching documents
/// (the criterion `words` will be ignored) /// (the criterion `words` will be ignored)
/// - if `authorize_typos` is set to `false` the query tree will be generated /// - if `authorize_typos` is set to `false` the query tree will be generated
@ -247,7 +250,7 @@ impl<'a> QueryTreeBuilder<'a> {
if !primitive_query.is_empty() { if !primitive_query.is_empty() {
let qt = create_query_tree( let qt = create_query_tree(
self, self,
self.optional_words, self.terms_matching_strategy,
self.authorize_typos, self.authorize_typos,
&primitive_query, &primitive_query,
)?; )?;
@ -332,7 +335,7 @@ fn synonyms(ctx: &impl Context, word: &[&str]) -> heed::Result<Option<Vec<Operat
/// Main function that creates the final query tree from the primitive query. /// Main function that creates the final query tree from the primitive query.
fn create_query_tree( fn create_query_tree(
ctx: &impl Context, ctx: &impl Context,
optional_words: TermsMatchingStrategy, terms_matching_strategy: TermsMatchingStrategy,
authorize_typos: bool, authorize_typos: bool,
query: &[PrimitiveQueryPart], query: &[PrimitiveQueryPart],
) -> Result<Operation> { ) -> Result<Operation> {
@ -455,7 +458,7 @@ fn create_query_tree(
let mut operation_children = Vec::new(); let mut operation_children = Vec::new();
let mut query = query.to_vec(); let mut query = query.to_vec();
for _ in 0..remove_count { for _ in 0..remove_count {
let pos = match optional_words { let pos = match terms_matching_strategy {
TermsMatchingStrategy::All => return ngrams(ctx, authorize_typos, &query, false), TermsMatchingStrategy::All => return ngrams(ctx, authorize_typos, &query, false),
TermsMatchingStrategy::Any => { TermsMatchingStrategy::Any => {
let operation = Operation::Or( let operation = Operation::Or(
@ -796,15 +799,19 @@ mod test {
impl TestContext { impl TestContext {
fn build<A: AsRef<[u8]>>( fn build<A: AsRef<[u8]>>(
&self, &self,
optional_words: TermsMatchingStrategy, terms_matching_strategy: TermsMatchingStrategy,
authorize_typos: bool, authorize_typos: bool,
words_limit: Option<usize>, words_limit: Option<usize>,
query: ClassifiedTokenIter<A>, query: ClassifiedTokenIter<A>,
) -> Result<Option<(Operation, PrimitiveQuery)>> { ) -> Result<Option<(Operation, PrimitiveQuery)>> {
let primitive_query = create_primitive_query(query, None, words_limit); let primitive_query = create_primitive_query(query, None, words_limit);
if !primitive_query.is_empty() { if !primitive_query.is_empty() {
let qt = let qt = create_query_tree(
create_query_tree(self, optional_words, authorize_typos, &primitive_query)?; self,
terms_matching_strategy,
authorize_typos,
&primitive_query,
)?;
Ok(Some((qt, primitive_query))) Ok(Some((qt, primitive_query)))
} else { } else {
Ok(None) Ok(None)

View File

@ -1208,7 +1208,7 @@ mod tests {
let mut search = crate::Search::new(&rtxn, &index); let mut search = crate::Search::new(&rtxn, &index);
search.query("document"); search.query("document");
search.authorize_typos(true); search.authorize_typos(true);
search.optional_words(TermsMatchingStrategy::default()); search.terms_matching_strategy(TermsMatchingStrategy::default());
// all documents should be returned // all documents should be returned
let crate::SearchResult { documents_ids, .. } = search.execute().unwrap(); let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
assert_eq!(documents_ids.len(), 4); assert_eq!(documents_ids.len(), 4);
@ -1314,7 +1314,7 @@ mod tests {
let mut search = crate::Search::new(&rtxn, &index); let mut search = crate::Search::new(&rtxn, &index);
search.query("document"); search.query("document");
search.authorize_typos(true); search.authorize_typos(true);
search.optional_words(TermsMatchingStrategy::default()); search.terms_matching_strategy(TermsMatchingStrategy::default());
// all documents should be returned // all documents should be returned
let crate::SearchResult { documents_ids, .. } = search.execute().unwrap(); let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();
assert_eq!(documents_ids.len(), 4); assert_eq!(documents_ids.len(), 4);
@ -1513,7 +1513,7 @@ mod tests {
let mut search = crate::Search::new(&rtxn, &index); let mut search = crate::Search::new(&rtxn, &index);
search.query("化妆包"); search.query("化妆包");
search.authorize_typos(true); search.authorize_typos(true);
search.optional_words(TermsMatchingStrategy::default()); search.terms_matching_strategy(TermsMatchingStrategy::default());
// only 1 document should be returned // only 1 document should be returned
let crate::SearchResult { documents_ids, .. } = search.execute().unwrap(); let crate::SearchResult { documents_ids, .. } = search.execute().unwrap();

View File

@ -28,7 +28,7 @@ macro_rules! test_distinct {
search.query(search::TEST_QUERY); search.query(search::TEST_QUERY);
search.limit(EXTERNAL_DOCUMENTS_IDS.len()); search.limit(EXTERNAL_DOCUMENTS_IDS.len());
search.authorize_typos(true); search.authorize_typos(true);
search.optional_words(TermsMatchingStrategy::default()); search.terms_matching_strategy(TermsMatchingStrategy::default());
let SearchResult { documents_ids, candidates, .. } = search.execute().unwrap(); let SearchResult { documents_ids, candidates, .. } = search.execute().unwrap();

View File

@ -19,7 +19,7 @@ macro_rules! test_filter {
search.query(search::TEST_QUERY); search.query(search::TEST_QUERY);
search.limit(EXTERNAL_DOCUMENTS_IDS.len()); search.limit(EXTERNAL_DOCUMENTS_IDS.len());
search.authorize_typos(true); search.authorize_typos(true);
search.optional_words(TermsMatchingStrategy::default()); search.terms_matching_strategy(TermsMatchingStrategy::default());
search.filter(filter_conditions); search.filter(filter_conditions);
let SearchResult { documents_ids, .. } = search.execute().unwrap(); let SearchResult { documents_ids, .. } = search.execute().unwrap();

View File

@ -31,7 +31,7 @@ macro_rules! test_criterion {
search.query(search::TEST_QUERY); search.query(search::TEST_QUERY);
search.limit(EXTERNAL_DOCUMENTS_IDS.len()); search.limit(EXTERNAL_DOCUMENTS_IDS.len());
search.authorize_typos($authorize_typos); search.authorize_typos($authorize_typos);
search.optional_words($optional_word); search.terms_matching_strategy($optional_word);
search.sort_criteria($sort_criteria); search.sort_criteria($sort_criteria);
let SearchResult { documents_ids, .. } = search.execute().unwrap(); let SearchResult { documents_ids, .. } = search.execute().unwrap();
@ -353,7 +353,7 @@ fn criteria_mixup() {
let mut search = Search::new(&mut rtxn, &index); let mut search = Search::new(&mut rtxn, &index);
search.query(search::TEST_QUERY); search.query(search::TEST_QUERY);
search.limit(EXTERNAL_DOCUMENTS_IDS.len()); search.limit(EXTERNAL_DOCUMENTS_IDS.len());
search.optional_words(ALLOW_OPTIONAL_WORDS); search.terms_matching_strategy(ALLOW_OPTIONAL_WORDS);
search.authorize_typos(ALLOW_TYPOS); search.authorize_typos(ALLOW_TYPOS);
let SearchResult { documents_ids, .. } = search.execute().unwrap(); let SearchResult { documents_ids, .. } = search.execute().unwrap();

View File

@ -15,7 +15,7 @@ fn sort_ranking_rule_missing() {
search.query(search::TEST_QUERY); search.query(search::TEST_QUERY);
search.limit(EXTERNAL_DOCUMENTS_IDS.len()); search.limit(EXTERNAL_DOCUMENTS_IDS.len());
search.authorize_typos(true); search.authorize_typos(true);
search.optional_words(TermsMatchingStrategy::default()); search.terms_matching_strategy(TermsMatchingStrategy::default());
search.sort_criteria(vec![AscDesc::Asc(Member::Field(S("tag")))]); search.sort_criteria(vec![AscDesc::Asc(Member::Field(S("tag")))]);
let result = search.execute(); let result = search.execute();

View File

@ -20,7 +20,7 @@ fn test_typo_tolerance_one_typo() {
search.query("zeal"); search.query("zeal");
search.limit(10); search.limit(10);
search.authorize_typos(true); search.authorize_typos(true);
search.optional_words(TermsMatchingStrategy::default()); search.terms_matching_strategy(TermsMatchingStrategy::default());
let result = search.execute().unwrap(); let result = search.execute().unwrap();
assert_eq!(result.documents_ids.len(), 1); assert_eq!(result.documents_ids.len(), 1);
@ -29,7 +29,7 @@ fn test_typo_tolerance_one_typo() {
search.query("zean"); search.query("zean");
search.limit(10); search.limit(10);
search.authorize_typos(true); search.authorize_typos(true);
search.optional_words(TermsMatchingStrategy::default()); search.terms_matching_strategy(TermsMatchingStrategy::default());
let result = search.execute().unwrap(); let result = search.execute().unwrap();
assert_eq!(result.documents_ids.len(), 0); assert_eq!(result.documents_ids.len(), 0);
@ -47,7 +47,7 @@ fn test_typo_tolerance_one_typo() {
search.query("zean"); search.query("zean");
search.limit(10); search.limit(10);
search.authorize_typos(true); search.authorize_typos(true);
search.optional_words(TermsMatchingStrategy::default()); search.terms_matching_strategy(TermsMatchingStrategy::default());
let result = search.execute().unwrap(); let result = search.execute().unwrap();
assert_eq!(result.documents_ids.len(), 1); assert_eq!(result.documents_ids.len(), 1);
@ -66,7 +66,7 @@ fn test_typo_tolerance_two_typo() {
search.query("zealand"); search.query("zealand");
search.limit(10); search.limit(10);
search.authorize_typos(true); search.authorize_typos(true);
search.optional_words(TermsMatchingStrategy::default()); search.terms_matching_strategy(TermsMatchingStrategy::default());
let result = search.execute().unwrap(); let result = search.execute().unwrap();
assert_eq!(result.documents_ids.len(), 1); assert_eq!(result.documents_ids.len(), 1);
@ -75,7 +75,7 @@ fn test_typo_tolerance_two_typo() {
search.query("zealemd"); search.query("zealemd");
search.limit(10); search.limit(10);
search.authorize_typos(true); search.authorize_typos(true);
search.optional_words(TermsMatchingStrategy::default()); search.terms_matching_strategy(TermsMatchingStrategy::default());
let result = search.execute().unwrap(); let result = search.execute().unwrap();
assert_eq!(result.documents_ids.len(), 0); assert_eq!(result.documents_ids.len(), 0);
@ -93,7 +93,7 @@ fn test_typo_tolerance_two_typo() {
search.query("zealemd"); search.query("zealemd");
search.limit(10); search.limit(10);
search.authorize_typos(true); search.authorize_typos(true);
search.optional_words(TermsMatchingStrategy::default()); search.terms_matching_strategy(TermsMatchingStrategy::default());
let result = search.execute().unwrap(); let result = search.execute().unwrap();
assert_eq!(result.documents_ids.len(), 1); assert_eq!(result.documents_ids.len(), 1);
@ -142,7 +142,7 @@ fn test_typo_disabled_on_word() {
search.query("zealand"); search.query("zealand");
search.limit(10); search.limit(10);
search.authorize_typos(true); search.authorize_typos(true);
search.optional_words(TermsMatchingStrategy::default()); search.terms_matching_strategy(TermsMatchingStrategy::default());
let result = search.execute().unwrap(); let result = search.execute().unwrap();
assert_eq!(result.documents_ids.len(), 2); assert_eq!(result.documents_ids.len(), 2);
@ -162,7 +162,7 @@ fn test_typo_disabled_on_word() {
search.query("zealand"); search.query("zealand");
search.limit(10); search.limit(10);
search.authorize_typos(true); search.authorize_typos(true);
search.optional_words(TermsMatchingStrategy::default()); search.terms_matching_strategy(TermsMatchingStrategy::default());
let result = search.execute().unwrap(); let result = search.execute().unwrap();
assert_eq!(result.documents_ids.len(), 1); assert_eq!(result.documents_ids.len(), 1);
@ -182,7 +182,7 @@ fn test_disable_typo_on_attribute() {
search.query("antebelum"); search.query("antebelum");
search.limit(10); search.limit(10);
search.authorize_typos(true); search.authorize_typos(true);
search.optional_words(TermsMatchingStrategy::default()); search.terms_matching_strategy(TermsMatchingStrategy::default());
let result = search.execute().unwrap(); let result = search.execute().unwrap();
assert_eq!(result.documents_ids.len(), 1); assert_eq!(result.documents_ids.len(), 1);
@ -200,7 +200,7 @@ fn test_disable_typo_on_attribute() {
search.query("antebelum"); search.query("antebelum");
search.limit(10); search.limit(10);
search.authorize_typos(true); search.authorize_typos(true);
search.optional_words(TermsMatchingStrategy::default()); search.terms_matching_strategy(TermsMatchingStrategy::default());
let result = search.execute().unwrap(); let result = search.execute().unwrap();
assert_eq!(result.documents_ids.len(), 0); assert_eq!(result.documents_ids.len(), 0);