From 5391e3842c75afc4b903e548e1db2367e47e194d Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 22 Aug 2022 17:37:36 +0200 Subject: [PATCH] replace optional_words by term_matching_strategy --- benchmarks/benches/utils.rs | 2 +- milli/src/search/mod.rs | 14 +++++------ milli/src/search/query_tree.rs | 33 +++++++++++++++---------- milli/src/update/index_documents/mod.rs | 6 ++--- milli/tests/search/distinct.rs | 2 +- milli/tests/search/filters.rs | 2 +- milli/tests/search/query_criteria.rs | 4 +-- milli/tests/search/sort.rs | 2 +- milli/tests/search/typo_tolerance.rs | 20 +++++++-------- 9 files changed, 46 insertions(+), 39 deletions(-) diff --git a/benchmarks/benches/utils.rs b/benchmarks/benches/utils.rs index 81e50d1bb..a240ce299 100644 --- a/benchmarks/benches/utils.rs +++ b/benchmarks/benches/utils.rs @@ -119,7 +119,7 @@ pub fn run_benches(c: &mut criterion::Criterion, confs: &[Conf]) { b.iter(|| { let rtxn = index.read_txn().unwrap(); let mut search = index.search(&rtxn); - search.query(query).optional_words(TermsMatchingStrategy::default()); + search.query(query).terms_matching_strategy(TermsMatchingStrategy::default()); if let Some(filter) = conf.filter { let filter = Filter::from_str(filter).unwrap().unwrap(); search.filter(filter); diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 3da8823dc..7145c1445 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -44,7 +44,7 @@ pub struct Search<'a> { offset: usize, limit: usize, sort_criteria: Option>, - optional_words: TermsMatchingStrategy, + terms_matching_strategy: TermsMatchingStrategy, authorize_typos: bool, words_limit: usize, rtxn: &'a heed::RoTxn<'a>, @@ -59,7 +59,7 @@ impl<'a> Search<'a> { offset: 0, limit: 20, sort_criteria: None, - optional_words: TermsMatchingStrategy::default(), + terms_matching_strategy: TermsMatchingStrategy::default(), authorize_typos: true, words_limit: 10, rtxn, @@ -87,8 +87,8 @@ impl<'a> Search<'a> { self } - pub fn optional_words(&mut self, value: TermsMatchingStrategy) -> &mut Search<'a> { - self.optional_words = value; + pub fn terms_matching_strategy(&mut self, value: TermsMatchingStrategy) -> &mut Search<'a> { + self.terms_matching_strategy = value; self } @@ -119,7 +119,7 @@ impl<'a> Search<'a> { let (query_tree, primitive_query, matching_words) = match self.query.as_ref() { Some(query) => { let mut builder = QueryTreeBuilder::new(self.rtxn, self.index)?; - builder.optional_words(self.optional_words); + builder.terms_matching_strategy(self.terms_matching_strategy); builder.authorize_typos(self.is_typo_authorized()?); @@ -259,7 +259,7 @@ impl fmt::Debug for Search<'_> { offset, limit, sort_criteria, - optional_words, + terms_matching_strategy, authorize_typos, words_limit, rtxn: _, @@ -271,7 +271,7 @@ impl fmt::Debug for Search<'_> { .field("offset", offset) .field("limit", limit) .field("sort_criteria", sort_criteria) - .field("optional_words", optional_words) + .field("terms_matching_strategy", terms_matching_strategy) .field("authorize_typos", authorize_typos) .field("words_limit", words_limit) .finish() diff --git a/milli/src/search/query_tree.rs b/milli/src/search/query_tree.rs index 39377d8f9..51774d8b4 100644 --- a/milli/src/search/query_tree.rs +++ b/milli/src/search/query_tree.rs @@ -162,7 +162,7 @@ trait Context { pub struct QueryTreeBuilder<'a> { rtxn: &'a heed::RoTxn<'a>, index: &'a Index, - optional_words: TermsMatchingStrategy, + terms_matching_strategy: TermsMatchingStrategy, authorize_typos: bool, words_limit: Option, exact_words: Option>>, @@ -199,19 +199,22 @@ impl<'a> QueryTreeBuilder<'a> { Ok(Self { rtxn, index, - optional_words: TermsMatchingStrategy::default(), + terms_matching_strategy: TermsMatchingStrategy::default(), authorize_typos: true, words_limit: None, exact_words: index.exact_words(rtxn)?, }) } - /// if `optional_words` is set to `false` the query tree will be + /// if `terms_matching_strategy` is set to `All` the query tree will be /// generated forcing all query words to be present in each matching documents /// (the criterion `words` will be ignored). - /// default value if not called: `true` - pub fn optional_words(&mut self, optional_words: TermsMatchingStrategy) -> &mut Self { - self.optional_words = optional_words; + /// default value if not called: `Last` + pub fn terms_matching_strategy( + &mut self, + terms_matching_strategy: TermsMatchingStrategy, + ) -> &mut Self { + self.terms_matching_strategy = terms_matching_strategy; self } @@ -232,7 +235,7 @@ impl<'a> QueryTreeBuilder<'a> { } /// Build the query tree: - /// - if `optional_words` is set to `false` the query tree will be + /// - if `terms_matching_strategy` is set to `All` the query tree will be /// generated forcing all query words to be present in each matching documents /// (the criterion `words` will be ignored) /// - if `authorize_typos` is set to `false` the query tree will be generated @@ -247,7 +250,7 @@ impl<'a> QueryTreeBuilder<'a> { if !primitive_query.is_empty() { let qt = create_query_tree( self, - self.optional_words, + self.terms_matching_strategy, self.authorize_typos, &primitive_query, )?; @@ -332,7 +335,7 @@ fn synonyms(ctx: &impl Context, word: &[&str]) -> heed::Result Result { @@ -455,7 +458,7 @@ fn create_query_tree( let mut operation_children = Vec::new(); let mut query = query.to_vec(); for _ in 0..remove_count { - let pos = match optional_words { + let pos = match terms_matching_strategy { TermsMatchingStrategy::All => return ngrams(ctx, authorize_typos, &query, false), TermsMatchingStrategy::Any => { let operation = Operation::Or( @@ -796,15 +799,19 @@ mod test { impl TestContext { fn build>( &self, - optional_words: TermsMatchingStrategy, + terms_matching_strategy: TermsMatchingStrategy, authorize_typos: bool, words_limit: Option, query: ClassifiedTokenIter, ) -> Result> { let primitive_query = create_primitive_query(query, None, words_limit); if !primitive_query.is_empty() { - let qt = - create_query_tree(self, optional_words, authorize_typos, &primitive_query)?; + let qt = create_query_tree( + self, + terms_matching_strategy, + authorize_typos, + &primitive_query, + )?; Ok(Some((qt, primitive_query))) } else { Ok(None) diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 3a8f961ac..23618b478 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -1208,7 +1208,7 @@ mod tests { let mut search = crate::Search::new(&rtxn, &index); search.query("document"); search.authorize_typos(true); - search.optional_words(TermsMatchingStrategy::default()); + search.terms_matching_strategy(TermsMatchingStrategy::default()); // all documents should be returned let crate::SearchResult { documents_ids, .. } = search.execute().unwrap(); assert_eq!(documents_ids.len(), 4); @@ -1314,7 +1314,7 @@ mod tests { let mut search = crate::Search::new(&rtxn, &index); search.query("document"); search.authorize_typos(true); - search.optional_words(TermsMatchingStrategy::default()); + search.terms_matching_strategy(TermsMatchingStrategy::default()); // all documents should be returned let crate::SearchResult { documents_ids, .. } = search.execute().unwrap(); assert_eq!(documents_ids.len(), 4); @@ -1513,7 +1513,7 @@ mod tests { let mut search = crate::Search::new(&rtxn, &index); search.query("化妆包"); search.authorize_typos(true); - search.optional_words(TermsMatchingStrategy::default()); + search.terms_matching_strategy(TermsMatchingStrategy::default()); // only 1 document should be returned let crate::SearchResult { documents_ids, .. } = search.execute().unwrap(); diff --git a/milli/tests/search/distinct.rs b/milli/tests/search/distinct.rs index 9e9905c3f..64dd16f09 100644 --- a/milli/tests/search/distinct.rs +++ b/milli/tests/search/distinct.rs @@ -28,7 +28,7 @@ macro_rules! test_distinct { search.query(search::TEST_QUERY); search.limit(EXTERNAL_DOCUMENTS_IDS.len()); search.authorize_typos(true); - search.optional_words(TermsMatchingStrategy::default()); + search.terms_matching_strategy(TermsMatchingStrategy::default()); let SearchResult { documents_ids, candidates, .. } = search.execute().unwrap(); diff --git a/milli/tests/search/filters.rs b/milli/tests/search/filters.rs index 675004b56..18de24ac3 100644 --- a/milli/tests/search/filters.rs +++ b/milli/tests/search/filters.rs @@ -19,7 +19,7 @@ macro_rules! test_filter { search.query(search::TEST_QUERY); search.limit(EXTERNAL_DOCUMENTS_IDS.len()); search.authorize_typos(true); - search.optional_words(TermsMatchingStrategy::default()); + search.terms_matching_strategy(TermsMatchingStrategy::default()); search.filter(filter_conditions); let SearchResult { documents_ids, .. } = search.execute().unwrap(); diff --git a/milli/tests/search/query_criteria.rs b/milli/tests/search/query_criteria.rs index 0fce7c6df..8b72c8420 100644 --- a/milli/tests/search/query_criteria.rs +++ b/milli/tests/search/query_criteria.rs @@ -31,7 +31,7 @@ macro_rules! test_criterion { search.query(search::TEST_QUERY); search.limit(EXTERNAL_DOCUMENTS_IDS.len()); search.authorize_typos($authorize_typos); - search.optional_words($optional_word); + search.terms_matching_strategy($optional_word); search.sort_criteria($sort_criteria); let SearchResult { documents_ids, .. } = search.execute().unwrap(); @@ -353,7 +353,7 @@ fn criteria_mixup() { let mut search = Search::new(&mut rtxn, &index); search.query(search::TEST_QUERY); search.limit(EXTERNAL_DOCUMENTS_IDS.len()); - search.optional_words(ALLOW_OPTIONAL_WORDS); + search.terms_matching_strategy(ALLOW_OPTIONAL_WORDS); search.authorize_typos(ALLOW_TYPOS); let SearchResult { documents_ids, .. } = search.execute().unwrap(); diff --git a/milli/tests/search/sort.rs b/milli/tests/search/sort.rs index eca0d2986..16d21eac8 100644 --- a/milli/tests/search/sort.rs +++ b/milli/tests/search/sort.rs @@ -15,7 +15,7 @@ fn sort_ranking_rule_missing() { search.query(search::TEST_QUERY); search.limit(EXTERNAL_DOCUMENTS_IDS.len()); search.authorize_typos(true); - search.optional_words(TermsMatchingStrategy::default()); + search.terms_matching_strategy(TermsMatchingStrategy::default()); search.sort_criteria(vec![AscDesc::Asc(Member::Field(S("tag")))]); let result = search.execute(); diff --git a/milli/tests/search/typo_tolerance.rs b/milli/tests/search/typo_tolerance.rs index 7719cf34d..7dc6b0c4f 100644 --- a/milli/tests/search/typo_tolerance.rs +++ b/milli/tests/search/typo_tolerance.rs @@ -20,7 +20,7 @@ fn test_typo_tolerance_one_typo() { search.query("zeal"); search.limit(10); search.authorize_typos(true); - search.optional_words(TermsMatchingStrategy::default()); + search.terms_matching_strategy(TermsMatchingStrategy::default()); let result = search.execute().unwrap(); assert_eq!(result.documents_ids.len(), 1); @@ -29,7 +29,7 @@ fn test_typo_tolerance_one_typo() { search.query("zean"); search.limit(10); search.authorize_typos(true); - search.optional_words(TermsMatchingStrategy::default()); + search.terms_matching_strategy(TermsMatchingStrategy::default()); let result = search.execute().unwrap(); assert_eq!(result.documents_ids.len(), 0); @@ -47,7 +47,7 @@ fn test_typo_tolerance_one_typo() { search.query("zean"); search.limit(10); search.authorize_typos(true); - search.optional_words(TermsMatchingStrategy::default()); + search.terms_matching_strategy(TermsMatchingStrategy::default()); let result = search.execute().unwrap(); assert_eq!(result.documents_ids.len(), 1); @@ -66,7 +66,7 @@ fn test_typo_tolerance_two_typo() { search.query("zealand"); search.limit(10); search.authorize_typos(true); - search.optional_words(TermsMatchingStrategy::default()); + search.terms_matching_strategy(TermsMatchingStrategy::default()); let result = search.execute().unwrap(); assert_eq!(result.documents_ids.len(), 1); @@ -75,7 +75,7 @@ fn test_typo_tolerance_two_typo() { search.query("zealemd"); search.limit(10); search.authorize_typos(true); - search.optional_words(TermsMatchingStrategy::default()); + search.terms_matching_strategy(TermsMatchingStrategy::default()); let result = search.execute().unwrap(); assert_eq!(result.documents_ids.len(), 0); @@ -93,7 +93,7 @@ fn test_typo_tolerance_two_typo() { search.query("zealemd"); search.limit(10); search.authorize_typos(true); - search.optional_words(TermsMatchingStrategy::default()); + search.terms_matching_strategy(TermsMatchingStrategy::default()); let result = search.execute().unwrap(); assert_eq!(result.documents_ids.len(), 1); @@ -142,7 +142,7 @@ fn test_typo_disabled_on_word() { search.query("zealand"); search.limit(10); search.authorize_typos(true); - search.optional_words(TermsMatchingStrategy::default()); + search.terms_matching_strategy(TermsMatchingStrategy::default()); let result = search.execute().unwrap(); assert_eq!(result.documents_ids.len(), 2); @@ -162,7 +162,7 @@ fn test_typo_disabled_on_word() { search.query("zealand"); search.limit(10); search.authorize_typos(true); - search.optional_words(TermsMatchingStrategy::default()); + search.terms_matching_strategy(TermsMatchingStrategy::default()); let result = search.execute().unwrap(); assert_eq!(result.documents_ids.len(), 1); @@ -182,7 +182,7 @@ fn test_disable_typo_on_attribute() { search.query("antebelum"); search.limit(10); search.authorize_typos(true); - search.optional_words(TermsMatchingStrategy::default()); + search.terms_matching_strategy(TermsMatchingStrategy::default()); let result = search.execute().unwrap(); assert_eq!(result.documents_ids.len(), 1); @@ -200,7 +200,7 @@ fn test_disable_typo_on_attribute() { search.query("antebelum"); search.limit(10); search.authorize_typos(true); - search.optional_words(TermsMatchingStrategy::default()); + search.terms_matching_strategy(TermsMatchingStrategy::default()); let result = search.execute().unwrap(); assert_eq!(result.documents_ids.len(), 0);