From 8993fec8a3dd3d2dbf7f681c2d7455aed75cc444 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Tue, 24 May 2022 09:15:49 +0200 Subject: [PATCH 1/4] return optional exact words --- milli/src/index.rs | 6 +++--- milli/src/search/query_tree.rs | 14 +++++++------- milli/src/update/settings.rs | 2 +- 3 files changed, 11 insertions(+), 11 deletions(-) diff --git a/milli/src/index.rs b/milli/src/index.rs index 81648fe1c..41bd85b93 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -1041,10 +1041,10 @@ impl Index { } /// List the words on which typo are not allowed - pub fn exact_words<'t>(&self, txn: &'t RoTxn) -> Result>> { + pub fn exact_words<'t>(&self, txn: &'t RoTxn) -> Result>>> { match self.main.get::<_, Str, ByteSlice>(txn, main_key::EXACT_WORDS)? { - Some(bytes) => Ok(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?), - None => Ok(fst::Set::default().map_data(Cow::Owned)?), + Some(bytes) => Ok(Some(fst::Set::new(bytes)?.map_data(Cow::Borrowed)?)), + None => Ok(None), } } diff --git a/milli/src/search/query_tree.rs b/milli/src/search/query_tree.rs index 02fc0747a..2e53971d2 100644 --- a/milli/src/search/query_tree.rs +++ b/milli/src/search/query_tree.rs @@ -152,7 +152,7 @@ trait Context { } /// Returns the minimum word len for 1 and 2 typos. fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)>; - fn exact_words(&self) -> crate::Result>>; + fn exact_words(&self) -> crate::Result>>>; } /// The query tree builder is the interface to build a query tree. @@ -183,7 +183,7 @@ impl<'a> Context for QueryTreeBuilder<'a> { Ok((one, two)) } - fn exact_words(&self) -> crate::Result>> { + fn exact_words(&self) -> crate::Result>>> { self.index.exact_words(self.rtxn) } } @@ -277,13 +277,13 @@ pub struct TypoConfig<'a> { pub max_typos: u8, pub word_len_one_typo: u8, pub word_len_two_typo: u8, - pub exact_words: fst::Set>, + pub exact_words: Option>>, } /// Return the `QueryKind` of a word depending on `authorize_typos` /// and the provided word length. fn typos<'a>(word: String, authorize_typos: bool, config: TypoConfig<'a>) -> QueryKind { - if authorize_typos && !config.exact_words.contains(&word) { + if authorize_typos && !config.exact_words.map(|s| s.contains(&word)).unwrap_or(false) { let count = word.chars().count().min(u8::MAX as usize) as u8; if count < config.word_len_one_typo { QueryKind::exact(word) @@ -779,8 +779,8 @@ mod test { Ok((DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS)) } - fn exact_words(&self) -> crate::Result>> { - Ok(fst::Set::new(Cow::Borrowed(self.exact_words.as_slice())).unwrap()) + fn exact_words(&self) -> crate::Result>>> { + Ok(Some(fst::Set::new(Cow::Borrowed(self.exact_words.as_slice())).unwrap())) } } @@ -1405,7 +1405,7 @@ mod test { #[test] fn test_min_word_len_typo() { - let exact_words = fst::Set::from_iter([b""]).unwrap().map_data(Cow::Owned).unwrap(); + let exact_words = Some(fst::Set::from_iter([b""]).unwrap().map_data(Cow::Owned).unwrap()); let config = TypoConfig { max_typos: 2, word_len_one_typo: 5, word_len_two_typo: 7, exact_words }; diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index bd1495b1c..829932d5c 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -1495,7 +1495,7 @@ mod tests { let words = btreeset! { S("Ab"), S("ac") }; builder.set_exact_words(words); assert!(builder.execute(|_| ()).is_ok()); - let exact_words = index.exact_words(&txn).unwrap(); + let exact_words = index.exact_words(&txn).unwrap().unwrap(); for word in exact_words.into_fst().stream().into_str_vec().unwrap() { assert!(word.0 == "ac" || word.0 == "ab"); } From ac975cc747e8b9edc0ddb251efc142607a1b969e Mon Sep 17 00:00:00 2001 From: ad hoc Date: Tue, 24 May 2022 09:43:17 +0200 Subject: [PATCH 2/4] cache context's exact words --- milli/src/search/mod.rs | 2 +- milli/src/search/query_tree.rs | 49 ++++++++++++++++++++++------------ 2 files changed, 33 insertions(+), 18 deletions(-) diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 979ee1e6e..f3f852a48 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -118,7 +118,7 @@ impl<'a> Search<'a> { let before = Instant::now(); let (query_tree, primitive_query, matching_words) = match self.query.as_ref() { Some(query) => { - let mut builder = QueryTreeBuilder::new(self.rtxn, self.index); + let mut builder = QueryTreeBuilder::new(self.rtxn, self.index)?; builder.optional_words(self.optional_words); builder.authorize_typos(self.is_typo_authorized()?); diff --git a/milli/src/search/query_tree.rs b/milli/src/search/query_tree.rs index 2e53971d2..4c4127dd4 100644 --- a/milli/src/search/query_tree.rs +++ b/milli/src/search/query_tree.rs @@ -152,7 +152,7 @@ trait Context { } /// Returns the minimum word len for 1 and 2 typos. fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)>; - fn exact_words(&self) -> crate::Result>>>; + fn exact_words(&self) -> &Option>>; } /// The query tree builder is the interface to build a query tree. @@ -162,6 +162,7 @@ pub struct QueryTreeBuilder<'a> { optional_words: bool, authorize_typos: bool, words_limit: Option, + exact_words: Option>>, } impl<'a> Context for QueryTreeBuilder<'a> { @@ -183,16 +184,24 @@ impl<'a> Context for QueryTreeBuilder<'a> { Ok((one, two)) } - fn exact_words(&self) -> crate::Result>>> { - self.index.exact_words(self.rtxn) + fn exact_words(&self) -> &Option>> { + &self.exact_words } } impl<'a> QueryTreeBuilder<'a> { /// Create a `QueryTreeBuilder` from a heed ReadOnly transaction `rtxn` /// and an Index `index`. - pub fn new(rtxn: &'a heed::RoTxn<'a>, index: &'a Index) -> Self { - Self { rtxn, index, optional_words: true, authorize_typos: true, words_limit: None } + pub fn new(rtxn: &'a heed::RoTxn<'a>, index: &'a Index) -> Result { + let exact_words = index.exact_words(rtxn)?; + Ok(Self { + rtxn, + index, + optional_words: true, + authorize_typos: true, + words_limit: None, + exact_words, + }) } /// if `optional_words` is set to `false` the query tree will be @@ -277,13 +286,13 @@ pub struct TypoConfig<'a> { pub max_typos: u8, pub word_len_one_typo: u8, pub word_len_two_typo: u8, - pub exact_words: Option>>, + pub exact_words: &'a Option>>, } /// Return the `QueryKind` of a word depending on `authorize_typos` /// and the provided word length. fn typos<'a>(word: String, authorize_typos: bool, config: TypoConfig<'a>) -> QueryKind { - if authorize_typos && !config.exact_words.map(|s| s.contains(&word)).unwrap_or(false) { + if authorize_typos && !config.exact_words.as_ref().map(|s| s.contains(&word)).unwrap_or(false) { let count = word.chars().count().min(u8::MAX as usize) as u8; if count < config.word_len_one_typo { QueryKind::exact(word) @@ -342,7 +351,7 @@ fn create_query_tree( children.push(Operation::Phrase(vec![left.to_string(), right.to_string()])); } let (word_len_one_typo, word_len_two_typo) = ctx.min_word_len_for_typo()?; - let exact_words = ctx.exact_words()?; + let exact_words = ctx.exact_words(); let config = TypoConfig { max_typos: 2, word_len_one_typo, word_len_two_typo, exact_words }; children.push(Operation::Query(Query { @@ -396,7 +405,7 @@ fn create_query_tree( let concat = words.concat(); let (word_len_one_typo, word_len_two_typo) = ctx.min_word_len_for_typo()?; - let exact_words = ctx.exact_words()?; + let exact_words = ctx.exact_words(); let config = TypoConfig { max_typos: 1, word_len_one_typo, @@ -501,7 +510,7 @@ fn create_matching_words( } let (word_len_one_typo, word_len_two_typo) = ctx.min_word_len_for_typo()?; - let exact_words = ctx.exact_words()?; + let exact_words = ctx.exact_words(); let config = TypoConfig { max_typos: 2, word_len_one_typo, word_len_two_typo, exact_words }; @@ -579,7 +588,7 @@ fn create_matching_words( let word = words.concat(); let (word_len_one_typo, word_len_two_typo) = ctx.min_word_len_for_typo()?; - let exact_words = ctx.exact_words()?; + let exact_words = ctx.exact_words(); let config = TypoConfig { max_typos: 1, word_len_one_typo, @@ -742,8 +751,7 @@ mod test { struct TestContext { synonyms: HashMap, Vec>>, postings: HashMap, - // Raw bytes for the exact word fst Set - exact_words: Vec, + exact_words: Option>>, } impl TestContext { @@ -779,8 +787,8 @@ mod test { Ok((DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS)) } - fn exact_words(&self) -> crate::Result>>> { - Ok(Some(fst::Set::new(Cow::Borrowed(self.exact_words.as_slice())).unwrap())) + fn exact_words(&self) -> &Option>> { + &self.exact_words } } @@ -799,6 +807,8 @@ mod test { } let exact_words = fst::SetBuilder::new(Vec::new()).unwrap().into_inner().unwrap(); + let exact_words = + Some(fst::Set::new(exact_words).unwrap().map_data(Cow::Owned).unwrap()); TestContext { synonyms: hashmap! { @@ -1406,8 +1416,12 @@ mod test { #[test] fn test_min_word_len_typo() { let exact_words = Some(fst::Set::from_iter([b""]).unwrap().map_data(Cow::Owned).unwrap()); - let config = - TypoConfig { max_typos: 2, word_len_one_typo: 5, word_len_two_typo: 7, exact_words }; + let config = TypoConfig { + max_typos: 2, + word_len_one_typo: 5, + word_len_two_typo: 7, + exact_words: &exact_words, + }; assert_eq!( typos("hello".to_string(), true, config.clone()), @@ -1433,6 +1447,7 @@ mod test { let tokens = result.tokens(); let exact_words = fst::Set::from_iter(Some("goodbye")).unwrap().into_fst().into_inner(); + let exact_words = Some(fst::Set::new(exact_words).unwrap().map_data(Cow::Owned).unwrap()); let context = TestContext { exact_words, ..Default::default() }; let (query_tree, _) = context.build(false, true, Some(2), tokens).unwrap().unwrap(); From 69dc4de80fb3f089bbb04352c3df21762e2b350f Mon Sep 17 00:00:00 2001 From: ad hoc Date: Tue, 24 May 2022 12:14:55 +0200 Subject: [PATCH 3/4] change &Option to Option<&Set> --- milli/src/search/query_tree.rs | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/milli/src/search/query_tree.rs b/milli/src/search/query_tree.rs index 4c4127dd4..7d2390b39 100644 --- a/milli/src/search/query_tree.rs +++ b/milli/src/search/query_tree.rs @@ -152,7 +152,7 @@ trait Context { } /// Returns the minimum word len for 1 and 2 typos. fn min_word_len_for_typo(&self) -> heed::Result<(u8, u8)>; - fn exact_words(&self) -> &Option>>; + fn exact_words(&self) -> Option<&fst::Set>>; } /// The query tree builder is the interface to build a query tree. @@ -184,8 +184,8 @@ impl<'a> Context for QueryTreeBuilder<'a> { Ok((one, two)) } - fn exact_words(&self) -> &Option>> { - &self.exact_words + fn exact_words(&self) -> Option<&fst::Set>> { + self.exact_words.as_ref() } } @@ -286,7 +286,7 @@ pub struct TypoConfig<'a> { pub max_typos: u8, pub word_len_one_typo: u8, pub word_len_two_typo: u8, - pub exact_words: &'a Option>>, + pub exact_words: Option<&'a fst::Set>>, } /// Return the `QueryKind` of a word depending on `authorize_typos` @@ -787,8 +787,8 @@ mod test { Ok((DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS)) } - fn exact_words(&self) -> &Option>> { - &self.exact_words + fn exact_words(&self) -> Option<&fst::Set>> { + self.exact_words.as_ref() } } @@ -1415,12 +1415,12 @@ mod test { #[test] fn test_min_word_len_typo() { - let exact_words = Some(fst::Set::from_iter([b""]).unwrap().map_data(Cow::Owned).unwrap()); + let exact_words = fst::Set::from_iter([b""]).unwrap().map_data(Cow::Owned).unwrap(); let config = TypoConfig { max_typos: 2, word_len_one_typo: 5, word_len_two_typo: 7, - exact_words: &exact_words, + exact_words: Some(&exact_words), }; assert_eq!( From 25fc576696446a57847d2eb5b263bc319223cddb Mon Sep 17 00:00:00 2001 From: ad hoc Date: Tue, 24 May 2022 14:15:33 +0200 Subject: [PATCH 4/4] review changes --- milli/src/search/query_tree.rs | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/milli/src/search/query_tree.rs b/milli/src/search/query_tree.rs index 7d2390b39..76748179b 100644 --- a/milli/src/search/query_tree.rs +++ b/milli/src/search/query_tree.rs @@ -193,14 +193,13 @@ impl<'a> QueryTreeBuilder<'a> { /// Create a `QueryTreeBuilder` from a heed ReadOnly transaction `rtxn` /// and an Index `index`. pub fn new(rtxn: &'a heed::RoTxn<'a>, index: &'a Index) -> Result { - let exact_words = index.exact_words(rtxn)?; Ok(Self { rtxn, index, optional_words: true, authorize_typos: true, words_limit: None, - exact_words, + exact_words: index.exact_words(rtxn)?, }) } @@ -292,7 +291,7 @@ pub struct TypoConfig<'a> { /// Return the `QueryKind` of a word depending on `authorize_typos` /// and the provided word length. fn typos<'a>(word: String, authorize_typos: bool, config: TypoConfig<'a>) -> QueryKind { - if authorize_typos && !config.exact_words.as_ref().map(|s| s.contains(&word)).unwrap_or(false) { + if authorize_typos && !config.exact_words.map_or(false, |s| s.contains(&word)) { let count = word.chars().count().min(u8::MAX as usize) as u8; if count < config.word_len_one_typo { QueryKind::exact(word)