From c4c6e3535290c88016e6a74f0f015563432e7fc9 Mon Sep 17 00:00:00 2001 From: ad hoc Date: Thu, 24 Mar 2022 19:25:11 +0100 Subject: [PATCH] query exact_word_docids in resolve_query_tree --- milli/src/search/criteria/mod.rs | 20 ++++++++++++++++++-- milli/src/update/index_documents/mod.rs | 11 ++++++++--- 2 files changed, 26 insertions(+), 5 deletions(-) diff --git a/milli/src/search/criteria/mod.rs b/milli/src/search/criteria/mod.rs index 1dbfd2524..df9189239 100644 --- a/milli/src/search/criteria/mod.rs +++ b/milli/src/search/criteria/mod.rs @@ -68,6 +68,7 @@ impl Default for Candidates { pub trait Context<'c> { fn documents_ids(&self) -> heed::Result; fn word_docids(&self, word: &str) -> heed::Result>; + fn exact_word_docids(&self, word: &str) -> heed::Result>; fn word_prefix_docids(&self, word: &str) -> heed::Result>; fn word_pair_proximity_docids( &self, @@ -118,6 +119,10 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> { self.index.word_docids.get(self.rtxn, &word) } + fn exact_word_docids(&self, word: &str) -> heed::Result> { + self.index.exact_word_docids.get(self.rtxn, &word) + } + fn word_prefix_docids(&self, word: &str) -> heed::Result> { self.index.word_prefix_docids.get(self.rtxn, &word) } @@ -400,11 +405,14 @@ fn query_docids( let mut docids = RoaringBitmap::new(); for (word, _typo) in words { let current_docids = ctx.word_docids(&word)?.unwrap_or_default(); - docids |= current_docids; + let exact_current_docids = ctx.exact_word_docids(&word)?.unwrap_or_default(); + docids |= current_docids | exact_current_docids; } Ok(docids) } else { - Ok(ctx.word_docids(&word)?.unwrap_or_default()) + let word_docids = ctx.word_docids(&word)?.unwrap_or_default(); + let exact_word_docids = ctx.exact_word_docids(&word)?.unwrap_or_default(); + Ok(word_docids | exact_word_docids) } } QueryKind::Tolerant { typo, word } => { @@ -512,6 +520,7 @@ pub mod test { pub struct TestContext<'t> { words_fst: fst::Set>, word_docids: HashMap, + exact_word_docids: HashMap, word_prefix_docids: HashMap, word_pair_proximity_docids: HashMap<(String, String, i32), RoaringBitmap>, word_prefix_pair_proximity_docids: HashMap<(String, String, i32), RoaringBitmap>, @@ -527,6 +536,10 @@ pub mod test { Ok(self.word_docids.get(&word.to_string()).cloned()) } + fn exact_word_docids(&self, word: &str) -> heed::Result> { + Ok(self.exact_word_docids.get(&word.to_string()).cloned()) + } + fn word_prefix_docids(&self, word: &str) -> heed::Result> { Ok(self.word_prefix_docids.get(&word.to_string()).cloned()) } @@ -643,6 +656,8 @@ pub mod test { s("morning") => random_postings(rng, 125), }; + let exact_word_docids = HashMap::new(); + let mut docid_words = HashMap::new(); for (word, docids) in word_docids.iter() { for docid in docids { @@ -712,6 +727,7 @@ pub mod test { TestContext { words_fst, word_docids, + exact_word_docids, word_prefix_docids, word_pair_proximity_docids, word_prefix_pair_proximity_docids, diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index c490e93da..54d30f8fb 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -284,7 +284,7 @@ where let mut word_pair_proximity_docids = None; let mut word_position_docids = None; let mut word_docids = None; - let mut _exact_word_docids = None; + let mut exact_word_docids = None; let mut databases_seen = 0; (self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase { @@ -299,7 +299,7 @@ where word_docids = Some(cloneable_chunk); let cloneable_chunk = unsafe { as_cloneable_grenad(&exact_word_docids_reader)? }; - _exact_word_docids = Some(cloneable_chunk); + exact_word_docids = Some(cloneable_chunk); TypedChunk::WordDocids { word_docids_reader, exact_word_docids_reader } } TypedChunk::WordPairProximityDocids(chunk) => { @@ -352,6 +352,7 @@ where self.execute_prefix_databases( word_docids, + exact_word_docids, word_pair_proximity_docids, word_position_docids, )?; @@ -363,6 +364,7 @@ where pub fn execute_prefix_databases( self, word_docids: Option>, + exact_word_docids: Option>, word_pair_proximity_docids: Option>, word_position_docids: Option>, ) -> Result<()> @@ -433,7 +435,10 @@ where if let Some(word_docids) = word_docids { let mut word_docids_builder = grenad::MergerBuilder::new(merge_nothing as MergeFn); word_docids_builder.push(word_docids.into_cursor()?); - // TODO: push exact_word_docids + if let Some(exact_word_docids) = exact_word_docids { + word_docids_builder.push(exact_word_docids.into_cursor()?); + } + let word_docids_iter = word_docids_builder.build().into_stream_merger_iter()?; // Run the word prefix docids update operation. let mut builder = WordPrefixDocids::new(self.wtxn, self.index);