query exact_word_docids in resolve_query_tree

This commit is contained in:
ad hoc 2022-03-24 19:25:11 +01:00
parent 8d46a5b0b5
commit c4c6e35352
No known key found for this signature in database
GPG Key ID: 4F00A782990CC643
2 changed files with 26 additions and 5 deletions

View File

@ -68,6 +68,7 @@ impl Default for Candidates {
pub trait Context<'c> { pub trait Context<'c> {
fn documents_ids(&self) -> heed::Result<RoaringBitmap>; fn documents_ids(&self) -> heed::Result<RoaringBitmap>;
fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>; fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;
fn exact_word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;
fn word_prefix_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>; fn word_prefix_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;
fn word_pair_proximity_docids( fn word_pair_proximity_docids(
&self, &self,
@ -118,6 +119,10 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> {
self.index.word_docids.get(self.rtxn, &word) self.index.word_docids.get(self.rtxn, &word)
} }
/// Looks up `word` in the index's `exact_word_docids` database, reading
/// through this builder's transaction (`self.rtxn`).
///
/// The result of the database `get` is returned unchanged: `Ok(None)` when
/// there is no entry for `word`, or the `heed` error if the read fails.
fn exact_word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
    let exact_db = &self.index.exact_word_docids;
    exact_db.get(self.rtxn, word)
}
fn word_prefix_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> { fn word_prefix_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
self.index.word_prefix_docids.get(self.rtxn, &word) self.index.word_prefix_docids.get(self.rtxn, &word)
} }
@ -400,11 +405,14 @@ fn query_docids(
let mut docids = RoaringBitmap::new(); let mut docids = RoaringBitmap::new();
for (word, _typo) in words { for (word, _typo) in words {
let current_docids = ctx.word_docids(&word)?.unwrap_or_default(); let current_docids = ctx.word_docids(&word)?.unwrap_or_default();
docids |= current_docids; let exact_current_docids = ctx.exact_word_docids(&word)?.unwrap_or_default();
docids |= current_docids | exact_current_docids;
} }
Ok(docids) Ok(docids)
} else { } else {
Ok(ctx.word_docids(&word)?.unwrap_or_default()) let word_docids = ctx.word_docids(&word)?.unwrap_or_default();
let exact_word_docids = ctx.exact_word_docids(&word)?.unwrap_or_default();
Ok(word_docids | exact_word_docids)
} }
} }
QueryKind::Tolerant { typo, word } => { QueryKind::Tolerant { typo, word } => {
@ -512,6 +520,7 @@ pub mod test {
pub struct TestContext<'t> { pub struct TestContext<'t> {
words_fst: fst::Set<Cow<'t, [u8]>>, words_fst: fst::Set<Cow<'t, [u8]>>,
word_docids: HashMap<String, RoaringBitmap>, word_docids: HashMap<String, RoaringBitmap>,
exact_word_docids: HashMap<String, RoaringBitmap>,
word_prefix_docids: HashMap<String, RoaringBitmap>, word_prefix_docids: HashMap<String, RoaringBitmap>,
word_pair_proximity_docids: HashMap<(String, String, i32), RoaringBitmap>, word_pair_proximity_docids: HashMap<(String, String, i32), RoaringBitmap>,
word_prefix_pair_proximity_docids: HashMap<(String, String, i32), RoaringBitmap>, word_prefix_pair_proximity_docids: HashMap<(String, String, i32), RoaringBitmap>,
@ -527,6 +536,10 @@ pub mod test {
Ok(self.word_docids.get(&word.to_string()).cloned()) Ok(self.word_docids.get(&word.to_string()).cloned())
} }
/// Test-context lookup of `word` in the in-memory `exact_word_docids` map.
///
/// Returns a clone of the stored bitmap, or `Ok(None)` when the map has no
/// entry for `word`. This implementation never returns an error.
fn exact_word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
    // `HashMap<String, _>::get` accepts a `&str` key through `Borrow<str>`,
    // so there is no need to allocate a temporary `String` for the lookup.
    Ok(self.exact_word_docids.get(word).cloned())
}
fn word_prefix_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> { fn word_prefix_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
Ok(self.word_prefix_docids.get(&word.to_string()).cloned()) Ok(self.word_prefix_docids.get(&word.to_string()).cloned())
} }
@ -643,6 +656,8 @@ pub mod test {
s("morning") => random_postings(rng, 125), s("morning") => random_postings(rng, 125),
}; };
let exact_word_docids = HashMap::new();
let mut docid_words = HashMap::new(); let mut docid_words = HashMap::new();
for (word, docids) in word_docids.iter() { for (word, docids) in word_docids.iter() {
for docid in docids { for docid in docids {
@ -712,6 +727,7 @@ pub mod test {
TestContext { TestContext {
words_fst, words_fst,
word_docids, word_docids,
exact_word_docids,
word_prefix_docids, word_prefix_docids,
word_pair_proximity_docids, word_pair_proximity_docids,
word_prefix_pair_proximity_docids, word_prefix_pair_proximity_docids,

View File

@ -284,7 +284,7 @@ where
let mut word_pair_proximity_docids = None; let mut word_pair_proximity_docids = None;
let mut word_position_docids = None; let mut word_position_docids = None;
let mut word_docids = None; let mut word_docids = None;
let mut _exact_word_docids = None; let mut exact_word_docids = None;
let mut databases_seen = 0; let mut databases_seen = 0;
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase { (self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
@ -299,7 +299,7 @@ where
word_docids = Some(cloneable_chunk); word_docids = Some(cloneable_chunk);
let cloneable_chunk = let cloneable_chunk =
unsafe { as_cloneable_grenad(&exact_word_docids_reader)? }; unsafe { as_cloneable_grenad(&exact_word_docids_reader)? };
_exact_word_docids = Some(cloneable_chunk); exact_word_docids = Some(cloneable_chunk);
TypedChunk::WordDocids { word_docids_reader, exact_word_docids_reader } TypedChunk::WordDocids { word_docids_reader, exact_word_docids_reader }
} }
TypedChunk::WordPairProximityDocids(chunk) => { TypedChunk::WordPairProximityDocids(chunk) => {
@ -352,6 +352,7 @@ where
self.execute_prefix_databases( self.execute_prefix_databases(
word_docids, word_docids,
exact_word_docids,
word_pair_proximity_docids, word_pair_proximity_docids,
word_position_docids, word_position_docids,
)?; )?;
@ -363,6 +364,7 @@ where
pub fn execute_prefix_databases( pub fn execute_prefix_databases(
self, self,
word_docids: Option<grenad::Reader<CursorClonableMmap>>, word_docids: Option<grenad::Reader<CursorClonableMmap>>,
exact_word_docids: Option<grenad::Reader<CursorClonableMmap>>,
word_pair_proximity_docids: Option<grenad::Reader<CursorClonableMmap>>, word_pair_proximity_docids: Option<grenad::Reader<CursorClonableMmap>>,
word_position_docids: Option<grenad::Reader<CursorClonableMmap>>, word_position_docids: Option<grenad::Reader<CursorClonableMmap>>,
) -> Result<()> ) -> Result<()>
@ -433,7 +435,10 @@ where
if let Some(word_docids) = word_docids { if let Some(word_docids) = word_docids {
let mut word_docids_builder = grenad::MergerBuilder::new(merge_nothing as MergeFn); let mut word_docids_builder = grenad::MergerBuilder::new(merge_nothing as MergeFn);
word_docids_builder.push(word_docids.into_cursor()?); word_docids_builder.push(word_docids.into_cursor()?);
// TODO: push exact_word_docids if let Some(exact_word_docids) = exact_word_docids {
word_docids_builder.push(exact_word_docids.into_cursor()?);
}
let word_docids_iter = word_docids_builder.build().into_stream_merger_iter()?; let word_docids_iter = word_docids_builder.build().into_stream_merger_iter()?;
// Run the word prefix docids update operation. // Run the word prefix docids update operation.
let mut builder = WordPrefixDocids::new(self.wtxn, self.index); let mut builder = WordPrefixDocids::new(self.wtxn, self.index);