query exact_word_docids in resolve_query_tree

This commit is contained in:
ad hoc 2022-03-24 19:25:11 +01:00
parent 8d46a5b0b5
commit c4c6e35352
No known key found for this signature in database
GPG Key ID: 4F00A782990CC643
2 changed files with 26 additions and 5 deletions

View File

@ -68,6 +68,7 @@ impl Default for Candidates {
pub trait Context<'c> {
fn documents_ids(&self) -> heed::Result<RoaringBitmap>;
fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;
/// Looks up the `RoaringBitmap` of document ids associated with `word` among the
/// exact-word docids; the `Option` is `None` when the implementor has no entry for `word`.
fn exact_word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;
fn word_prefix_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;
fn word_pair_proximity_docids(
&self,
@ -118,6 +119,10 @@ impl<'c> Context<'c> for CriteriaBuilder<'c> {
self.index.word_docids.get(self.rtxn, &word)
}
/// Queries the index's `exact_word_docids` database for `word` using `self.rtxn`
/// and hands the lookup result back unchanged.
fn exact_word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
    let exact_db = &self.index.exact_word_docids;
    exact_db.get(self.rtxn, word)
}
/// Queries the index's `word_prefix_docids` database for `word` using `self.rtxn`
/// and hands the lookup result back unchanged.
fn word_prefix_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
    let prefix_db = &self.index.word_prefix_docids;
    prefix_db.get(self.rtxn, word)
}
@ -400,11 +405,14 @@ fn query_docids(
let mut docids = RoaringBitmap::new();
for (word, _typo) in words {
let current_docids = ctx.word_docids(&word)?.unwrap_or_default();
docids |= current_docids;
let exact_current_docids = ctx.exact_word_docids(&word)?.unwrap_or_default();
docids |= current_docids | exact_current_docids;
}
Ok(docids)
} else {
Ok(ctx.word_docids(&word)?.unwrap_or_default())
let word_docids = ctx.word_docids(&word)?.unwrap_or_default();
let exact_word_docids = ctx.exact_word_docids(&word)?.unwrap_or_default();
Ok(word_docids | exact_word_docids)
}
}
QueryKind::Tolerant { typo, word } => {
@ -512,6 +520,7 @@ pub mod test {
pub struct TestContext<'t> {
words_fst: fst::Set<Cow<'t, [u8]>>,
word_docids: HashMap<String, RoaringBitmap>,
exact_word_docids: HashMap<String, RoaringBitmap>,
word_prefix_docids: HashMap<String, RoaringBitmap>,
word_pair_proximity_docids: HashMap<(String, String, i32), RoaringBitmap>,
word_prefix_pair_proximity_docids: HashMap<(String, String, i32), RoaringBitmap>,
@ -527,6 +536,10 @@ pub mod test {
Ok(self.word_docids.get(&word.to_string()).cloned())
}
/// Test-context lookup: returns a clone of the bitmap stored under `word` in the
/// in-memory `exact_word_docids` map, or `None` when the map has no such key.
///
/// This implementation always returns `Ok`.
fn exact_word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
    // `HashMap<String, _>::get` accepts a `&str` key directly (via `Borrow<str>`),
    // so there is no need to allocate a temporary `String` for every lookup.
    Ok(self.exact_word_docids.get(word).cloned())
}
/// Test-context lookup: returns a clone of the bitmap stored under `word` in the
/// in-memory `word_prefix_docids` map, or `None` when the map has no such key.
///
/// This implementation always returns `Ok`.
fn word_prefix_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>> {
    // `HashMap<String, _>::get` accepts a `&str` key directly (via `Borrow<str>`),
    // so there is no need to allocate a temporary `String` for every lookup.
    Ok(self.word_prefix_docids.get(word).cloned())
}
@ -643,6 +656,8 @@ pub mod test {
s("morning") => random_postings(rng, 125),
};
let exact_word_docids = HashMap::new();
let mut docid_words = HashMap::new();
for (word, docids) in word_docids.iter() {
for docid in docids {
@ -712,6 +727,7 @@ pub mod test {
TestContext {
words_fst,
word_docids,
exact_word_docids,
word_prefix_docids,
word_pair_proximity_docids,
word_prefix_pair_proximity_docids,

View File

@ -284,7 +284,7 @@ where
let mut word_pair_proximity_docids = None;
let mut word_position_docids = None;
let mut word_docids = None;
let mut _exact_word_docids = None;
let mut exact_word_docids = None;
let mut databases_seen = 0;
(self.progress)(UpdateIndexingStep::MergeDataIntoFinalDatabase {
@ -299,7 +299,7 @@ where
word_docids = Some(cloneable_chunk);
let cloneable_chunk =
unsafe { as_cloneable_grenad(&exact_word_docids_reader)? };
_exact_word_docids = Some(cloneable_chunk);
exact_word_docids = Some(cloneable_chunk);
TypedChunk::WordDocids { word_docids_reader, exact_word_docids_reader }
}
TypedChunk::WordPairProximityDocids(chunk) => {
@ -352,6 +352,7 @@ where
self.execute_prefix_databases(
word_docids,
exact_word_docids,
word_pair_proximity_docids,
word_position_docids,
)?;
@ -363,6 +364,7 @@ where
pub fn execute_prefix_databases(
self,
word_docids: Option<grenad::Reader<CursorClonableMmap>>,
exact_word_docids: Option<grenad::Reader<CursorClonableMmap>>,
word_pair_proximity_docids: Option<grenad::Reader<CursorClonableMmap>>,
word_position_docids: Option<grenad::Reader<CursorClonableMmap>>,
) -> Result<()>
@ -433,7 +435,10 @@ where
if let Some(word_docids) = word_docids {
let mut word_docids_builder = grenad::MergerBuilder::new(merge_nothing as MergeFn);
word_docids_builder.push(word_docids.into_cursor()?);
// TODO: push exact_word_docids
if let Some(exact_word_docids) = exact_word_docids {
word_docids_builder.push(exact_word_docids.into_cursor()?);
}
let word_docids_iter = word_docids_builder.build().into_stream_merger_iter()?;
// Run the word prefix docids update operation.
let mut builder = WordPrefixDocids::new(self.wtxn, self.index);