Fix bug in computation of query term at a position

This commit is contained in:
Loïc Lecrenier 2023-05-01 16:26:01 +02:00
parent 11f814821d
commit 1b514517f5
5 changed files with 84 additions and 40 deletions

View file

@ -69,16 +69,14 @@ pub fn compute_query_term_subset_docids_within_field_id(
}
for phrase in term.all_phrases(ctx)? {
let mut phrase_docids = ctx.get_phrase_docids(phrase)?.clone();
// There may be false positives when resolving a phrase, so we're not
// guaranteed that all of its words are within a single fid.
// TODO: fix this?
if let Some(word) = phrase.words(ctx).iter().flatten().next() {
if let Some(word_fid_docids) = ctx.get_db_word_fid_docids(*word, fid)? {
phrase_docids &= word_fid_docids;
docids |= ctx.get_phrase_docids(phrase)? & word_fid_docids;
}
}
docids |= phrase_docids;
}
if let Some(word_prefix) = term.use_prefix_db(ctx) {
@ -98,7 +96,6 @@ pub fn compute_query_term_subset_docids_within_position(
position: u16,
) -> Result<RoaringBitmap> {
// TODO Use the roaring::MultiOps trait
let mut docids = RoaringBitmap::new();
for word in term.all_single_words_except_prefix_db(ctx)? {
if let Some(word_position_docids) =
@ -109,16 +106,14 @@ pub fn compute_query_term_subset_docids_within_position(
}
for phrase in term.all_phrases(ctx)? {
let mut phrase_docids = ctx.get_phrase_docids(phrase)?.clone();
// It's difficult to know the expected position of the words in the phrase,
// so instead we just check the first one.
// TODO: fix this?
if let Some(word) = phrase.words(ctx).iter().flatten().next() {
if let Some(word_position_docids) = ctx.get_db_word_position_docids(*word, position)? {
phrase_docids &= word_position_docids;
docids |= ctx.get_phrase_docids(phrase)? & word_position_docids
}
}
docids |= phrase_docids;
}
if let Some(word_prefix) = term.use_prefix_db(ctx) {
@ -128,7 +123,6 @@ pub fn compute_query_term_subset_docids_within_position(
docids |= word_position_docids;
}
}
Ok(docids)
}