Fix bug in exact search: only add the exact word docids when the word has not been derived (zero typos)

This commit is contained in:
ad hoc 2022-03-30 16:07:59 +02:00
parent 56b4f5dce2
commit 6b2c2509b2
No known key found for this signature in database
GPG Key ID: 4F00A782990CC643
3 changed files with 26 additions and 12 deletions

View File

@ -970,6 +970,7 @@ impl Index {
.get::<_, Str, SerdeBincode<Vec<&str>>>(txn, main_key::EXACT_ATTRIBUTES)?
.unwrap_or_default())
}
pub fn exact_attributes_ids(&self, txn: &RoTxn) -> Result<HashSet<FieldId>> {
let attrs = self.exact_attributes(txn)?;
let fid_map = self.fields_ids_map(txn)?;

View File

@ -402,31 +402,42 @@ fn query_docids(
wdcache: &mut WordDerivationsCache,
) -> Result<RoaringBitmap> {
match &query.kind {
QueryKind::Exact { word, .. } => {
QueryKind::Exact { word, original_typo } => {
if query.prefix && ctx.in_prefix_cache(&word) {
let doc_ids = ctx.word_prefix_docids(&word)?.unwrap_or_default();
let exact_docids = ctx.exact_word_prefix_docids(&word)?.unwrap_or_default();
Ok(doc_ids | exact_docids)
let mut docids = ctx.word_prefix_docids(&word)?.unwrap_or_default();
// only add the exact docids if the word hasn't been derived
if *original_typo == 0 {
docids |= ctx.exact_word_prefix_docids(&word)?.unwrap_or_default();
}
Ok(docids)
} else if query.prefix {
let words = word_derivations(&word, true, 0, ctx.words_fst(), wdcache)?;
let mut docids = RoaringBitmap::new();
for (word, _typo) in words {
let current_docids = ctx.word_docids(&word)?.unwrap_or_default();
let exact_current_docids = ctx.exact_word_docids(&word)?.unwrap_or_default();
docids |= current_docids | exact_current_docids;
docids |= ctx.word_docids(&word)?.unwrap_or_default();
// only add the exact docids if the word hasn't been derived
if *original_typo == 0 {
docids |= ctx.exact_word_docids(&word)?.unwrap_or_default();
}
}
Ok(docids)
} else {
let word_docids = ctx.word_docids(&word)?.unwrap_or_default();
let exact_word_docids = ctx.exact_word_docids(&word)?.unwrap_or_default();
Ok(word_docids | exact_word_docids)
let mut docids = ctx.word_docids(&word)?.unwrap_or_default();
// only add the exact docids if the word hasn't been derived
if *original_typo == 0 {
docids |= ctx.exact_word_docids(&word)?.unwrap_or_default();
}
Ok(docids)
}
}
QueryKind::Tolerant { typo, word } => {
let words = word_derivations(&word, query.prefix, *typo, ctx.words_fst(), wdcache)?;
let mut docids = RoaringBitmap::new();
for (word, _typo) in words {
let current_docids = ctx.word_docids(&word)?.unwrap_or_default();
for (word, typo) in words {
let mut current_docids = ctx.word_docids(&word)?.unwrap_or_default();
if *typo == 0 {
current_docids |= ctx.exact_word_docids(&word)?.unwrap_or_default()
}
docids |= current_docids;
}
Ok(docids)

View File

@ -69,9 +69,11 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
}
let fid = field_id_from_position(position);
if exact_attributes.contains(&fid) && !added_to_exact {
println!("is exact: {}", std::str::from_utf8(&word_bytes).unwrap());
exact_word_docids_sorter.insert(word_bytes, &value_buffer)?;
added_to_exact = true;
} else if !added_to_word_docids {
println!("isnt exact: {}", std::str::from_utf8(&word_bytes).unwrap());
word_docids_sorter.insert(word_bytes, &value_buffer)?;
added_to_word_docids = true;
}