Fix bug in exact search: only include exact-word docids for words matched with zero typos

This commit is contained in:
ad hoc 2022-03-30 16:07:59 +02:00
parent 56b4f5dce2
commit 6b2c2509b2
No known key found for this signature in database
GPG Key ID: 4F00A782990CC643
3 changed files with 26 additions and 12 deletions

View File

@ -970,6 +970,7 @@ impl Index {
.get::<_, Str, SerdeBincode<Vec<&str>>>(txn, main_key::EXACT_ATTRIBUTES)? .get::<_, Str, SerdeBincode<Vec<&str>>>(txn, main_key::EXACT_ATTRIBUTES)?
.unwrap_or_default()) .unwrap_or_default())
} }
pub fn exact_attributes_ids(&self, txn: &RoTxn) -> Result<HashSet<FieldId>> { pub fn exact_attributes_ids(&self, txn: &RoTxn) -> Result<HashSet<FieldId>> {
let attrs = self.exact_attributes(txn)?; let attrs = self.exact_attributes(txn)?;
let fid_map = self.fields_ids_map(txn)?; let fid_map = self.fields_ids_map(txn)?;

View File

@ -402,31 +402,42 @@ fn query_docids(
wdcache: &mut WordDerivationsCache, wdcache: &mut WordDerivationsCache,
) -> Result<RoaringBitmap> { ) -> Result<RoaringBitmap> {
match &query.kind { match &query.kind {
QueryKind::Exact { word, .. } => { QueryKind::Exact { word, original_typo } => {
if query.prefix && ctx.in_prefix_cache(&word) { if query.prefix && ctx.in_prefix_cache(&word) {
let doc_ids = ctx.word_prefix_docids(&word)?.unwrap_or_default(); let mut docids = ctx.word_prefix_docids(&word)?.unwrap_or_default();
let exact_docids = ctx.exact_word_prefix_docids(&word)?.unwrap_or_default(); // only add the exact docids if the word hasn't been derived
Ok(doc_ids | exact_docids) if *original_typo == 0 {
docids |= ctx.exact_word_prefix_docids(&word)?.unwrap_or_default();
}
Ok(docids)
} else if query.prefix { } else if query.prefix {
let words = word_derivations(&word, true, 0, ctx.words_fst(), wdcache)?; let words = word_derivations(&word, true, 0, ctx.words_fst(), wdcache)?;
let mut docids = RoaringBitmap::new(); let mut docids = RoaringBitmap::new();
for (word, _typo) in words { for (word, _typo) in words {
let current_docids = ctx.word_docids(&word)?.unwrap_or_default(); docids |= ctx.word_docids(&word)?.unwrap_or_default();
let exact_current_docids = ctx.exact_word_docids(&word)?.unwrap_or_default(); // only add the exact docids if the word hasn't been derived
docids |= current_docids | exact_current_docids; if *original_typo == 0 {
docids |= ctx.exact_word_docids(&word)?.unwrap_or_default();
}
} }
Ok(docids) Ok(docids)
} else { } else {
let word_docids = ctx.word_docids(&word)?.unwrap_or_default(); let mut docids = ctx.word_docids(&word)?.unwrap_or_default();
let exact_word_docids = ctx.exact_word_docids(&word)?.unwrap_or_default(); // only add the exact docids if the word hasn't been derived
Ok(word_docids | exact_word_docids) if *original_typo == 0 {
docids |= ctx.exact_word_docids(&word)?.unwrap_or_default();
}
Ok(docids)
} }
} }
QueryKind::Tolerant { typo, word } => { QueryKind::Tolerant { typo, word } => {
let words = word_derivations(&word, query.prefix, *typo, ctx.words_fst(), wdcache)?; let words = word_derivations(&word, query.prefix, *typo, ctx.words_fst(), wdcache)?;
let mut docids = RoaringBitmap::new(); let mut docids = RoaringBitmap::new();
for (word, _typo) in words { for (word, typo) in words {
let current_docids = ctx.word_docids(&word)?.unwrap_or_default(); let mut current_docids = ctx.word_docids(&word)?.unwrap_or_default();
if *typo == 0 {
current_docids |= ctx.exact_word_docids(&word)?.unwrap_or_default()
}
docids |= current_docids; docids |= current_docids;
} }
Ok(docids) Ok(docids)

View File

@ -69,9 +69,11 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
} }
let fid = field_id_from_position(position); let fid = field_id_from_position(position);
if exact_attributes.contains(&fid) && !added_to_exact { if exact_attributes.contains(&fid) && !added_to_exact {
println!("is exact: {}", std::str::from_utf8(&word_bytes).unwrap());
exact_word_docids_sorter.insert(word_bytes, &value_buffer)?; exact_word_docids_sorter.insert(word_bytes, &value_buffer)?;
added_to_exact = true; added_to_exact = true;
} else if !added_to_word_docids { } else if !added_to_word_docids {
println!("isnt exact: {}", std::str::from_utf8(&word_bytes).unwrap());
word_docids_sorter.insert(word_bytes, &value_buffer)?; word_docids_sorter.insert(word_bytes, &value_buffer)?;
added_to_word_docids = true; added_to_word_docids = true;
} }