diff --git a/milli/src/index.rs b/milli/src/index.rs
index 80f62f684..c7441c590 100644
--- a/milli/src/index.rs
+++ b/milli/src/index.rs
@@ -970,6 +970,7 @@ impl Index {
             .get::<_, Str, SerdeBincode>>(txn, main_key::EXACT_ATTRIBUTES)?
             .unwrap_or_default())
     }
+
     pub fn exact_attributes_ids(&self, txn: &RoTxn) -> Result> {
         let attrs = self.exact_attributes(txn)?;
         let fid_map = self.fields_ids_map(txn)?;
diff --git a/milli/src/search/criteria/mod.rs b/milli/src/search/criteria/mod.rs
index 6ac076ea4..05305d724 100644
--- a/milli/src/search/criteria/mod.rs
+++ b/milli/src/search/criteria/mod.rs
@@ -402,31 +402,43 @@ fn query_docids(
     wdcache: &mut WordDerivationsCache,
 ) -> Result {
     match &query.kind {
-        QueryKind::Exact { word, .. } => {
+        QueryKind::Exact { word, original_typo } => {
             if query.prefix && ctx.in_prefix_cache(&word) {
-                let doc_ids = ctx.word_prefix_docids(&word)?.unwrap_or_default();
-                let exact_docids = ctx.exact_word_prefix_docids(&word)?.unwrap_or_default();
-                Ok(doc_ids | exact_docids)
+                let mut docids = ctx.word_prefix_docids(&word)?.unwrap_or_default();
+                // only add the exact docids if the word hasn't been derived
+                if *original_typo == 0 {
+                    docids |= ctx.exact_word_prefix_docids(&word)?.unwrap_or_default();
+                }
+                Ok(docids)
             } else if query.prefix {
                 let words = word_derivations(&word, true, 0, ctx.words_fst(), wdcache)?;
                 let mut docids = RoaringBitmap::new();
                 for (word, _typo) in words {
-                    let current_docids = ctx.word_docids(&word)?.unwrap_or_default();
-                    let exact_current_docids = ctx.exact_word_docids(&word)?.unwrap_or_default();
-                    docids |= current_docids | exact_current_docids;
+                    docids |= ctx.word_docids(&word)?.unwrap_or_default();
+                    // only add the exact docids if the word hasn't been derived
+                    if *original_typo == 0 {
+                        docids |= ctx.exact_word_docids(&word)?.unwrap_or_default();
+                    }
                 }
                 Ok(docids)
             } else {
-                let word_docids = ctx.word_docids(&word)?.unwrap_or_default();
-                let exact_word_docids = ctx.exact_word_docids(&word)?.unwrap_or_default();
-                Ok(word_docids | exact_word_docids)
+                let mut docids = ctx.word_docids(&word)?.unwrap_or_default();
+                // only add the exact docids if the word hasn't been derived
+                if *original_typo == 0 {
+                    docids |= ctx.exact_word_docids(&word)?.unwrap_or_default();
+                }
+                Ok(docids)
             }
         }
         QueryKind::Tolerant { typo, word } => {
             let words = word_derivations(&word, query.prefix, *typo, ctx.words_fst(), wdcache)?;
             let mut docids = RoaringBitmap::new();
-            for (word, _typo) in words {
-                let current_docids = ctx.word_docids(&word)?.unwrap_or_default();
+            for (word, typo) in words {
+                let mut current_docids = ctx.word_docids(&word)?.unwrap_or_default();
+                // only add the exact docids for derivations with zero typos
+                if *typo == 0 {
+                    current_docids |= ctx.exact_word_docids(&word)?.unwrap_or_default();
+                }
                 docids |= current_docids;
             }
             Ok(docids)