Decide to use prefix DB if the word is not an ngram

2025-07-03 03:47:02 +02:00 · 2023-04-12 15:14:00 +02:00 · 2023-04-12 15:14:00 +02:00 · 38b7b31beb
commit 38b7b31beb
parent 7a01f20df7
2 changed files with 23 additions and 8 deletions
--- a/milli/src/search/new/query_term/compute_derivations.rs
+++ b/milli/src/search/new/query_term/compute_derivations.rs
@@ -177,6 +177,7 @@ pub fn partially_initialized_term_from_word(
    word: &str,
    max_typo: u8,
    is_prefix: bool,
+    is_ngram: bool,
 ) -> Result<QueryTerm> {
    let word_interned = ctx.word_interner.insert(word.to_owned());
@@ -197,12 +198,19 @@ pub fn partially_initialized_term_from_word(
    let fst = ctx.index.words_fst(ctx.txn)?;
    let use_prefix_db = is_prefix
-        && ctx
+        && (ctx
            .index
            .word_prefix_docids
            .remap_data_type::<DecodeIgnore>()
            .get(ctx.txn, word)?
-            .is_some();
+            .is_some()
+            || (!is_ngram
+                && ctx
+                    .index
+                    .exact_word_prefix_docids
+                    .remap_data_type::<DecodeIgnore>()
+                    .get(ctx.txn, word)?
+                    .is_some()));
    let use_prefix_db = if use_prefix_db { Some(word_interned) } else { None };
    let mut zero_typo = None;
--- a/milli/src/search/new/query_term/parse_query.rs
+++ b/milli/src/search/new/query_term/parse_query.rs
@@ -1,8 +1,8 @@
-use charabia::{normalizer::NormalizedTokenIter, SeparatorKind, TokenKind};
+use charabia::normalizer::NormalizedTokenIter;
-
+use charabia::{SeparatorKind, TokenKind};
-use crate::{Result, SearchContext, MAX_WORD_LENGTH};
 use super::*;
+use crate::{Result, SearchContext, MAX_WORD_LENGTH};
 /// Convert the tokenised search query into a list of located query terms.
 // TODO: checking if the positions are correct for phrases, separators, ngrams
@@ -51,6 +51,7 @@ pub fn located_query_terms_from_string(
                                word,
                                nbr_typos(word),
                                false,
+                                false,
                            )?;
                            let located_term = LocatedQueryTerm {
                                value: ctx.term_interner.push(term),
@@ -62,8 +63,13 @@ pub fn located_query_terms_from_string(
                    }
                } else {
                    let word = token.lemma();
-                    let term =
+                    let term = partially_initialized_term_from_word(
-                        partially_initialized_term_from_word(ctx, word, nbr_typos(word), true)?;
+                        ctx,
+                        word,
+                        nbr_typos(word),
+                        true,
+                        false,
+                    )?;
                    let located_term = LocatedQueryTerm {
                        value: ctx.term_interner.push(term),
                        positions: position..=position,
@@ -195,7 +201,8 @@ pub fn make_ngram(
    let max_nbr_typos =
        number_of_typos_allowed(ngram_str.as_str()).saturating_sub(terms.len() as u8 - 1);
-    let mut term = partially_initialized_term_from_word(ctx, &ngram_str, max_nbr_typos, is_prefix)?;
+    let mut term =
+        partially_initialized_term_from_word(ctx, &ngram_str, max_nbr_typos, is_prefix, true)?;
    // Now add the synonyms
    let index_synonyms = ctx.index.synonyms(ctx.txn)?;