Use SearchContext::word_docids everywhere instead of get_db_word_docids

Make get_db_word_docids private
2025-06-14 12:01:36 +02:00 · 2023-04-11 18:27:41 +02:00 · 2023-04-11 18:27:41 +02:00 · 5ab46324c4
commit 5ab46324c4
parent 325f17488a
7 changed files with 46 additions and 27 deletions
--- a/milli/src/search/new/db_cache.rs
+++ b/milli/src/search/new/db_cache.rs
@ -89,7 +89,7 @@ impl<'ctx> SearchContext<'ctx> {
    }
    /// Retrieve or insert the given value in the `word_docids` database.
-    pub fn get_db_word_docids(&mut self, word: Interned<String>) -> Result<Option<RoaringBitmap>> {
+    fn get_db_word_docids(&mut self, word: Interned<String>) -> Result<Option<RoaringBitmap>> {
        DatabaseCache::get_value(
            self.txn,
            word,
--- a/milli/src/search/new/logger/visual.rs
+++ b/milli/src/search/new/logger/visual.rs
@ -427,7 +427,7 @@ fill: \"#B6E2D3\"
                )?;
                for w in term_subset.all_single_words_except_prefix_db(ctx)? {
-                    let w = ctx.word_interner.get(w);
+                    let w = ctx.word_interner.get(w.interned());
                    writeln!(file, "{w}: word")?;
                }
                for p in term_subset.all_phrases(ctx)? {
--- a/milli/src/search/new/mod.rs
+++ b/milli/src/search/new/mod.rs
@ -50,6 +50,8 @@ use ranking_rules::{BoxRankingRule, RankingRule};
 use resolve_query_graph::compute_query_graph_docids;
 use sort::Sort;
 use self::interner::Interned;
 /// A structure used throughout the execution of a search query.
 pub struct SearchContext<'ctx> {
    pub index: &'ctx Index,
--- a/milli/src/search/new/query_term/mod.rs
+++ b/milli/src/search/new/query_term/mod.rs
@ -3,18 +3,18 @@ mod ntypo_subset;
 mod parse_query;
 mod phrase;
 use super::interner::{DedupInterner, Interned};
 use super::{limits, SearchContext};
 use crate::Result;
 use std::collections::BTreeSet;
 use std::ops::RangeInclusive;
 use compute_derivations::partially_initialized_term_from_word;
 use either::Either;
 pub use ntypo_subset::NTypoTermSubset;
 pub use parse_query::{located_query_terms_from_string, make_ngram, number_of_typos_allowed};
 pub use phrase::Phrase;
-use compute_derivations::partially_initialized_term_from_word;
+use super::interner::{DedupInterner, Interned};
 use super::{limits, SearchContext, Word};
 use crate::Result;
 /// A set of word derivations attached to a location in the search query.
 #[derive(Clone, PartialEq, Eq, Hash)]
@ -180,7 +180,7 @@ impl QueryTermSubset {
    pub fn all_single_words_except_prefix_db(
        &self,
        ctx: &mut SearchContext,
-    ) -> Result<BTreeSet<Interned<String>>> {
+    ) -> Result<BTreeSet<Word>> {
        let mut result = BTreeSet::default();
        // TODO: a compute_partially function
        if !self.one_typo_subset.is_empty() || !self.two_typo_subset.is_empty() {
@ -197,8 +197,20 @@ impl QueryTermSubset {
                    synonyms: _,
                    use_prefix_db: _,
                } = &original.zero_typo;
-                result.extend(zero_typo.iter().copied());
+                result.extend(zero_typo.iter().copied().map(|w| {
-                result.extend(prefix_of.iter().copied());
+                    if original.ngram_words.is_some() {
                        Word::Derived(w)
                    } else {
                        Word::Original(w)
                    }
                }));
                result.extend(prefix_of.iter().copied().map(|w| {
                    if original.ngram_words.is_some() {
                        Word::Derived(w)
                    } else {
                        Word::Original(w)
                    }
                }));
            }
            NTypoTermSubset::Subset { words, phrases: _ } => {
                let ZeroTypoTerm {
@ -210,10 +222,14 @@ impl QueryTermSubset {
                } = &original.zero_typo;
                if let Some(zero_typo) = zero_typo {
                    if words.contains(zero_typo) {
-                        result.insert(*zero_typo);
+                        if original.ngram_words.is_some() {
                            result.insert(Word::Derived(*zero_typo));
                        } else {
                            result.insert(Word::Original(*zero_typo));
                        }
                    }
-                result.extend(prefix_of.intersection(words).copied());
+                }
                result.extend(prefix_of.intersection(words).copied().map(Word::Derived));
            }
            NTypoTermSubset::Nothing => {}
        }
@ -223,13 +239,13 @@ impl QueryTermSubset {
                let Lazy::Init(OneTypoTerm { split_words: _, one_typo }) = &original.one_typo else {
                    panic!()
                };
-                result.extend(one_typo.iter().copied())
+                result.extend(one_typo.iter().copied().map(Word::Derived))
            }
            NTypoTermSubset::Subset { words, phrases: _ } => {
                let Lazy::Init(OneTypoTerm { split_words: _, one_typo }) = &original.one_typo else {
                    panic!()
                };
-                result.extend(one_typo.intersection(words));
+                result.extend(one_typo.intersection(words).copied().map(Word::Derived));
            }
            NTypoTermSubset::Nothing => {}
        };
@ -239,13 +255,13 @@ impl QueryTermSubset {
                let Lazy::Init(TwoTypoTerm { two_typos }) = &original.two_typo else {
                    panic!()
                };
-                result.extend(two_typos.iter().copied());
+                result.extend(two_typos.iter().copied().map(Word::Derived));
            }
            NTypoTermSubset::Subset { words, phrases: _ } => {
                let Lazy::Init(TwoTypoTerm { two_typos }) = &original.two_typo else {
                    panic!()
                };
-                result.extend(two_typos.intersection(words));
+                result.extend(two_typos.intersection(words).copied().map(Word::Derived));
            }
            NTypoTermSubset::Nothing => {}
        };
--- a/milli/src/search/new/ranking_rule_graph/exactness/mod.rs
+++ b/milli/src/search/new/ranking_rule_graph/exactness/mod.rs
@ -3,7 +3,8 @@ use roaring::RoaringBitmap;
 use super::{ComputedCondition, RankingRuleGraphTrait};
 use crate::search::new::interner::{DedupInterner, Interned};
 use crate::search::new::query_term::{ExactTerm, LocatedQueryTermSubset};
-use crate::{Result, RoaringBitmapCodec, SearchContext};
+use crate::search::new::Word;
 use crate::{Result, SearchContext};
 #[derive(Clone, PartialEq, Eq, Hash)]
 pub enum ExactnessCondition {
@ -26,7 +27,7 @@ fn compute_docids(
    let mut candidates = match exact_term {
        ExactTerm::Phrase(phrase) => ctx.get_phrase_docids(phrase)?.clone(),
        ExactTerm::Word(word) => {
-            if let Some(word_candidates) = ctx.get_db_word_docids(word)? {
+            if let Some(word_candidates) = ctx.word_docids(Word::Original(word))? {
                word_candidates
            } else {
                return Ok(Default::default());
--- a/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs
+++ b/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs
@ -9,7 +9,7 @@ use crate::search::new::interner::Interned;
 use crate::search::new::query_term::{Phrase, QueryTermSubset};
 use crate::search::new::ranking_rule_graph::ComputedCondition;
 use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
-use crate::search::new::SearchContext;
+use crate::search::new::{SearchContext, Word};
 use crate::Result;
 pub fn compute_docids(
@ -54,7 +54,7 @@ pub fn compute_docids(
        {
            compute_prefix_edges(
                ctx,
-                left_word,
+                left_word.interned(),
                right_prefix,
                left_phrase,
                forward_proximity,
@ -91,7 +91,7 @@ pub fn compute_docids(
                if universe.is_disjoint(ctx.get_phrase_docids(left_phrase)?) {
                    continue;
                }
-            } else if let Some(left_word_docids) = ctx.get_db_word_docids(left_word)? {
+            } else if let Some(left_word_docids) = ctx.word_docids(left_word)? {
                if universe.is_disjoint(&left_word_docids) {
                    continue;
                }
@ -101,7 +101,7 @@ pub fn compute_docids(
        for (right_word, right_phrase) in right_derivs {
            compute_non_prefix_edges(
                ctx,
-                left_word,
+                left_word.interned(),
                right_word,
                left_phrase,
                right_phrase,
@ -243,7 +243,7 @@ fn compute_non_prefix_edges(
 fn last_words_of_term_derivations(
    ctx: &mut SearchContext,
    t: &QueryTermSubset,
-) -> Result<BTreeSet<(Option<Interned<Phrase>>, Interned<String>)>> {
+) -> Result<BTreeSet<(Option<Interned<Phrase>>, Word)>> {
    let mut result = BTreeSet::new();
    for w in t.all_single_words_except_prefix_db(ctx)? {
@ -253,7 +253,7 @@ fn last_words_of_term_derivations(
        let phrase = ctx.phrase_interner.get(p);
        let last_term_of_phrase = phrase.words.last().unwrap();
        if let Some(last_word) = last_term_of_phrase {
-            result.insert((Some(p), *last_word));
+            result.insert((Some(p), Word::Original(*last_word)));
        }
    }
@ -266,7 +266,7 @@ fn first_word_of_term_iter(
    let mut result = BTreeSet::new();
    let all_words = t.all_single_words_except_prefix_db(ctx)?;
    for w in all_words {
-        result.insert((w, None));
+        result.insert((w.interned(), None));
    }
    for p in t.all_phrases(ctx)? {
        let phrase = ctx.phrase_interner.get(p);
--- a/milli/src/search/new/resolve_query_graph.rs
+++ b/milli/src/search/new/resolve_query_graph.rs
@ -9,7 +9,7 @@ use super::interner::Interned;
 use super::query_graph::QueryNodeData;
 use super::query_term::{Phrase, QueryTermSubset};
 use super::small_bitmap::SmallBitmap;
-use super::{QueryGraph, SearchContext};
+use super::{QueryGraph, SearchContext, Word};
 use crate::search::new::query_term::LocatedQueryTermSubset;
 use crate::Result;
@ -35,7 +35,7 @@ pub fn compute_query_term_subset_docids(
 ) -> Result<RoaringBitmap> {
    let mut docids = RoaringBitmap::new();
    for word in term.all_single_words_except_prefix_db(ctx)? {
-        if let Some(word_docids) = ctx.get_db_word_docids(word)? {
+        if let Some(word_docids) = ctx.word_docids(word)? {
            docids |= word_docids;
        }
    }
@ -125,7 +125,7 @@ pub fn compute_phrase_docids(
    }
    if words.len() == 1 {
        if let Some(word) = &words[0] {
-            if let Some(word_docids) = ctx.get_db_word_docids(*word)? {
+            if let Some(word_docids) = ctx.word_docids(Word::Original(*word))? {
                return Ok(word_docids);
            } else {
                return Ok(RoaringBitmap::new());