Compute edges of proximity graph lazily

2025-05-25 09:03:59 +02:00 · 2023-03-21 10:44:40 +01:00 · 2023-03-21 10:44:40 +01:00 · 83e5b4ed0d
commit 83e5b4ed0d
parent 272cd7ebbd
12 changed files with 345 additions and 841 deletions
--- a/milli/src/search/mod.rs
+++ b/milli/src/search/mod.rs
@ -367,6 +367,7 @@ pub fn word_derivations<'c>(
    match cache.entry((word.to_string(), is_prefix, max_typo)) {
        Entry::Occupied(entry) => Ok(entry.into_mut()),
        Entry::Vacant(entry) => {
            // println!("word derivations {word} {is_prefix} {max_typo}");
            let mut derived_words = Vec::new();
            if max_typo == 0 {
                if is_prefix {
--- a/milli/src/search/new/graph_based_ranking_rule.rs
+++ b/milli/src/search/new/graph_based_ranking_rule.rs
@ -318,9 +318,10 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
            let mut used_words = HashSet::new();
            let mut used_phrases = HashSet::new();
            for condition in used_conditions.iter() {
-                let condition = graph.conditions_interner.get(condition);
+                let (ws, ps) =
-                used_words.extend(G::words_used_by_condition(ctx, condition)?);
+                    condition_docids_cache.get_condition_used_words_and_phrases(condition);
-                used_phrases.extend(G::phrases_used_by_condition(ctx, condition)?);
+                used_words.extend(ws);
                used_phrases.extend(ps);
            }
            // 2. Remove the unused words and phrases from all the nodes in the graph
            let mut nodes_to_remove = vec![];
--- a/milli/src/search/new/interner.rs
+++ b/milli/src/search/new/interner.rs
@ -30,7 +30,7 @@ impl<T> Interned<T> {
 #[derive(Clone)]
 pub struct DedupInterner<T> {
    stable_store: Vec<T>,
-    lookup: FxHashMap<T, Interned<T>>,
+    lookup: FxHashMap<T, Interned<T>>, // TODO: Arc
 }
 impl<T> Default for DedupInterner<T> {
    fn default() -> Self {
--- a/milli/src/search/new/mod.rs
+++ b/milli/src/search/new/mod.rs
@ -287,368 +287,3 @@ impl<'a> Search<'a> {
        todo!()
    }
 }
 #[cfg(test)]
 mod tests {
    // use crate::allocator::ALLOC;
    use std::fs::File;
    use std::io::{BufRead, BufReader, Cursor, Seek};
    use std::time::Instant;
    use big_s::S;
    use heed::EnvOpenOptions;
    use maplit::hashset;
    use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
    // use crate::search::new::logger::detailed::DetailedSearchLogger;
    use crate::search::new::logger::DefaultSearchLogger;
    use crate::search::new::{execute_search, SearchContext};
    use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
    use crate::{Criterion, Index, Object, Search, TermsMatchingStrategy};
    /// Manual benchmark of `execute_search` against the on-disk index at `data_wiki`.
    ///
    /// Opens the index with a 100 GB map size, prints the number of document ids,
    /// then runs the same query over and over, printing the elapsed time in
    /// microseconds for each run.
    ///
    /// NOTE: the `loop` below has no `break`, so this test never returns on its own;
    /// it only stops if one of the `unwrap()` calls panics or the process is killed.
    #[test]
    fn search_wiki_new() {
        let mut options = EnvOpenOptions::new();
        options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
        let index = Index::new(options, "data_wiki").unwrap();
        let txn = index.read_txn().unwrap();
        println!("nbr docids: {}", index.documents_ids(&txn).unwrap().len());
        loop {
            // The timer covers the context creation and the search itself.
            let start = Instant::now();
            // let mut logger = crate::search::new::logger::detailed::DetailedSearchLogger::new("log");
            let mut ctx = SearchContext::new(&index, &txn);
            let results = execute_search(
                &mut ctx,
                "released from prison by the government",
                // "which a the releases from poison by the government",
                // "sun flower s are the best",
                // "zero config",
                TermsMatchingStrategy::Last,
                None,
                0,
                20,
                &mut DefaultSearchLogger,
                &mut DefaultSearchLogger,
                // &mut logger,
            )
            .unwrap();
            // logger.write_d2_description(&mut ctx);
            let elapsed = start.elapsed();
            println!("{}us", elapsed.as_micros());
            // Fetch every returned document and pretty-print it to JSON. The result is
            // bound to `_documents` and not read afterwards (the printing code that
            // used it is commented out at the end of the function).
            let _documents = index
                .documents(&txn, results.documents_ids.iter().copied())
                .unwrap()
                .into_iter()
                .map(|(id, obkv)| {
                    let mut object = serde_json::Map::default();
                    for (fid, fid_name) in index.fields_ids_map(&txn).unwrap().iter() {
                        let value = obkv.get(fid).unwrap();
                        let value: serde_json::Value = serde_json::from_slice(value).unwrap();
                        object.insert(fid_name.to_owned(), value);
                    }
                    (id, serde_json::to_string_pretty(&object).unwrap())
                })
                .collect::<Vec<_>>();
            println!("{}us: {:?}", elapsed.as_micros(), results);
        }
        // for (id, document) in documents {
        //     println!("{id}:");
        //     // println!("{document}");
        // }
    }
    /// Manual benchmark of the `Search` API against the on-disk index at `data_wiki`.
    ///
    /// Prints the index criteria, runs the query `"zero config"` once with
    /// `TermsMatchingStrategy::Last`, then prints the elapsed time in microseconds,
    /// the returned document ids, and each document id on its own line.
    #[test]
    fn search_wiki_old() {
        let mut options = EnvOpenOptions::new();
        options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
        let index = Index::new(options, "data_wiki").unwrap();
        let txn = index.read_txn().unwrap();
        let rr = index.criteria(&txn).unwrap();
        println!("{rr:?}");
        // The timer covers building the search, configuring it and executing it.
        let start = Instant::now();
        let mut s = Search::new(&txn, &index);
        s.query(
            // "which a the releases from poison by the government",
            // "sun flower s are the best",
            "zero config",
        );
        s.terms_matching_strategy(TermsMatchingStrategy::Last);
        // s.criterion_implementation_strategy(crate::CriterionImplementationStrategy::OnlyIterative);
        let docs = s.execute().unwrap();
        let elapsed = start.elapsed();
        // Fetch the matching documents and pretty-print each one to JSON
        // (done after `elapsed` is taken, so it is not part of the timing).
        let documents = index
            .documents(&txn, docs.documents_ids.iter().copied())
            .unwrap()
            .into_iter()
            .map(|(id, obkv)| {
                let mut object = serde_json::Map::default();
                for (fid, fid_name) in index.fields_ids_map(&txn).unwrap().iter() {
                    let value = obkv.get(fid).unwrap();
                    let value: serde_json::Value = serde_json::from_slice(value).unwrap();
                    object.insert(fid_name.to_owned(), value);
                }
                (id, serde_json::to_string_pretty(&object).unwrap())
            })
            .collect::<Vec<_>>();
        println!("{}us: {:?}", elapsed.as_micros(), docs.documents_ids);
        for (id, _document) in documents {
            println!("{id}:");
            // println!("{document}");
        }
    }
    /// Manual run of `execute_search` against the on-disk index at `data_movies`,
    /// with a `DetailedSearchLogger` created from the string `"log"`.
    ///
    /// Runs a single query, asks the logger to write its d2 description, then
    /// prints the elapsed time in microseconds together with the search results.
    #[test]
    fn search_movies_new() {
        let mut options = EnvOpenOptions::new();
        options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
        let index = Index::new(options, "data_movies").unwrap();
        let txn = index.read_txn().unwrap();
        // let primary_key = index.primary_key(&txn).unwrap().unwrap();
        // let primary_key = index.fields_ids_map(&txn).unwrap().id(primary_key).unwrap();
        // loop {
        let start = Instant::now();
        let mut logger = crate::search::new::logger::detailed::DetailedSearchLogger::new("log");
        let mut ctx = SearchContext::new(&index, &txn);
        let results = execute_search(
            &mut ctx,
            "releases from poison by the government",
            TermsMatchingStrategy::Last,
            None,
            0,
            20,
            &mut DefaultSearchLogger,
            &mut logger,
        )
        .unwrap();
        logger.write_d2_description(&mut ctx);
        // NOTE: `elapsed` is taken after `write_d2_description`, so the printed
        // duration includes the time spent writing the logger output.
        let elapsed = start.elapsed();
        // let ids = index
        //     .documents(&txn, results.iter().copied())
        //     .unwrap()
        //     .into_iter()
        //     .map(|x| {
        //         let obkv = &x.1;
        //         let id = obkv.get(primary_key).unwrap();
        //         let id: serde_json::Value = serde_json::from_slice(id).unwrap();
        //         id.as_str().unwrap().to_owned()
        //     })
        //     .collect::<Vec<_>>();
        println!("{}us: {results:?}", elapsed.as_micros());
        // println!("external ids: {ids:?}");
        // }
    }
    /// Manual benchmark of the `Search` API against the on-disk index at `data_movies`.
    ///
    /// Prints the index criteria, runs one query with `TermsMatchingStrategy::Last`
    /// and `CriterionImplementationStrategy::OnlySetBased`, then prints the elapsed
    /// time in microseconds, the internal document ids, and the value of the
    /// primary-key field of every returned document.
    #[test]
    fn search_movies_old() {
        let mut options = EnvOpenOptions::new();
        options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
        let index = Index::new(options, "data_movies").unwrap();
        let txn = index.read_txn().unwrap();
        let rr = index.criteria(&txn).unwrap();
        println!("{rr:?}");
        // Resolve the primary key name to its field id; panics if the index has no
        // primary key or the name is missing from the fields-ids map.
        let primary_key = index.primary_key(&txn).unwrap().unwrap();
        let primary_key = index.fields_ids_map(&txn).unwrap().id(primary_key).unwrap();
        let start = Instant::now();
        let mut s = Search::new(&txn, &index);
        s.query("which a the releases from poison by the government");
        s.terms_matching_strategy(TermsMatchingStrategy::Last);
        s.criterion_implementation_strategy(crate::CriterionImplementationStrategy::OnlySetBased);
        let docs = s.execute().unwrap();
        let elapsed = start.elapsed();
        // Read the primary-key field of each document. `as_str().unwrap()` panics
        // when the stored value is not a JSON string.
        let ids = index
            .documents(&txn, docs.documents_ids.iter().copied())
            .unwrap()
            .into_iter()
            .map(|x| {
                let obkv = &x.1;
                let id = obkv.get(primary_key).unwrap();
                let id: serde_json::Value = serde_json::from_slice(id).unwrap();
                id.as_str().unwrap().to_owned()
            })
            .collect::<Vec<_>>();
        println!("{}us: {:?}", elapsed.as_micros(), docs.documents_ids);
        println!("external ids: {ids:?}");
    }
    /// Applies a fixed set of settings to the on-disk index at `data_movies` and
    /// commits them.
    ///
    /// Despite the leading underscore this function carries `#[test]`, so it is
    /// picked up by the test harness and mutates the index when run.
    #[test]
    fn _settings_movies() {
        let mut options = EnvOpenOptions::new();
        options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
        let index = Index::new(options, "data_movies").unwrap();
        let mut wtxn = index.write_txn().unwrap();
        let config = IndexerConfig::default();
        let mut builder = Settings::new(&mut wtxn, &index, &config);
        builder.set_min_word_len_one_typo(5);
        builder.set_min_word_len_two_typos(100);
        builder.set_sortable_fields(hashset! { S("release_date") });
        builder.set_criteria(vec![
            Criterion::Words,
            Criterion::Typo,
            Criterion::Proximity,
            Criterion::Asc("release_date".to_owned()),
        ]);
        // The two closures are passed to `execute` as no-op callbacks
        // (the first ignores its argument, the second always returns `false`).
        builder.execute(|_| (), || false).unwrap();
        wtxn.commit().unwrap();
    }
    /// Configures the on-disk index at `data_movies` and indexes the movies JSON
    /// dataset into it, all inside one write transaction.
    ///
    /// The dataset is read from a hard-coded absolute path, so this only works on
    /// a machine where that file exists (`documents_from` panics otherwise).
    /// Despite the leading underscore this function carries `#[test]`.
    #[test]
    fn _index_movies() {
        let mut options = EnvOpenOptions::new();
        options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
        let index = Index::new(options, "data_movies").unwrap();
        let mut wtxn = index.write_txn().unwrap();
        let primary_key = "id";
        let searchable_fields = vec!["title", "overview"];
        let filterable_fields = vec!["release_date", "genres"];
        // Step 1: apply the settings (primary key, searchable/filterable fields,
        // typo thresholds and ranking criteria).
        let config = IndexerConfig::default();
        let mut builder = Settings::new(&mut wtxn, &index, &config);
        builder.set_primary_key(primary_key.to_owned());
        let searchable_fields = searchable_fields.iter().map(|s| s.to_string()).collect();
        builder.set_searchable_fields(searchable_fields);
        let filterable_fields = filterable_fields.iter().map(|s| s.to_string()).collect();
        builder.set_filterable_fields(filterable_fields);
        builder.set_min_word_len_one_typo(5);
        builder.set_min_word_len_two_typos(100);
        builder.set_criteria(vec![Criterion::Words, Criterion::Proximity]);
        builder.execute(|_| (), || false).unwrap();
        // Step 2: index the documents with the default indexing configuration.
        let config = IndexerConfig::default();
        let indexing_config = IndexDocumentsConfig::default();
        let builder =
            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| (), || false)
                .unwrap();
        let documents = documents_from(
            "/Users/meilisearch/Documents/milli2/benchmarks/datasets/movies.json",
            "json",
        );
        // `add_documents` returns the builder plus a separate user-error result;
        // both are unwrapped so any failure aborts the test.
        let (builder, user_error) = builder.add_documents(documents).unwrap();
        user_error.unwrap();
        builder.execute().unwrap();
        wtxn.commit().unwrap();
        index.prepare_for_closing().wait();
    }
    /// Configures the on-disk index at `data_wiki` and indexes the wiki CSV
    /// dataset into it, all inside one write transaction.
    ///
    /// No primary key is set; document ids are autogenerated instead
    /// (`autogenerate_docids: true`). The dataset is read from a hard-coded
    /// absolute path, so this only works on a machine where that file exists.
    /// Despite the leading underscore this function carries `#[test]`.
    #[test]
    fn _index_wiki() {
        let mut options = EnvOpenOptions::new();
        options.map_size(100 * 1024 * 1024 * 1024); // 100 GB
        let index = Index::new(options, "data_wiki").unwrap();
        let mut wtxn = index.write_txn().unwrap();
        // let primary_key = "id";
        let searchable_fields = vec!["body", "title", "url"];
        // let filterable_fields = vec![];
        // Step 1: apply the settings (searchable fields and ranking criteria).
        let config = IndexerConfig::default();
        let mut builder = Settings::new(&mut wtxn, &index, &config);
        // builder.set_primary_key(primary_key.to_owned());
        let searchable_fields = searchable_fields.iter().map(|s| s.to_string()).collect();
        builder.set_searchable_fields(searchable_fields);
        // let filterable_fields = filterable_fields.iter().map(|s| s.to_string()).collect();
        // builder.set_filterable_fields(filterable_fields);
        // builder.set_min_word_len_one_typo(5);
        // builder.set_min_word_len_two_typos(100);
        builder.set_criteria(vec![Criterion::Words, Criterion::Typo, Criterion::Proximity]);
        builder.execute(|_| (), || false).unwrap();
        // Step 2: index the documents, letting the indexer generate document ids.
        let config = IndexerConfig::default();
        let indexing_config =
            IndexDocumentsConfig { autogenerate_docids: true, ..Default::default() };
        let builder =
            IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| (), || false)
                .unwrap();
        let documents = documents_from(
            "/Users/meilisearch/Documents/milli2/benchmarks/datasets/smol-wiki-articles.csv",
            "csv",
        );
        let (builder, user_error) = builder.add_documents(documents).unwrap();
        user_error.unwrap();
        builder.execute().unwrap();
        wtxn.commit().unwrap();
        index.prepare_for_closing().wait();
    }
    /// Reads the dataset at `filename` and returns it as a documents batch reader.
    ///
    /// `filetype` selects the parser: `"csv"`, `"json"` or `"jsonl"`.
    ///
    /// # Panics
    ///
    /// Panics if the file cannot be opened, if `filetype` is not one of the three
    /// supported values, or if parsing or batch construction fails.
    fn documents_from(filename: &str, filetype: &str) -> DocumentsBatchReader<impl BufRead + Seek> {
        let file = match File::open(filename) {
            Ok(file) => file,
            Err(_) => panic!("could not find the dataset in: {}", filename),
        };
        let buffered = BufReader::new(file);
        // Every supported format is first converted to the in-memory batch encoding.
        let encoded = match filetype {
            "csv" => documents_from_csv(buffered),
            "json" => documents_from_json(buffered),
            "jsonl" => documents_from_jsonl(buffered),
            unknown => panic!("invalid update format {:?}", unknown),
        }
        .unwrap();
        DocumentsBatchReader::from_reader(Cursor::new(encoded)).unwrap()
    }
    /// Encodes a stream of JSON objects read from `reader` into a documents batch
    /// and returns the encoded bytes.
    ///
    /// # Panics
    ///
    /// Panics if any value in the stream fails to deserialize as an `Object`.
    fn documents_from_jsonl(reader: impl BufRead) -> crate::Result<Vec<u8>> {
        let mut batch = DocumentsBatchBuilder::new(Vec::new());
        let objects = serde_json::Deserializer::from_reader(reader).into_iter::<Object>();
        for parsed in objects {
            batch.append_json_object(&parsed.unwrap())?;
        }
        let encoded = batch.into_inner();
        encoded.map_err(Into::into)
    }
    /// Encodes the JSON array read from `reader` into a documents batch and
    /// returns the encoded bytes.
    fn documents_from_json(reader: impl BufRead) -> crate::Result<Vec<u8>> {
        let mut batch = DocumentsBatchBuilder::new(Vec::new());
        batch.append_json_array(reader)?;
        let encoded = batch.into_inner();
        encoded.map_err(Into::into)
    }
    /// Encodes the CSV data read from `reader` into a documents batch and
    /// returns the encoded bytes.
    fn documents_from_csv(reader: impl BufRead) -> crate::Result<Vec<u8>> {
        let mut batch = DocumentsBatchBuilder::new(Vec::new());
        batch.append_csv(csv::Reader::from_reader(reader))?;
        let encoded = batch.into_inner();
        encoded.map_err(Into::into)
    }
 }
--- a/milli/src/search/new/ranking_rule_graph/condition_docids_cache.rs
+++ b/milli/src/search/new/ranking_rule_graph/condition_docids_cache.rs
@ -1,19 +1,28 @@
 use std::marker::PhantomData;
-use fxhash::FxHashMap;
+use fxhash::{FxHashMap, FxHashSet};
 use roaring::RoaringBitmap;
 use super::{RankingRuleGraph, RankingRuleGraphTrait};
 use crate::search::new::interner::Interned;
 use crate::search::new::query_term::Phrase;
 use crate::search::new::SearchContext;
 use crate::Result;
 // TODO: give a generation to each universe, then be able to get the exact
 // delta of docids between two universes of different generations!
 /// The cached result of resolving one ranking-rule condition.
 ///
 /// Instances are stored as values of `ConditionDocIdsCache::cache`, keyed by
 /// the interned condition.
 #[derive(Default)]
 pub struct ComputedCondition {
    /// The document ids returned by `resolve_condition` for this condition.
    docids: RoaringBitmap,
    /// The length of the universe the docids were computed against; compared
    /// with the current universe's length to decide whether the cached docids
    /// can be returned as they are.
    universe_len: u64,
    /// The interned words returned by `resolve_condition` alongside the docids.
    used_words: FxHashSet<Interned<String>>,
    /// The interned phrases returned by `resolve_condition` alongside the docids.
    used_phrases: FxHashSet<Interned<Phrase>>,
 }
 /// A cache storing the document ids associated with each ranking rule edge
 pub struct ConditionDocIdsCache<G: RankingRuleGraphTrait> {
-    pub cache: FxHashMap<Interned<G::Condition>, (u64, RoaringBitmap)>,
+    pub cache: FxHashMap<Interned<G::Condition>, ComputedCondition>,
    _phantom: PhantomData<G>,
 }
 impl<G: RankingRuleGraphTrait> Default for ConditionDocIdsCache<G> {
@ -22,6 +31,14 @@ impl<G: RankingRuleGraphTrait> Default for ConditionDocIdsCache<G> {
    }
 }
 impl<G: RankingRuleGraphTrait> ConditionDocIdsCache<G> {
    /// Returns the sets of words and phrases recorded in the cache for the
    /// given condition.
    ///
    /// # Panics
    ///
    /// Panics if the cache holds no entry for `interned_condition`.
    pub fn get_condition_used_words_and_phrases(
        &mut self,
        interned_condition: Interned<G::Condition>,
    ) -> (&FxHashSet<Interned<String>>, &FxHashSet<Interned<Phrase>>) {
        let computed = &self.cache[&interned_condition];
        (&computed.used_words, &computed.used_phrases)
    }
    /// Retrieve the document ids for the given edge condition.
    ///
    /// If the cache does not yet contain these docids, they are computed
@ -30,14 +47,14 @@ impl<G: RankingRuleGraphTrait> ConditionDocIdsCache<G> {
        &'s mut self,
        ctx: &mut SearchContext<'ctx>,
        interned_condition: Interned<G::Condition>,
-        graph: &RankingRuleGraph<G>,
+        graph: &mut RankingRuleGraph<G>,
        // TODO: maybe universe doesn't belong here
        universe: &RoaringBitmap,
    ) -> Result<&'s RoaringBitmap> {
        if self.cache.contains_key(&interned_condition) {
            // TODO compare length of universe compared to the one in self
            // if it is smaller, then update the value
-            let (universe_len, docids) = self.cache.entry(interned_condition).or_default();
+            let ComputedCondition { docids, universe_len, .. } =
                self.cache.entry(interned_condition).or_default();
            if *universe_len == universe.len() {
                return Ok(docids);
            } else {
@ -46,12 +63,13 @@ impl<G: RankingRuleGraphTrait> ConditionDocIdsCache<G> {
                return Ok(docids);
            }
        }
-        // TODO: maybe universe doesn't belong here
+        let condition = graph.conditions_interner.get_mut(interned_condition);
-        let condition = graph.conditions_interner.get(interned_condition);
+        let (docids, used_words, used_phrases) = G::resolve_condition(ctx, condition, universe)?;
-        // TODO: faster way to do this?
+        let _ = self.cache.insert(
-        let docids = G::resolve_condition(ctx, condition, universe)?;
+            interned_condition,
-        let _ = self.cache.insert(interned_condition, (universe.len(), docids));
+            ComputedCondition { docids, universe_len: universe.len(), used_words, used_phrases },
-        let (_, docids) = &self.cache[&interned_condition];
+        );
        let ComputedCondition { docids, .. } = &self.cache[&interned_condition];
        Ok(docids)
    }
 }
--- a/milli/src/search/new/ranking_rule_graph/mod.rs
+++ b/milli/src/search/new/ranking_rule_graph/mod.rs
@ -15,11 +15,11 @@ mod proximity;
 /// Implementation of the `typo` ranking rule
 mod typo;
 use std::collections::HashSet;
 use std::hash::Hash;
 pub use condition_docids_cache::ConditionDocIdsCache;
 pub use dead_ends_cache::DeadEndsCache;
 use fxhash::FxHashSet;
 pub use proximity::{ProximityCondition, ProximityGraph};
 use roaring::RoaringBitmap;
 pub use typo::{TypoCondition, TypoGraph};
@ -80,23 +80,13 @@ pub trait RankingRuleGraphTrait: Sized {
        condition: &Self::Condition,
    ) -> Result<String>;
    fn words_used_by_condition<'ctx>(
        ctx: &mut SearchContext<'ctx>,
        condition: &Self::Condition,
    ) -> Result<HashSet<Interned<String>>>;
    fn phrases_used_by_condition<'ctx>(
        ctx: &mut SearchContext<'ctx>,
        condition: &Self::Condition,
    ) -> Result<HashSet<Interned<Phrase>>>;
    /// Compute the document ids associated with the given edge condition,
    /// restricted to the given universe.
    fn resolve_condition<'ctx>(
        ctx: &mut SearchContext<'ctx>,
        condition: &Self::Condition,
        universe: &RoaringBitmap,
-    ) -> Result<RoaringBitmap>;
+    ) -> Result<(RoaringBitmap, FxHashSet<Interned<String>>, FxHashSet<Interned<Phrase>>)>;
    /// Return the costs and conditions of the edges going from the source node to the destination node
    fn build_edges<'ctx>(
--- a/milli/src/search/new/ranking_rule_graph/proximity/build.rs
+++ b/milli/src/search/new/ranking_rule_graph/proximity/build.rs
@ -1,56 +1,18 @@
 #![allow(clippy::too_many_arguments)]
 use std::collections::BTreeMap;
 use heed::RoTxn;
 use super::ProximityCondition;
 use crate::search::new::db_cache::DatabaseCache;
 use crate::search::new::interner::{DedupInterner, Interned};
 use crate::search::new::query_graph::QueryNodeData;
-use crate::search::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm};
+use crate::search::new::query_term::LocatedQueryTerm;
 use crate::search::new::ranking_rule_graph::proximity::WordPair;
 use crate::search::new::{QueryNode, SearchContext};
 use crate::Result;
 /// Iterates over the candidate "last words" of the term `t`.
 ///
 /// Yields `(None, word)` for every single word returned by
 /// `all_single_words_except_prefix_db`, followed by `(Some(phrase), word)` for
 /// every phrase of the term whose final slot holds a word. Phrases whose final
 /// slot is `None` yield nothing.
 ///
 /// Panics if a phrase has an empty `words` list.
 fn last_word_of_term_iter<'t>(
    t: &'t QueryTerm,
    phrase_interner: &'t DedupInterner<Phrase>,
 ) -> impl Iterator<Item = (Option<Interned<Phrase>>, Interned<String>)> + 't {
    let single_words = t.all_single_words_except_prefix_db().map(|word| (None, word));
    let phrase_tails = t.all_phrases().flat_map(move |phrase_id| {
        let words = &phrase_interner.get(phrase_id).words;
        words.last().unwrap().map(|tail| (Some(phrase_id), tail))
    });
    single_words.chain(phrase_tails)
 }
 /// Iterates over the candidate "first words" of the term `t`.
 ///
 /// Yields `(word, None)` for every single word returned by
 /// `all_single_words_except_prefix_db`, followed by `(word, Some(phrase))` for
 /// every phrase of the term whose first slot holds a word. Phrases whose first
 /// slot is `None` yield nothing.
 ///
 /// Panics if a phrase has an empty `words` list.
 fn first_word_of_term_iter<'t>(
    t: &'t QueryTerm,
    phrase_interner: &'t DedupInterner<Phrase>,
 ) -> impl Iterator<Item = (Interned<String>, Option<Interned<Phrase>>)> + 't {
    let single_words = t.all_single_words_except_prefix_db().map(|word| (word, None));
    let phrase_heads = t.all_phrases().flat_map(move |phrase_id| {
        let words = &phrase_interner.get(phrase_id).words;
        words.first().unwrap().map(|head| (head, Some(phrase_id)))
    });
    single_words.chain(phrase_heads)
 }
 pub fn build_edges<'ctx>(
-    ctx: &mut SearchContext<'ctx>,
+    _ctx: &mut SearchContext<'ctx>,
    conditions_interner: &mut DedupInterner<ProximityCondition>,
    from_node: &QueryNode,
    to_node: &QueryNode,
 ) -> Result<Vec<(u8, Option<Interned<ProximityCondition>>)>> {
    let SearchContext {
        index,
        txn,
        db_cache,
        word_interner,
        phrase_interner,
        term_interner,
        term_docids: _,
    } = ctx;
    let right_term = match &to_node.data {
        QueryNodeData::End => return Ok(vec![(0, None)]),
        QueryNodeData::Deleted | QueryNodeData::Start => return Ok(vec![]),
@ -59,13 +21,11 @@ pub fn build_edges<'ctx>(
    let LocatedQueryTerm { value: right_term_interned, positions: right_positions } = right_term;
-    let (right_term, right_start_position, right_ngram_length) =
+    let (right_start_position, right_ngram_length) =
-        (term_interner.get(*right_term_interned), *right_positions.start(), right_positions.len());
+        (*right_positions.start(), right_positions.len());
-    let (left_term, left_end_position) = match &from_node.data {
+    let (left_term_interned, left_end_position) = match &from_node.data {
-        QueryNodeData::Term(LocatedQueryTerm { value, positions }) => {
+        QueryNodeData::Term(LocatedQueryTerm { value, positions }) => (*value, *positions.end()),
            (term_interner.get(*value), *positions.end())
        }
        QueryNodeData::Deleted => return Ok(vec![]),
        QueryNodeData::Start => {
            return Ok(vec![(
@ -94,175 +54,24 @@ pub fn build_edges<'ctx>(
        )]);
    }
-    let mut cost_word_pairs = BTreeMap::<u8, Vec<WordPair>>::new();
+    let mut conditions = vec![];
-
+    for cost in right_ngram_length..(7 + right_ngram_length) {
-    if let Some(right_prefix) = right_term.use_prefix_db {
+        let cost = cost as u8;
-        for (left_phrase, left_word) in last_word_of_term_iter(left_term, phrase_interner) {
+        conditions.push((
-            add_prefix_edges(
+            cost,
-                index,
+            Some(conditions_interner.insert(ProximityCondition::Uninit {
-                txn,
+                left_term: left_term_interned,
-                db_cache,
+                right_term: *right_term_interned,
-                word_interner,
+                right_term_ngram_len: right_ngram_length as u8,
                right_ngram_length,
                left_word,
                right_prefix,
                &mut cost_word_pairs,
                left_phrase,
            )?;
        }
    }
    // TODO: add safeguard in case the cartesian product is too large!
    // even if we restrict the word derivations to a maximum of 100, the size of the
    // cartesian product could reach a maximum of 10_000 derivations, which is way too much.
    // Maybe prioritise the product of zero typo derivations, then the product of zero-typo/one-typo
    // + one-typo/zero-typo, then one-typo/one-typo, then ... until an arbitrary limit has been
    // reached
    for (left_phrase, left_word) in last_word_of_term_iter(left_term, phrase_interner) {
        for (right_word, right_phrase) in first_word_of_term_iter(right_term, phrase_interner) {
            add_non_prefix_edges(
                index,
                txn,
                db_cache,
                word_interner,
                right_ngram_length,
                left_word,
                right_word,
                &mut cost_word_pairs,
                &[left_phrase, right_phrase].iter().copied().flatten().collect::<Vec<_>>(),
            )?;
        }
    }
    let mut new_edges = cost_word_pairs
        .into_iter()
        .map(|(cost, word_pairs)| {
            (
                cost,
-                Some(
+            })),
-                    conditions_interner
+        ))
-                        .insert(ProximityCondition::Pairs { pairs: word_pairs.into_boxed_slice() }),
+    }
-                ),
+
-            )
+    conditions.push((
-        })
+        (7 + right_ngram_length) as u8,
        .collect::<Vec<_>>();
    new_edges.push((
        8 + (right_ngram_length - 1) as u8,
        Some(conditions_interner.insert(ProximityCondition::Term { term: *right_term_interned })),
    ));
    Ok(new_edges)
 }
-fn add_prefix_edges<'ctx>(
+    Ok(conditions)
    index: &mut &crate::Index,
    txn: &'ctx RoTxn,
    db_cache: &mut DatabaseCache<'ctx>,
    word_interner: &mut DedupInterner<String>,
    right_ngram_length: usize,
    left_word: Interned<String>,
    right_prefix: Interned<String>,
    cost_proximity_word_pairs: &mut BTreeMap<u8, Vec<WordPair>>,
    left_phrase: Option<Interned<Phrase>>,
 ) -> Result<()> {
    for proximity in 1..=(8 - right_ngram_length) {
        let cost = (proximity + right_ngram_length - 1) as u8;
        // TODO: if we had access to the universe here, we could already check whether
        // the bitmap corresponding to this word pair is disjoint with the universe or not
        if db_cache
            .get_word_prefix_pair_proximity_docids(
                index,
                txn,
                word_interner,
                left_word,
                right_prefix,
                proximity as u8,
            )?
            .is_some()
        {
            cost_proximity_word_pairs.entry(cost).or_default().push(WordPair::WordPrefix {
                phrases: left_phrase.into_iter().collect(),
                left: left_word,
                right_prefix,
                proximity: proximity as u8,
            });
        }
        // No swapping when computing the proximity between a phrase and a word
        if left_phrase.is_none()
            && db_cache
                .get_prefix_word_pair_proximity_docids(
                    index,
                    txn,
                    word_interner,
                    right_prefix,
                    left_word,
                    proximity as u8 - 1,
                )?
                .is_some()
        {
            cost_proximity_word_pairs.entry(cost).or_default().push(WordPair::WordPrefixSwapped {
                left_prefix: right_prefix,
                right: left_word,
                proximity: proximity as u8 - 1,
            });
        }
    }
    Ok(())
 }
 /// Records, for every proximity from 1 to `8 - right_ngram_length`, the word
 /// pairs between `word1` and `word2` that have an entry in the word-pair
 /// proximity database.
 ///
 /// Each pair found is pushed into `cost_proximity_word_pairs` under the cost
 /// `proximity + right_ngram_length - 1`. The pair is looked up in the given
 /// order at `proximity`, and, only when `phrases` is empty and `proximity > 1`,
 /// in swapped order at `proximity - 1`.
 ///
 /// Returns an error if a database lookup fails.
 fn add_non_prefix_edges<'ctx>(
    index: &mut &crate::Index,
    txn: &'ctx RoTxn,
    db_cache: &mut DatabaseCache<'ctx>,
    word_interner: &mut DedupInterner<String>,
    right_ngram_length: usize,
    word1: Interned<String>,
    word2: Interned<String>,
    cost_proximity_word_pairs: &mut BTreeMap<u8, Vec<WordPair>>,
    phrases: &[Interned<Phrase>],
 ) -> Result<()> {
    let max_proximity = 8 - right_ngram_length;
    for proximity in 1..=max_proximity {
        let cost = (proximity + right_ngram_length - 1) as u8;
        let forward_proximity = proximity as u8;

        // Pair in query order: word1 followed by word2.
        let forward_found = db_cache
            .get_word_pair_proximity_docids(
                index,
                txn,
                word_interner,
                word1,
                word2,
                forward_proximity,
            )?
            .is_some();
        if forward_found {
            let pair = WordPair::Words {
                phrases: phrases.to_vec(),
                left: word1,
                right: word2,
                proximity: forward_proximity,
            };
            cost_proximity_word_pairs.entry(cost).or_default().push(pair);
        }

        // no swapping when either term is a phrase, nor at proximity 1
        if proximity <= 1 || !phrases.is_empty() {
            continue;
        }

        // Pair in swapped order: word2 followed by word1, one step closer.
        let swapped_proximity = forward_proximity - 1;
        let swapped_found = db_cache
            .get_word_pair_proximity_docids(
                index,
                txn,
                word_interner,
                word2,
                word1,
                swapped_proximity,
            )?
            .is_some();
        if swapped_found {
            let pair = WordPair::Words {
                phrases: vec![],
                left: word2,
                right: word1,
                proximity: swapped_proximity,
            };
            cost_proximity_word_pairs.entry(cost).or_default().push(pair);
        }
    }
    Ok(())
 }
--- a/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs
+++ b/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs
@ -1,6 +1,15 @@
 #![allow(clippy::too_many_arguments)]
 use std::iter::FromIterator;
 use fxhash::FxHashSet;
 use heed::RoTxn;
 use roaring::RoaringBitmap;
-use super::{ProximityCondition, WordPair};
+use super::ProximityCondition;
 use crate::search::new::db_cache::DatabaseCache;
 use crate::search::new::interner::{DedupInterner, Interned};
 use crate::search::new::query_term::{Phrase, QueryTerm};
 use crate::search::new::SearchContext;
 use crate::{CboRoaringBitmapCodec, Result};
@ -8,7 +17,7 @@ pub fn compute_docids<'ctx>(
    ctx: &mut SearchContext<'ctx>,
    condition: &ProximityCondition,
    universe: &RoaringBitmap,
-) -> Result<RoaringBitmap> {
+) -> Result<(RoaringBitmap, FxHashSet<Interned<String>>, FxHashSet<Interned<Phrase>>)> {
    let SearchContext {
        index,
        txn,
@ -18,96 +27,238 @@ pub fn compute_docids<'ctx>(
        phrase_interner,
        term_interner,
    } = ctx;
-    let pairs = match condition {
+
-        ProximityCondition::Term { term } => {
+    let (left_term, right_term, right_term_ngram_len, cost) = match condition {
-            return term_docids
+        ProximityCondition::Uninit { left_term, right_term, right_term_ngram_len, cost } => {
-                .get_query_term_docids(
+            (*left_term, *right_term, *right_term_ngram_len, *cost)
-                    index,
+        }
-                    txn,
+        ProximityCondition::Term { term } => {
-                    db_cache,
+            let term_v = term_interner.get(*term);
-                    word_interner,
+            return Ok((
-                    term_interner,
+                term_docids
-                    phrase_interner,
+                    .get_query_term_docids(
-                    *term,
+                        index,
-                )
+                        txn,
-                .cloned()
+                        db_cache,
                        word_interner,
                        term_interner,
                        phrase_interner,
                        *term,
                    )?
                    .clone(),
                FxHashSet::from_iter(term_v.all_single_words_except_prefix_db()),
                FxHashSet::from_iter(term_v.all_phrases()),
            ));
        }
        ProximityCondition::Pairs { pairs } => pairs,
    };
-    let mut pair_docids = RoaringBitmap::new();
+
-    for pair in pairs.iter() {
+    let left_term = term_interner.get(left_term);
-        let pair = match pair {
+    let right_term = term_interner.get(right_term);
-            WordPair::Words { phrases, left, right, proximity } => {
+
-                let mut docids = db_cache
+    // e.g. for the simple words `sun .. flower`
-                    .get_word_pair_proximity_docids(
+    // the cost is 5
-                        index,
+    // the forward proximity is 5
-                        txn,
+    // the backward proximity is 4
-                        word_interner,
+    //
-                        *left,
+    // for the 2gram `the sunflower`
-                        *right,
+    // the cost is 5
-                        *proximity,
+    // the forward proximity is 4
-                    )?
+    // the backward proximity is 3
-                    .map(CboRoaringBitmapCodec::deserialize_from)
+    let forward_proximity = 1 + cost - right_term_ngram_len;
-                    .transpose()?
+    let backward_proximity = cost - right_term_ngram_len;
-                    .unwrap_or_default();
+
-                if !docids.is_empty() {
+    let mut used_words = FxHashSet::default();
-                    for phrase in phrases {
+    let mut used_phrases = FxHashSet::default();
-                        docids &= ctx.term_docids.get_phrase_docids(
+
-                            index,
+    let mut docids = RoaringBitmap::new();
-                            txn,
+
-                            db_cache,
+    if let Some(right_prefix) = right_term.use_prefix_db {
-                            word_interner,
+        for (left_phrase, left_word) in last_word_of_term_iter(left_term, phrase_interner) {
-                            &ctx.phrase_interner,
+            compute_prefix_edges(
-                            *phrase,
+                index,
-                        )?;
+                txn,
-                    }
+                db_cache,
-                }
+                word_interner,
-                docids
+                left_word,
-            }
+                right_prefix,
-            WordPair::WordPrefix { phrases, left, right_prefix, proximity } => {
+                left_phrase,
-                let mut docids = db_cache
+                forward_proximity,
-                    .get_word_prefix_pair_proximity_docids(
+                backward_proximity,
-                        index,
+                &mut docids,
-                        txn,
+                universe,
-                        word_interner,
+                &mut used_words,
-                        *left,
+                &mut used_phrases,
-                        *right_prefix,
+            )?;
-                        *proximity,
+        }
                    )?
                    .map(CboRoaringBitmapCodec::deserialize_from)
                    .transpose()?
                    .unwrap_or_default();
                if !docids.is_empty() {
                    for phrase in phrases {
                        docids &= ctx.term_docids.get_phrase_docids(
                            index,
                            txn,
                            db_cache,
                            word_interner,
                            &ctx.phrase_interner,
                            *phrase,
                        )?;
                    }
                }
                docids
            }
            WordPair::WordPrefixSwapped { left_prefix, right, proximity } => db_cache
                .get_prefix_word_pair_proximity_docids(
                    index,
                    txn,
                    word_interner,
                    *left_prefix,
                    *right,
                    *proximity,
                )?
                .map(CboRoaringBitmapCodec::deserialize_from)
                .transpose()?
                .unwrap_or_default(),
        };
        // TODO: deserialize bitmap within a universe
        let bitmap = universe & pair;
        pair_docids |= bitmap;
    }
-    Ok(pair_docids)
+    // TODO: add safeguard in case the cartesian product is too large!
    // even if we restrict the word derivations to a maximum of 100, the size of the
    // cartesian product could reach a maximum of 10_000 derivations, which is way too much.
    // Maybe prioritise the product of zero typo derivations, then the product of zero-typo/one-typo
    // + one-typo/zero-typo, then one-typo/one-typo, then ... until an arbitrary limit has been
    // reached
    for (left_phrase, left_word) in last_word_of_term_iter(left_term, phrase_interner) {
        for (right_word, right_phrase) in first_word_of_term_iter(right_term, phrase_interner) {
            compute_non_prefix_edges(
                index,
                txn,
                db_cache,
                word_interner,
                left_word,
                right_word,
                &[left_phrase, right_phrase].iter().copied().flatten().collect::<Vec<_>>(),
                forward_proximity,
                backward_proximity,
                &mut docids,
                universe,
                &mut used_words,
                &mut used_phrases,
            )?;
        }
    }
    Ok((docids, used_words, used_phrases))
 }
 /// Merges into `docids` the documents of `universe` in which `left_word` and a word
 /// starting with `right_prefix` are found close to each other.
 ///
 /// Two database lookups are performed:
 /// - forward: `get_word_prefix_pair_proximity_docids(left_word, right_prefix)` at
 ///   `forward_proximity`;
 /// - backward (swapped): `get_prefix_word_pair_proximity_docids(right_prefix, left_word)`
 ///   at `backward_proximity`, only when `left_word` does not come from a phrase and
 ///   `backward_proximity >= 1`.
 ///
 /// `left_word` and `right_prefix` are added to `used_words` whenever a lookup yields at
 /// least one document of the universe. `left_phrase`, when present, is always added to
 /// `used_phrases`, even though its docids are not intersected yet (see the TODO below).
 ///
 /// # Errors
 ///
 /// Propagates any error returned by the database cache or by the bitmap deserialization.
 fn compute_prefix_edges<'ctx>(
    index: &mut &crate::Index,
    txn: &'ctx RoTxn,
    db_cache: &mut DatabaseCache<'ctx>,
    word_interner: &mut DedupInterner<String>,
    left_word: Interned<String>,
    right_prefix: Interned<String>,
    left_phrase: Option<Interned<Phrase>>,
    forward_proximity: u8,
    backward_proximity: u8,
    docids: &mut RoaringBitmap,
    universe: &RoaringBitmap,
    used_words: &mut FxHashSet<Interned<String>>,
    used_phrases: &mut FxHashSet<Interned<Phrase>>,
 ) -> Result<()> {
    if let Some(phrase) = left_phrase {
        // TODO: compute the phrase, take the intersection between
        // the phrase and the docids
        used_phrases.insert(phrase); // This is not fully correct
    }
    if let Some(new_docids) = db_cache.get_word_prefix_pair_proximity_docids(
        index,
        txn,
        word_interner,
        left_word,
        right_prefix,
        forward_proximity,
    )? {
        // Only keep the documents that belong to the current universe.
        let new_docids = universe & CboRoaringBitmapCodec::deserialize_from(new_docids)?;
        if !new_docids.is_empty() {
            used_words.insert(left_word);
            used_words.insert(right_prefix);
            *docids |= new_docids;
        }
    }
    // No swapping when computing the proximity between a phrase and a word.
    // A backward proximity of 0 is skipped as well, mirroring the guard used in
    // `compute_non_prefix_edges` (presumably no pair is stored at proximity 0 — confirm).
    if backward_proximity >= 1 && left_phrase.is_none() {
        if let Some(new_docids) = db_cache.get_prefix_word_pair_proximity_docids(
            index,
            txn,
            word_interner,
            right_prefix,
            left_word,
            backward_proximity,
        )? {
            // Only keep the documents that belong to the current universe.
            let new_docids = universe & CboRoaringBitmapCodec::deserialize_from(new_docids)?;
            if !new_docids.is_empty() {
                used_words.insert(left_word);
                used_words.insert(right_prefix);
                *docids |= new_docids;
            }
        }
    }
    Ok(())
 }
 /// Merges into `docids` the documents of `universe` in which `word1` and `word2` are
 /// found close to each other.
 ///
 /// The pair is looked up as (`word1`, `word2`) at `forward_proximity` and, when no phrase
 /// is involved and `backward_proximity >= 1`, as (`word2`, `word1`) at
 /// `backward_proximity`. Both words are added to `used_words` whenever a lookup yields at
 /// least one document of the universe. Every phrase of `phrases` is added to
 /// `used_phrases` regardless of the outcome of the lookups.
 ///
 /// # Errors
 ///
 /// Propagates any error returned by the database cache or by the bitmap deserialization.
 fn compute_non_prefix_edges<'ctx>(
    index: &mut &crate::Index,
    txn: &'ctx RoTxn,
    db_cache: &mut DatabaseCache<'ctx>,
    word_interner: &mut DedupInterner<String>,
    word1: Interned<String>,
    word2: Interned<String>,
    phrases: &[Interned<Phrase>],
    forward_proximity: u8,
    backward_proximity: u8,
    docids: &mut RoaringBitmap,
    universe: &RoaringBitmap,
    used_words: &mut FxHashSet<Interned<String>>,
    used_phrases: &mut FxHashSet<Interned<Phrase>>,
 ) -> Result<()> {
    // TODO: compute the docids associated with these phrases
    // take their intersection with the new docids
    used_phrases.extend(phrases); // This is not fully correct

    // no swapping when either term is a phrase
    let swap_allowed = backward_proximity >= 1 && phrases.is_empty();

    // (left word, right word, proximity, whether the lookup must be performed)
    let lookups = [
        (word1, word2, forward_proximity, true),
        (word2, word1, backward_proximity, swap_allowed),
    ];
    for (left, right, proximity, enabled) in lookups {
        if !enabled {
            continue;
        }
        let found = db_cache.get_word_pair_proximity_docids(
            index,
            txn,
            word_interner,
            left,
            right,
            proximity,
        )?;
        if let Some(bytes) = found {
            // Only keep the documents that belong to the current universe.
            let in_universe = universe & CboRoaringBitmapCodec::deserialize_from(bytes)?;
            if !in_universe.is_empty() {
                used_words.insert(word1);
                used_words.insert(word2);
                *docids |= in_universe;
            }
        }
    }
    Ok(())
 }
 /// Iterates over the words that can end the term `t`.
 ///
 /// Yields `(None, word)` for each word returned by `all_single_words_except_prefix_db`,
 /// then `(Some(phrase), last_word)` for each phrase of the term whose last slot holds a
 /// word. Phrases whose last slot is `None` yield nothing.
 ///
 /// # Panics
 ///
 /// Panics if a phrase of the term contains no slot at all.
 fn last_word_of_term_iter<'t>(
    t: &'t QueryTerm,
    phrase_interner: &'t DedupInterner<Phrase>,
 ) -> impl Iterator<Item = (Option<Interned<Phrase>>, Interned<String>)> + 't {
    let single_words = t.all_single_words_except_prefix_db().map(|word| (None, word));
    let phrase_endings = t.all_phrases().flat_map(move |phrase_id| {
        let last_slot = *phrase_interner.get(phrase_id).words.last().unwrap();
        last_slot.map(|word| (Some(phrase_id), word))
    });
    single_words.chain(phrase_endings)
 }
 /// Iterates over the words that can start the term `t`.
 ///
 /// Yields `(word, None)` for each word returned by `all_single_words_except_prefix_db`,
 /// then `(first_word, Some(phrase))` for each phrase of the term whose first slot holds a
 /// word. Phrases whose first slot is `None` yield nothing.
 ///
 /// # Panics
 ///
 /// Panics if a phrase of the term contains no slot at all.
 fn first_word_of_term_iter<'t>(
    t: &'t QueryTerm,
    phrase_interner: &'t DedupInterner<Phrase>,
 ) -> impl Iterator<Item = (Interned<String>, Option<Interned<Phrase>>)> + 't {
    let single_words = t.all_single_words_except_prefix_db().map(|word| (word, None));
    let phrase_starts = t.all_phrases().flat_map(move |phrase_id| {
        let first_slot = *phrase_interner.get(phrase_id).words.first().unwrap();
        first_slot.map(|word| (word, Some(phrase_id)))
    });
    single_words.chain(phrase_starts)
 }
--- a/milli/src/search/new/ranking_rule_graph/proximity/mod.rs
+++ b/milli/src/search/new/ranking_rule_graph/proximity/mod.rs
@ -1,9 +1,7 @@
 pub mod build;
 pub mod compute_docids;
-use std::collections::HashSet;
+use fxhash::FxHashSet;
 use std::iter::FromIterator;
 use roaring::RoaringBitmap;
 use super::{DeadEndsCache, RankingRuleGraph, RankingRuleGraphTrait};
@ -13,31 +11,17 @@ use crate::search::new::query_term::{Phrase, QueryTerm};
 use crate::search::new::{QueryGraph, QueryNode, SearchContext};
 use crate::Result;
 /// A pair of words (or a word and a prefix) together with the proximity at which the
 /// documents containing both are looked up in the database.
 #[derive(Debug, Clone, PartialEq, Eq, Hash)]
 pub enum WordPair {
    /// Two full words, resolved with `get_word_pair_proximity_docids`.
    Words {
        /// Phrases whose docids are intersected with the docids of the pair.
        phrases: Vec<Interned<Phrase>>,
        /// First word of the lookup key.
        left: Interned<String>,
        /// Second word of the lookup key.
        right: Interned<String>,
        /// Proximity used in the lookup key.
        proximity: u8,
    },
    /// A full word and a prefix, resolved with `get_word_prefix_pair_proximity_docids`.
    WordPrefix {
        /// Phrases whose docids are intersected with the docids of the pair.
        phrases: Vec<Interned<Phrase>>,
        /// Full word, first element of the lookup key.
        left: Interned<String>,
        /// Prefix, second element of the lookup key.
        right_prefix: Interned<String>,
        /// Proximity used in the lookup key.
        proximity: u8,
    },
    /// A prefix and a full word, resolved with `get_prefix_word_pair_proximity_docids`.
    /// This variant carries no phrases.
    WordPrefixSwapped {
        /// Prefix, first element of the lookup key.
        left_prefix: Interned<String>,
        /// Full word, second element of the lookup key.
        right: Interned<String>,
        /// Proximity used in the lookup key.
        proximity: u8,
    },
 }
 #[derive(Clone, PartialEq, Eq, Hash)]
 pub enum ProximityCondition {
-    Term { term: Interned<QueryTerm> },
+    Uninit {
-    Pairs { pairs: Box<[WordPair]> },
+        left_term: Interned<QueryTerm>,
        right_term: Interned<QueryTerm>,
        right_term_ngram_len: u8,
        cost: u8,
    },
    Term {
        term: Interned<QueryTerm>,
    },
 }
 /// Type on which `RankingRuleGraphTrait` is implemented for the proximity graph.
 /// It has no variants, so no value of this type can be constructed.
 pub enum ProximityGraph {}
@ -49,7 +33,8 @@ impl RankingRuleGraphTrait for ProximityGraph {
        ctx: &mut SearchContext<'ctx>,
        condition: &Self::Condition,
        universe: &RoaringBitmap,
-    ) -> Result<roaring::RoaringBitmap> {
+    ) -> Result<(roaring::RoaringBitmap, FxHashSet<Interned<String>>, FxHashSet<Interned<Phrase>>)>
    {
        compute_docids::compute_docids(ctx, condition, universe)
    }
@ -79,107 +64,14 @@ impl RankingRuleGraphTrait for ProximityGraph {
        condition: &Self::Condition,
    ) -> Result<String> {
        match condition {
            ProximityCondition::Uninit { cost, .. } => {
                //  TODO
                Ok(format!("{cost}: cost"))
            }
            ProximityCondition::Term { term } => {
                let term = ctx.term_interner.get(*term);
                Ok(format!("{} : exists", ctx.word_interner.get(term.original)))
            }
            ProximityCondition::Pairs { pairs } => {
                let mut s = String::new();
                for pair in pairs.iter() {
                    match pair {
                        WordPair::Words { phrases, left, right, proximity } => {
                            let left = ctx.word_interner.get(*left);
                            let right = ctx.word_interner.get(*right);
                            if !phrases.is_empty() {
                                s.push_str(&format!("{} phrases + ", phrases.len()));
                            }
                            s.push_str(&format!("\"{left} {right}\": {proximity}\n"));
                        }
                        WordPair::WordPrefix { phrases, left, right_prefix, proximity } => {
                            let left = ctx.word_interner.get(*left);
                            let right = ctx.word_interner.get(*right_prefix);
                            if !phrases.is_empty() {
                                s.push_str(&format!("{} phrases + ", phrases.len()));
                            }
                            s.push_str(&format!("\"{left} {right}...\" : {proximity}\n"));
                        }
                        WordPair::WordPrefixSwapped { left_prefix, right, proximity } => {
                            let left = ctx.word_interner.get(*left_prefix);
                            let right = ctx.word_interner.get(*right);
                            s.push_str(&format!("\"{left}... {right}\" : {proximity}\n"));
                        }
                    }
                }
                Ok(s)
            }
        }
    }
    /// Collects the words referenced by `condition`.
    ///
    /// For a `Term` condition, these are the words returned by
    /// `all_single_words_except_prefix_db` on the term. For a `Pairs` condition, these are
    /// both sides of every pair, prefixes included.
    fn words_used_by_condition<'ctx>(
        ctx: &mut SearchContext<'ctx>,
        condition: &Self::Condition,
    ) -> Result<HashSet<Interned<String>>> {
        let pairs = match condition {
            ProximityCondition::Term { term } => {
                let term = ctx.term_interner.get(*term);
                return Ok(term.all_single_words_except_prefix_db().collect());
            }
            ProximityCondition::Pairs { pairs } => pairs,
        };
        let used_words = pairs
            .iter()
            .flat_map(|pair| match pair {
                WordPair::Words { left, right, .. } => [*left, *right],
                // TODO: this is not correct, there should be another trait method for collecting the prefixes
                // to be used with the prefix DBs
                WordPair::WordPrefix { left, right_prefix, .. } => [*left, *right_prefix],
                // TODO: this is not correct, there should be another trait method for collecting the prefixes
                // to be used with the prefix DBs
                WordPair::WordPrefixSwapped { left_prefix, right, .. } => [*left_prefix, *right],
            })
            .collect();
        Ok(used_words)
    }
    /// Collects the phrases referenced by `condition`.
    ///
    /// For a `Term` condition, these are the phrases returned by `all_phrases` on the term.
    /// For a `Pairs` condition, these are the phrases attached to every `Words` and
    /// `WordPrefix` pair; `WordPrefixSwapped` pairs contribute nothing.
    fn phrases_used_by_condition<'ctx>(
        ctx: &mut SearchContext<'ctx>,
        condition: &Self::Condition,
    ) -> Result<HashSet<Interned<Phrase>>> {
        let pairs = match condition {
            ProximityCondition::Term { term } => {
                let term = ctx.term_interner.get(*term);
                return Ok(term.all_phrases().collect());
            }
            ProximityCondition::Pairs { pairs } => pairs,
        };
        let no_phrases: &[Interned<Phrase>] = &[];
        let used_phrases = pairs
            .iter()
            .flat_map(|pair| match pair {
                WordPair::Words { phrases, .. } | WordPair::WordPrefix { phrases, .. } => {
                    phrases.as_slice()
                }
                WordPair::WordPrefixSwapped { .. } => no_phrases,
            })
            .copied()
            .collect();
        Ok(used_phrases)
    }
 }
--- a/milli/src/search/new/ranking_rule_graph/typo/mod.rs
+++ b/milli/src/search/new/ranking_rule_graph/typo/mod.rs
@ -1,7 +1,8 @@
-use std::collections::HashSet;
+// use std::collections::HashSet;
 use std::fmt::Write;
 use std::iter::FromIterator;
 use fxhash::FxHashSet;
 use roaring::RoaringBitmap;
 use super::{DeadEndsCache, RankingRuleGraph, RankingRuleGraphTrait};
@ -26,7 +27,7 @@ impl RankingRuleGraphTrait for TypoGraph {
        ctx: &mut SearchContext<'ctx>,
        condition: &Self::Condition,
        universe: &RoaringBitmap,
-    ) -> Result<RoaringBitmap> {
+    ) -> Result<(RoaringBitmap, FxHashSet<Interned<String>>, FxHashSet<Interned<Phrase>>)> {
        let SearchContext {
            index,
            txn,
@ -48,7 +49,12 @@ impl RankingRuleGraphTrait for TypoGraph {
                condition.term,
            )?;
-        Ok(docids)
+        let term = term_interner.get(condition.term);
        Ok((
            docids,
            FxHashSet::from_iter(term.all_single_words_except_prefix_db()),
            FxHashSet::from_iter(term.all_phrases()),
        ))
    }
    fn build_edges<'ctx>(
@ -202,21 +208,21 @@ impl RankingRuleGraphTrait for TypoGraph {
        Ok(s)
    }
-    fn words_used_by_condition<'ctx>(
+    // fn words_used_by_condition<'ctx>(
-        ctx: &mut SearchContext<'ctx>,
+    //     ctx: &mut SearchContext<'ctx>,
-        condition: &Self::Condition,
+    //     condition: &Self::Condition,
-    ) -> Result<HashSet<Interned<String>>> {
+    // ) -> Result<HashSet<Interned<String>>> {
-        let TypoCondition { term, .. } = condition;
+    //     let TypoCondition { term, .. } = condition;
-        let term = ctx.term_interner.get(*term);
+    //     let term = ctx.term_interner.get(*term);
-        Ok(HashSet::from_iter(term.all_single_words_except_prefix_db()))
+    //     Ok(HashSet::from_iter(term.all_single_words_except_prefix_db()))
-    }
+    // }
-    fn phrases_used_by_condition<'ctx>(
+    // fn phrases_used_by_condition<'ctx>(
-        ctx: &mut SearchContext<'ctx>,
+    //     ctx: &mut SearchContext<'ctx>,
-        condition: &Self::Condition,
+    //     condition: &Self::Condition,
-    ) -> Result<HashSet<Interned<Phrase>>> {
+    // ) -> Result<HashSet<Interned<Phrase>>> {
-        let TypoCondition { term, .. } = condition;
+    //     let TypoCondition { term, .. } = condition;
-        let term = ctx.term_interner.get(*term);
+    //     let term = ctx.term_interner.get(*term);
-        Ok(HashSet::from_iter(term.all_phrases()))
+    //     Ok(HashSet::from_iter(term.all_phrases()))
-    }
+    // }
 }
--- a/milli/src/search/new/ranking_rules.rs
+++ b/milli/src/search/new/ranking_rules.rs
@ -125,7 +125,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
    let mut results = vec![];
    let mut cur_offset = 0usize;
-    /// Add the candidates to the results. Take `distinct`, `from`, `limit`, and `cur_offset`
+    /// Add the candidates to the results. Take `distinct`, `from`, `length`, and `cur_offset`
    /// into account and inform the logger.
    macro_rules! maybe_add_to_results {
        ($candidates:expr) => {
@ -181,6 +181,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
            cur_offset += len as usize;
        };
    }
    while results.len() < length {
        // The universe for this bucket is zero or one element, so we don't need to sort
        // anything, just extend the results and go back to the parent ranking rule.
--- a/milli/src/search/new/words.rs
+++ b/milli/src/search/new/words.rs
@ -9,9 +9,9 @@ use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
 use crate::{Result, TermsMatchingStrategy};
 pub struct Words {
-    exhausted: bool,
+    exhausted: bool, // TODO: remove
    query_graph: Option<QueryGraph>,
-    iterating: bool,
+    iterating: bool, // TODO: remove
    positions_to_remove: Vec<i8>,
    terms_matching_strategy: TermsMatchingStrategy,
 }