Fix: computation of initial universe, code organisation

2025-07-02 03:18:30 +02:00 · 2023-03-06 08:35:01 +01:00 · 2023-03-06 08:35:01 +01:00 · cab2b6bcda
commit cab2b6bcda
parent c4979a2fda
11 changed files with 341 additions and 275 deletions
--- a/milli/src/search/new/graph_based_ranking_rule.rs
+++ b/milli/src/search/new/graph_based_ranking_rule.rs
@ -3,8 +3,8 @@ use roaring::RoaringBitmap;
 use super::db_cache::DatabaseCache;
 use super::logger::SearchLogger;
-use super::ranking_rule_graph::edge_docids_cache::EdgeDocidsCache;
+use super::ranking_rule_graph::EdgeDocidsCache;
-use super::ranking_rule_graph::empty_paths_cache::EmptyPathsCache;
+use super::ranking_rule_graph::EmptyPathsCache;
 use super::ranking_rule_graph::{RankingRuleGraph, RankingRuleGraphTrait};
 use super::{BitmapOrAllRef, QueryGraph, RankingRule, RankingRuleOutput};
--- a/milli/src/search/new/logger/detailed.rs
+++ b/milli/src/search/new/logger/detailed.rs
@ -5,13 +5,13 @@ use std::fs::File;
 use std::time::Instant;
 use std::{io::Write, path::PathBuf};
-use crate::new::ranking_rule_graph::typo::TypoGraph;
+use crate::new::ranking_rule_graph::TypoGraph;
 use crate::new::{QueryNode, QueryGraph};
 use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
-use crate::new::ranking_rule_graph::empty_paths_cache::EmptyPathsCache;
+use crate::new::ranking_rule_graph::EmptyPathsCache;
 use crate::new::ranking_rule_graph::{Edge, EdgeDetails, RankingRuleGraphTrait};
 use crate::new::ranking_rule_graph::{
-    proximity::ProximityGraph, RankingRuleGraph,
+    ProximityGraph, RankingRuleGraph,
 };
 use super::{RankingRule, SearchLogger};
@ -21,18 +21,18 @@ pub enum SearchEvents {
        ranking_rule_idx: usize,
        query: QueryGraph,
        universe: RoaringBitmap,
-        time: Instant,
+        time: Instant
    },
    RankingRuleNextBucket {
        ranking_rule_idx: usize,
        universe: RoaringBitmap,
        candidates: RoaringBitmap,
-        time: Instant,
+        time: Instant
    },
    RankingRuleEndIteration {
        ranking_rule_idx: usize,
        universe: RoaringBitmap,
-        time: Instant,
+        time: Instant
    },
    ExtendResults {
        new: Vec<u32>,
@ -56,13 +56,14 @@ pub enum SearchEvents {
        distances: Vec<Vec<u64>>,
        cost: u64,
    },
-    RankingRuleSkipBucket { ranking_rule_idx: usize, candidates: RoaringBitmap, time: Instant, },
+    RankingRuleSkipBucket { ranking_rule_idx: usize, candidates: RoaringBitmap, time: Instant },
 }
 pub struct DetailedSearchLogger {
    folder_path: PathBuf,
    initial_query: Option<QueryGraph>,
    initial_query_time: Option<Instant>,
    query_for_universe: Option<QueryGraph>,
    initial_universe: Option<RoaringBitmap>,
    ranking_rules_ids: Option<Vec<String>>,
    events: Vec<SearchEvents>,
@ -73,6 +74,7 @@ impl DetailedSearchLogger {
            folder_path: PathBuf::new().join(folder_path),
            initial_query: None,
            initial_query_time: None,
            query_for_universe: None,
            initial_universe: None,
            ranking_rules_ids: None,
            events: vec![],
@ -81,9 +83,13 @@ impl DetailedSearchLogger {
 }
 impl SearchLogger<QueryGraph> for DetailedSearchLogger {
-    fn initial_query(&mut self, query: &QueryGraph, time: Instant) {
+    fn initial_query(&mut self, query: &QueryGraph) {
        self.initial_query = Some(query.clone());
-        self.initial_query_time = Some(time);
+        self.initial_query_time = Some(Instant::now());
    }
    fn query_for_universe(&mut self, query: &QueryGraph) {
        self.query_for_universe = Some(query.clone());
    }
    fn initial_universe(&mut self, universe: &RoaringBitmap) {
@ -99,13 +105,13 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
        _ranking_rule: &dyn RankingRule<'transaction, QueryGraph>,
        query: &QueryGraph,
        universe: &RoaringBitmap,
-        time: Instant,
+        
    ) {
        self.events.push(SearchEvents::RankingRuleStartIteration {
            ranking_rule_idx,
            query: query.clone(),
            universe: universe.clone(),
-            time,
+            time: Instant::now(),
        })
    }
@ -115,13 +121,13 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
        _ranking_rule: &dyn RankingRule<'transaction, QueryGraph>,
        universe: &RoaringBitmap,
        candidates: &RoaringBitmap,
-        time: Instant,
+        
    ) {
        self.events.push(SearchEvents::RankingRuleNextBucket {
            ranking_rule_idx,
            universe: universe.clone(),
            candidates: candidates.clone(),
-            time,
+            time: Instant::now(),
        })
    }
    fn skip_bucket_ranking_rule<'transaction>(
@ -129,12 +135,12 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
        ranking_rule_idx: usize,
        _ranking_rule: &dyn RankingRule<'transaction, QueryGraph>,
        candidates: &RoaringBitmap,
-        time: Instant,
+        
    ) {
        self.events.push(SearchEvents::RankingRuleSkipBucket {
            ranking_rule_idx,
            candidates: candidates.clone(),
-            time
+            time: Instant::now()
        })
    }
@ -143,12 +149,12 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
        ranking_rule_idx: usize,
        _ranking_rule: &dyn RankingRule<'transaction, QueryGraph>,
        universe: &RoaringBitmap,
-        time: Instant,
+        
    ) {
        self.events.push(SearchEvents::RankingRuleEndIteration {
            ranking_rule_idx,
            universe: universe.clone(),
-            time
+            time: Instant::now()
        })
    }
    fn add_to_results(&mut self, docids: &[u32]) {
@ -184,6 +190,20 @@ impl DetailedSearchLogger {
        let index_path = self.folder_path.join("index.d2");
        let mut file = std::fs::File::create(index_path).unwrap();
        writeln!(&mut file, "direction: right").unwrap();
        writeln!(&mut file, "Initial Query Graph: {{").unwrap();
        let initial_query_graph = self.initial_query.as_ref().unwrap();
        Self::query_graph_d2_description(initial_query_graph, &mut file);
        writeln!(&mut file, "}}").unwrap();
        writeln!(&mut file, "Query Graph Used To Compute Universe: {{").unwrap();
        let query_graph_for_universe = self.query_for_universe.as_ref().unwrap();
        Self::query_graph_d2_description(query_graph_for_universe, &mut file);
        writeln!(&mut file, "}}").unwrap();
        let initial_universe = self.initial_universe.as_ref().unwrap();
        writeln!(&mut file, "Initial Universe Length {}", initial_universe.len()).unwrap();
        writeln!(&mut file, "Control Flow Between Ranking Rules: {{").unwrap();
        writeln!(&mut file, "shape: sequence_diagram").unwrap();
        for (idx, rr_id) in self.ranking_rules_ids.as_ref().unwrap().iter().enumerate() {
--- a/milli/src/search/new/logger/mod.rs
+++ b/milli/src/search/new/logger/mod.rs
@ -2,19 +2,17 @@
 pub mod detailed;
 use roaring::RoaringBitmap;
 use std::time::Instant;
 use super::{
-    ranking_rule_graph::{
+    ranking_rule_graph::{EmptyPathsCache, ProximityGraph, RankingRuleGraph, TypoGraph},
        empty_paths_cache::EmptyPathsCache, proximity::ProximityGraph, typo::TypoGraph,
        RankingRuleGraph,
    },
    RankingRule, RankingRuleQueryTrait,
 };
 pub struct DefaultSearchLogger;
 impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
-    fn initial_query(&mut self, _query: &Q, _time: Instant) {}
+    fn initial_query(&mut self, _query: &Q) {}
    fn query_for_universe(&mut self, _query: &Q) {}
    fn initial_universe(&mut self, _universe: &RoaringBitmap) {}
@ -26,7 +24,6 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
        _ranking_rule: &dyn RankingRule<'transaction, Q>,
        _query: &Q,
        _universe: &RoaringBitmap,
        _time: Instant,
    ) {
    }
@ -36,7 +33,6 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
        _ranking_rule: &dyn RankingRule<'transaction, Q>,
        _universe: &RoaringBitmap,
        _candidates: &RoaringBitmap,
        _time: Instant,
    ) {
    }
    fn skip_bucket_ranking_rule<'transaction>(
@ -44,7 +40,6 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
        _ranking_rule_idx: usize,
        _ranking_rule: &dyn RankingRule<'transaction, Q>,
        _candidates: &RoaringBitmap,
        _time: Instant,
    ) {
    }
@ -53,7 +48,6 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
        _ranking_rule_idx: usize,
        _ranking_rule: &dyn RankingRule<'transaction, Q>,
        _universe: &RoaringBitmap,
        _time: Instant,
    ) {
    }
@ -85,7 +79,10 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
 }
 pub trait SearchLogger<Q: RankingRuleQueryTrait> {
-    fn initial_query(&mut self, query: &Q, time: Instant);
+    fn initial_query(&mut self, query: &Q);
    fn query_for_universe(&mut self, query: &Q);
    fn initial_universe(&mut self, universe: &RoaringBitmap);
    fn ranking_rules(&mut self, rr: &[&mut dyn RankingRule<Q>]);
@ -96,7 +93,6 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
        ranking_rule: &dyn RankingRule<'transaction, Q>,
        query: &Q,
        universe: &RoaringBitmap,
        time: Instant,
    );
    fn next_bucket_ranking_rule<'transaction>(
        &mut self,
@ -104,21 +100,18 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
        ranking_rule: &dyn RankingRule<'transaction, Q>,
        universe: &RoaringBitmap,
        candidates: &RoaringBitmap,
        time: Instant,
    );
    fn skip_bucket_ranking_rule<'transaction>(
        &mut self,
        ranking_rule_idx: usize,
        ranking_rule: &dyn RankingRule<'transaction, Q>,
        candidates: &RoaringBitmap,
        time: Instant,
    );
    fn end_iteration_ranking_rule<'transaction>(
        &mut self,
        ranking_rule_idx: usize,
        ranking_rule: &dyn RankingRule<'transaction, Q>,
        universe: &RoaringBitmap,
        time: Instant,
    );
    fn add_to_results(&mut self, docids: &[u32]);
--- a/milli/src/search/new/mod.rs
+++ b/milli/src/search/new/mod.rs
@ -9,55 +9,113 @@ mod resolve_query_graph;
 mod sort;
 mod words;
-use charabia::Tokenize;
+use std::collections::BTreeSet;
 use heed::RoTxn;
 use query_graph::{QueryGraph, QueryNode};
 pub use ranking_rules::{
-    execute_search, RankingRule, RankingRuleOutput, RankingRuleOutputIter,
+    apply_ranking_rules, RankingRule, RankingRuleOutput, RankingRuleOutputIter,
    RankingRuleOutputIterWrapper, RankingRuleQueryTrait,
 };
 use crate::{
    new::query_term::located_query_terms_from_string, Filter, Index, Result, TermsMatchingStrategy,
 };
 use charabia::Tokenize;
 use db_cache::DatabaseCache;
 use heed::RoTxn;
 use query_graph::{QueryGraph, QueryNode};
 use roaring::RoaringBitmap;
-use self::db_cache::DatabaseCache;
+use self::{
-use self::query_term::{word_derivations, LocatedQueryTerm};
+    logger::SearchLogger,
-use crate::{Index, Result};
+    resolve_query_graph::{resolve_query_graph, NodeDocIdsCache},
 };
 pub enum BitmapOrAllRef<'s> {
    Bitmap(&'s RoaringBitmap),
    All,
 }
-pub fn make_query_graph<'transaction>(
+#[allow(clippy::too_many_arguments)]
 pub fn resolve_maximally_reduced_query_graph<'transaction>(
    index: &Index,
-    txn: &RoTxn,
+    txn: &'transaction heed::RoTxn,
    db_cache: &mut DatabaseCache<'transaction>,
    universe: &RoaringBitmap,
    query_graph: &QueryGraph,
    node_docids_cache: &mut NodeDocIdsCache,
    matching_strategy: TermsMatchingStrategy,
    logger: &mut dyn SearchLogger<QueryGraph>,
 ) -> Result<RoaringBitmap> {
    let mut graph = query_graph.clone();
    let mut positions_to_remove = match matching_strategy {
        TermsMatchingStrategy::Last => {
            let mut all_positions = BTreeSet::new();
            for n in query_graph.nodes.iter() {
                match n {
                    QueryNode::Term(term) => {
                        all_positions.extend(term.positions.clone().into_iter());
                    }
                    QueryNode::Deleted | QueryNode::Start | QueryNode::End => {}
                }
            }
            all_positions.into_iter().collect()
        }
        TermsMatchingStrategy::All => vec![],
    };
    // don't remove the first term
    positions_to_remove.remove(0);
    loop {
        if positions_to_remove.is_empty() {
            break;
        } else {
            let position_to_remove = positions_to_remove.pop().unwrap();
            let _ = graph.remove_words_at_position(position_to_remove);
        }
    }
    logger.query_for_universe(&graph);
    let docids = resolve_query_graph(index, txn, db_cache, node_docids_cache, &graph, universe)?;
    Ok(docids)
 }
 #[allow(clippy::too_many_arguments)]
 pub fn execute_search<'transaction>(
    index: &Index,
    txn: &'transaction RoTxn,
    db_cache: &mut DatabaseCache<'transaction>,
    query: &str,
-) -> Result<QueryGraph> {
+    filters: Option<Filter>,
    from: usize,
    length: usize,
    logger: &mut dyn SearchLogger<QueryGraph>,
 ) -> Result<Vec<u32>> {
    assert!(!query.is_empty());
-    let authorize_typos = index.authorize_typos(txn)?;
+    let query_terms = located_query_terms_from_string(index, txn, query.tokenize(), None).unwrap();
-    let min_len_one_typo = index.min_word_len_one_typo(txn)?;
+    let graph = QueryGraph::from_query(index, txn, db_cache, query_terms)?;
    let min_len_two_typos = index.min_word_len_two_typos(txn)?;
-    let exact_words = index.exact_words(txn)?;
+    logger.initial_query(&graph);
    let fst = index.words_fst(txn)?;
-    // TODO: get rid of this closure
+    let universe = if let Some(filters) = filters {
-    // also, ngrams can have one typo?
+        filters.evaluate(txn, index)?
-    let query = LocatedQueryTerm::from_query(query.tokenize(), None, move |word, is_prefix| {
+    } else {
-        let typos = if !authorize_typos
+        index.documents_ids(txn)?
-            || word.len() < min_len_one_typo as usize
+    };
-            || exact_words.as_ref().map_or(false, |fst| fst.contains(word))
+
-        {
+    let mut node_docids_cache = NodeDocIdsCache::default();
-            0
+
-        } else if word.len() < min_len_two_typos as usize {
+    let universe = resolve_maximally_reduced_query_graph(
-            1
+        index,
-        } else {
+        txn,
-            2
+        db_cache,
-        };
+        &universe,
-        word_derivations(index, txn, word, typos, is_prefix, &fst)
+        &graph,
-    })
+        &mut node_docids_cache,
-    .unwrap();
+        TermsMatchingStrategy::Last,
-    let graph = QueryGraph::from_query(index, txn, db_cache, query)?;
+        logger,
-    Ok(graph)
+    )?;
    // TODO: create ranking rules here, reuse the node docids cache for the words ranking rule
    logger.initial_universe(&universe);
    apply_ranking_rules(index, txn, db_cache, &graph, &universe, from, length, logger)
 }
--- a/milli/src/search/new/query_graph.rs
+++ b/milli/src/search/new/query_graph.rs
@ -4,7 +4,7 @@ use heed::RoTxn;
 use roaring::RoaringBitmap;
 use super::db_cache::DatabaseCache;
-use super::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
+use super::query_term::{self, LocatedQueryTerm, QueryTerm, WordDerivations};
 use crate::{Index, Result};
 #[derive(Debug, Clone)]
@ -31,6 +31,7 @@ pub struct QueryGraph {
 }
 fn _assert_sizes() {
    // TODO: QueryNodes are too big now, 184B is an unreasonable size
    let _: [u8; 184] = [0; std::mem::size_of::<QueryNode>()];
    let _: [u8; 48] = [0; std::mem::size_of::<Edges>()];
 }
@ -75,7 +76,7 @@ impl QueryGraph {
        index: &Index,
        txn: &RoTxn,
        _db_cache: &mut DatabaseCache<'transaction>,
-        query: Vec<LocatedQueryTerm>,
+        terms: Vec<LocatedQueryTerm>,
    ) -> Result<QueryGraph> {
        // TODO: maybe empty nodes should not be removed here, to compute
        // the score of the `words` ranking rule correctly
@ -90,8 +91,8 @@ impl QueryGraph {
            (vec![], vec![], vec![graph.root_node]);
        // TODO: split words / synonyms
-        for length in 1..=query.len() {
+        for length in 1..=terms.len() {
-            let query = &query[..length];
+            let query = &terms[..length];
            let term0 = query.last().unwrap();
@ -104,7 +105,7 @@ impl QueryGraph {
            if !prev1.is_empty() {
                if let Some((ngram2_str, ngram2_pos)) =
-                    LocatedQueryTerm::ngram2(&query[length - 2], &query[length - 1])
+                    query_term::ngram2(&query[length - 2], &query[length - 1])
                {
                    if word_set.contains(ngram2_str.as_bytes()) {
                        let ngram2 = LocatedQueryTerm {
@ -128,11 +129,9 @@ impl QueryGraph {
                }
            }
            if !prev2.is_empty() {
-                if let Some((ngram3_str, ngram3_pos)) = LocatedQueryTerm::ngram3(
+                if let Some((ngram3_str, ngram3_pos)) =
-                    &query[length - 3],
+                    query_term::ngram3(&query[length - 3], &query[length - 2], &query[length - 1])
-                    &query[length - 2],
+                {
                    &query[length - 1],
                ) {
                    if word_set.contains(ngram3_str.as_bytes()) {
                        let ngram3 = LocatedQueryTerm {
                            value: QueryTerm::Word {
@ -143,8 +142,9 @@ impl QueryGraph {
                                    one_typo: vec![],
                                    two_typos: vec![],
                                    use_prefix_db: false,
-                                    synonyms: vec![],  // TODO: ngram synonyms
+                                    synonyms: vec![], // TODO: ngram synonyms
                                    split_words: None, // TODO: maybe ngram split words?
                                                      // would be nice for typos like su nflower
                                },
                            },
                            positions: ngram3_pos,
--- a/milli/src/search/new/query_term.rs
+++ b/milli/src/search/new/query_term.rs
@ -178,9 +178,15 @@ fn split_best_frequency(
 #[derive(Debug, Clone)]
 pub enum QueryTerm {
    // TODO: should there be SplitWord, NGram2, and NGram3 variants?
    // NGram2 can have 1 typo and synonyms
    // NGram3 cannot have typos but can have synonyms
    // SplitWords are a phrase
    // Can NGrams be prefixes?
    Phrase { phrase: Phrase },
    Word { derivations: WordDerivations },
 }
 impl QueryTerm {
    pub fn original_single_word(&self) -> Option<&str> {
        match self {
@ -209,53 +215,77 @@ impl LocatedQueryTerm {
            QueryTerm::Word { derivations, .. } => derivations.is_empty(),
        }
    }
-    /// Create primitive query from tokenized query string,
+}
    /// the primitive query is an intermediate state to build the query tree.
    pub fn from_query(
        query: NormalizedTokenIter<Vec<u8>>,
        words_limit: Option<usize>,
        // TODO:` use index + txn + ? instead of closure
        derivations: impl Fn(&str, bool) -> Result<WordDerivations>,
    ) -> Result<Vec<LocatedQueryTerm>> {
        let mut primitive_query = Vec::new();
        let mut phrase = Vec::new();
-        let mut quoted = false;
+pub fn located_query_terms_from_string<'transaction>(
    index: &Index,
    txn: &'transaction RoTxn,
    query: NormalizedTokenIter<Vec<u8>>,
    words_limit: Option<usize>,
 ) -> Result<Vec<LocatedQueryTerm>> {
    let authorize_typos = index.authorize_typos(txn)?;
    let min_len_one_typo = index.min_word_len_one_typo(txn)?;
    let min_len_two_typos = index.min_word_len_two_typos(txn)?;
-        let parts_limit = words_limit.unwrap_or(usize::MAX);
+    let exact_words = index.exact_words(txn)?;
    let fst = index.words_fst(txn)?;
-        let mut position = -1i8;
+    let nbr_typos = |word: &str| {
-        let mut phrase_start = -1i8;
+        if !authorize_typos
-        let mut phrase_end = -1i8;
+            || word.len() < min_len_one_typo as usize
            || exact_words.as_ref().map_or(false, |fst| fst.contains(word))
        {
            0
        } else if word.len() < min_len_two_typos as usize {
            1
        } else {
            2
        }
    };
-        let mut peekable = query.peekable();
+    let derivations = |word: &str, is_prefix: bool| {
-        while let Some(token) = peekable.next() {
+        word_derivations(index, txn, word, nbr_typos(word), is_prefix, &fst)
-            // early return if word limit is exceeded
+    };
            if primitive_query.len() >= parts_limit {
                return Ok(primitive_query);
            }
-            match token.kind {
+    let mut primitive_query = Vec::new();
-                TokenKind::Word | TokenKind::StopWord => {
+    let mut phrase = Vec::new();
-                    position += 1;
+
-                    // 1. if the word is quoted we push it in a phrase-buffer waiting for the ending quote,
+    let mut quoted = false;
-                    // 2. if the word is not the last token of the query and is not a stop_word we push it as a non-prefix word,
+
-                    // 3. if the word is the last token of the query we push it as a prefix word.
+    let parts_limit = words_limit.unwrap_or(usize::MAX);
-                    if quoted {
+
-                        phrase_end = position;
+    let mut position = -1i8;
-                        if phrase.is_empty() {
+    let mut phrase_start = -1i8;
-                            phrase_start = position;
+    let mut phrase_end = -1i8;
-                        }
+
-                        if let TokenKind::StopWord = token.kind {
+    let mut peekable = query.peekable();
-                            phrase.push(None);
+    while let Some(token) = peekable.next() {
-                        } else {
+        // early return if word limit is exceeded
-                            // TODO: in a phrase, check that every word exists
+        if primitive_query.len() >= parts_limit {
-                            // otherwise return WordDerivations::Empty
+            return Ok(primitive_query);
-                            phrase.push(Some(token.lemma().to_string()));
+        }
-                        }
+
-                    } else if peekable.peek().is_some() {
+        match token.kind {
-                        if let TokenKind::StopWord = token.kind {
+            TokenKind::Word | TokenKind::StopWord => {
-                        } else {
+                position += 1;
                // 1. if the word is quoted we push it in a phrase-buffer waiting for the ending quote,
                // 2. if the word is not the last token of the query and is not a stop_word we push it as a non-prefix word,
                // 3. if the word is the last token of the query we push it as a prefix word.
                if quoted {
                    phrase_end = position;
                    if phrase.is_empty() {
                        phrase_start = position;
                    }
                    if let TokenKind::StopWord = token.kind {
                        phrase.push(None);
                    } else {
                        // TODO: in a phrase, check that every word exists
                        // otherwise return WordDerivations::Empty
                        phrase.push(Some(token.lemma().to_string()));
                    }
                } else if peekable.peek().is_some() {
                    match token.kind {
                        TokenKind::Word => {
                            let derivations = derivations(token.lemma(), false)?;
                            let located_term = LocatedQueryTerm {
                                value: QueryTerm::Word { derivations },
@ -263,100 +293,91 @@ impl LocatedQueryTerm {
                            };
                            primitive_query.push(located_term);
                        }
-                    } else {
+                        TokenKind::StopWord | TokenKind::Separator(_) | TokenKind::Unknown => {}
-                        let derivations = derivations(token.lemma(), true)?;
+                    }
-                        let located_term = LocatedQueryTerm {
+                } else {
-                            value: QueryTerm::Word { derivations },
+                    let derivations = derivations(token.lemma(), true)?;
-                            positions: position..=position,
+                    let located_term = LocatedQueryTerm {
-                        };
+                        value: QueryTerm::Word { derivations },
-                        primitive_query.push(located_term);
+                        positions: position..=position,
                    };
                    primitive_query.push(located_term);
                }
            }
            TokenKind::Separator(separator_kind) => {
                match separator_kind {
                    SeparatorKind::Hard => {
                        position += 1;
                    }
                    SeparatorKind::Soft => {
                        position += 0;
                    }
                }
-                TokenKind::Separator(separator_kind) => {
+                let quote_count = token.lemma().chars().filter(|&s| s == '"').count();
-                    match separator_kind {
+                // swap quoted state if we encounter a double quote
-                        SeparatorKind::Hard => {
+                if quote_count % 2 != 0 {
-                            position += 1;
+                    quoted = !quoted;
-                        }
+                }
-                        SeparatorKind::Soft => {
+                // if there is a quote or a hard separator we close the phrase.
-                            position += 0;
+                if !phrase.is_empty() && (quote_count > 0 || separator_kind == SeparatorKind::Hard)
-                        }
+                {
-                    }
+                    let located_query_term = LocatedQueryTerm {
-                    let quote_count = token.lemma().chars().filter(|&s| s == '"').count();
+                        value: QueryTerm::Phrase {
-                    // swap quoted state if we encounter a double quote
+                            phrase: Phrase { words: mem::take(&mut phrase) },
-                    if quote_count % 2 != 0 {
+                        },
-                        quoted = !quoted;
+                        positions: phrase_start..=phrase_end,
-                    }
+                    };
-                    // if there is a quote or a hard separator we close the phrase.
+                    primitive_query.push(located_query_term);
                    if !phrase.is_empty()
                        && (quote_count > 0 || separator_kind == SeparatorKind::Hard)
                    {
                        let located_query_term = LocatedQueryTerm {
                            value: QueryTerm::Phrase {
                                phrase: Phrase { words: mem::take(&mut phrase) },
                            },
                            positions: phrase_start..=phrase_end,
                        };
                        primitive_query.push(located_query_term);
                    }
                }
                _ => (),
            }
            _ => (),
        }
        // If a quote is never closed, we consider all of the end of the query as a phrase.
        if !phrase.is_empty() {
            let located_query_term = LocatedQueryTerm {
                value: QueryTerm::Phrase { phrase: Phrase { words: mem::take(&mut phrase) } },
                positions: phrase_start..=phrase_end,
            };
            primitive_query.push(located_query_term);
        }
        Ok(primitive_query)
    }
    // If a quote is never closed, we consider all of the end of the query as a phrase.
    if !phrase.is_empty() {
        let located_query_term = LocatedQueryTerm {
            value: QueryTerm::Phrase { phrase: Phrase { words: mem::take(&mut phrase) } },
            positions: phrase_start..=phrase_end,
        };
        primitive_query.push(located_query_term);
    }
    Ok(primitive_query)
 }
-impl LocatedQueryTerm {
+// TODO: return a word derivations instead?
-    pub fn ngram2(
+pub fn ngram2(x: &LocatedQueryTerm, y: &LocatedQueryTerm) -> Option<(String, RangeInclusive<i8>)> {
-        x: &LocatedQueryTerm,
+    if *x.positions.end() != y.positions.start() - 1 {
-        y: &LocatedQueryTerm,
+        return None;
    ) -> Option<(String, RangeInclusive<i8>)> {
        if *x.positions.end() != y.positions.start() - 1 {
            println!(
                "x positions end: {}, y positions start: {}",
                *x.positions.end(),
                y.positions.start()
            );
            return None;
        }
        match (&x.value.original_single_word(), &y.value.original_single_word()) {
            (Some(w1), Some(w2)) => {
                let term = (format!("{w1}{w2}"), *x.positions.start()..=*y.positions.end());
                Some(term)
            }
            _ => None,
        }
    }
-    pub fn ngram3(
+    match (&x.value.original_single_word(), &y.value.original_single_word()) {
-        x: &LocatedQueryTerm,
+        (Some(w1), Some(w2)) => {
-        y: &LocatedQueryTerm,
+            let term = (format!("{w1}{w2}"), *x.positions.start()..=*y.positions.end());
-        z: &LocatedQueryTerm,
+            Some(term)
    ) -> Option<(String, RangeInclusive<i8>)> {
        if *x.positions.end() != y.positions.start() - 1
            || *y.positions.end() != z.positions.start() - 1
        {
            return None;
        }
        match (
            &x.value.original_single_word(),
            &y.value.original_single_word(),
            &z.value.original_single_word(),
        ) {
            (Some(w1), Some(w2), Some(w3)) => {
                let term = (format!("{w1}{w2}{w3}"), *x.positions.start()..=*z.positions.end());
                Some(term)
            }
            _ => None,
        }
        _ => None,
    }
 }
 pub fn ngram3(
    x: &LocatedQueryTerm,
    y: &LocatedQueryTerm,
    z: &LocatedQueryTerm,
 ) -> Option<(String, RangeInclusive<i8>)> {
    if *x.positions.end() != y.positions.start() - 1
        || *y.positions.end() != z.positions.start() - 1
    {
        return None;
    }
    match (
        &x.value.original_single_word(),
        &y.value.original_single_word(),
        &z.value.original_single_word(),
    ) {
        (Some(w1), Some(w2), Some(w3)) => {
            let term = (format!("{w1}{w2}{w3}"), *x.positions.start()..=*z.positions.end());
            Some(term)
        }
        _ => None,
    }
 }
--- a/milli/src/search/new/ranking_rule_graph/edge_docids_cache.rs
+++ b/milli/src/search/new/ranking_rule_graph/edge_docids_cache.rs
@ -41,11 +41,12 @@ impl<G: RankingRuleGraphTrait> EdgeDocidsCache<G> {
            EdgeDetails::Unconditional => Ok(BitmapOrAllRef::All),
            EdgeDetails::Data(details) => {
                if self.cache.contains_key(&edge_index) {
                    // TODO: should we update the bitmap in the cache if the new universe
                    // reduces it?
                    return Ok(BitmapOrAllRef::Bitmap(&self.cache[&edge_index]));
                }
                // TODO: maybe universe doesn't belong here
                let docids = universe & G::compute_docids(index, txn, db_cache, details)?;
                let _ = self.cache.insert(edge_index, docids);
                let docids = &self.cache[&edge_index];
                Ok(BitmapOrAllRef::Bitmap(docids))
--- a/milli/src/search/new/ranking_rule_graph/mod.rs
+++ b/milli/src/search/new/ranking_rule_graph/mod.rs
@ -1,19 +1,22 @@
-pub mod build;
+mod build;
-pub mod cheapest_paths;
+mod cheapest_paths;
-pub mod edge_docids_cache;
+mod edge_docids_cache;
-pub mod empty_paths_cache;
+mod empty_paths_cache;
-pub mod paths_map;
+mod paths_map;
-pub mod proximity;
+mod proximity;
-pub mod resolve_paths;
+mod resolve_paths;
-pub mod typo;
+mod typo;
 pub use edge_docids_cache::EdgeDocidsCache;
 pub use empty_paths_cache::EmptyPathsCache;
 pub use proximity::ProximityGraph;
 pub use typo::TypoGraph;
 use std::ops::ControlFlow;
 use heed::RoTxn;
 use roaring::RoaringBitmap;
 use self::empty_paths_cache::EmptyPathsCache;
 use super::db_cache::DatabaseCache;
 use super::logger::SearchLogger;
 use super::{QueryGraph, QueryNode};
--- a/milli/src/search/new/ranking_rule_graph/proximity/build.rs
+++ b/milli/src/search/new/ranking_rule_graph/proximity/build.rs
@ -105,6 +105,7 @@ pub fn visit_to_node<'transaction, 'from_data>(
    assert!(!updb1);
    let derivations1 = derivations1.all_derivations_except_prefix_db();
    // TODO: eventually, we want to get rid of the uses from `orginal`
    let original_word_2 = derivations2.original.clone();
    let mut cost_proximity_word_pairs = BTreeMap::<u8, BTreeMap<u8, Vec<WordPair>>>::new();
--- a/milli/src/search/new/ranking_rules.rs
+++ b/milli/src/search/new/ranking_rules.rs
@ -1,5 +1,3 @@
 use std::time::Instant;
 use heed::RoTxn;
 use roaring::RoaringBitmap;
@ -8,11 +6,11 @@ use super::logger::SearchLogger;
 use super::QueryGraph;
 use crate::new::graph_based_ranking_rule::GraphBasedRankingRule;
-use crate::new::ranking_rule_graph::proximity::ProximityGraph;
+use crate::new::ranking_rule_graph::ProximityGraph;
-use crate::new::ranking_rule_graph::typo::TypoGraph;
+use crate::new::ranking_rule_graph::TypoGraph;
 use crate::new::words::Words;
 // use crate::search::new::sort::Sort;
-use crate::{Filter, Index, Result, TermsMatchingStrategy};
+use crate::{Index, Result, TermsMatchingStrategy};
 pub trait RankingRuleOutputIter<'transaction, Query> {
    fn next_bucket(&mut self) -> Result<Option<RankingRuleOutput<Query>>>;
@ -100,18 +98,18 @@ pub struct RankingRuleOutput<Q> {
 // TODO: can make it generic over the query type (either query graph or placeholder) fairly easily
 #[allow(clippy::too_many_arguments)]
-pub fn execute_search<'transaction>(
+pub fn apply_ranking_rules<'transaction>(
    index: &Index,
    txn: &'transaction heed::RoTxn,
    // TODO: ranking rules parameter
    db_cache: &mut DatabaseCache<'transaction>,
    query_graph: &QueryGraph,
-    filters: Option<Filter>,
+    universe: &RoaringBitmap,
    from: usize,
    length: usize,
    logger: &mut dyn SearchLogger<QueryGraph>,
 ) -> Result<Vec<u32>> {
-    logger.initial_query(query_graph, Instant::now());
+    logger.initial_query(query_graph);
    let words = &mut Words::new(TermsMatchingStrategy::Last);
    // let sort = &mut Sort::new(index, txn, "release_date".to_owned(), true)?;
    let proximity = &mut GraphBasedRankingRule::<ProximityGraph>::new("proximity".to_owned());
@ -122,25 +120,13 @@ pub fn execute_search<'transaction>(
    logger.ranking_rules(&ranking_rules);
    let universe = if let Some(filters) = filters {
        filters.evaluate(txn, index)?
    } else {
        index.documents_ids(txn)?
    };
    if universe.len() < from as u64 {
        return Ok(vec![]);
    }
    let ranking_rules_len = ranking_rules.len();
-    logger.start_iteration_ranking_rule(
+    logger.start_iteration_ranking_rule(0, ranking_rules[0], query_graph, universe);
-        0,
+    ranking_rules[0].start_iteration(index, txn, db_cache, logger, universe, query_graph)?;
        ranking_rules[0],
        query_graph,
        &universe,
        Instant::now(),
    );
    ranking_rules[0].start_iteration(index, txn, db_cache, logger, &universe, query_graph)?;
    let mut candidates = vec![RoaringBitmap::default(); ranking_rules_len];
    candidates[0] = universe.clone();
@ -154,7 +140,6 @@ pub fn execute_search<'transaction>(
                cur_ranking_rule_index,
                ranking_rules[cur_ranking_rule_index],
                &candidates[cur_ranking_rule_index],
                Instant::now(),
            );
            candidates[cur_ranking_rule_index].clear();
            ranking_rules[cur_ranking_rule_index].end_iteration(index, txn, db_cache, logger);
@ -183,7 +168,6 @@ pub fn execute_search<'transaction>(
                            cur_ranking_rule_index,
                            ranking_rules[cur_ranking_rule_index],
                            &candidates,
                            Instant::now(),
                        );
                    } else {
                        let all_candidates = candidates.iter().collect::<Vec<_>>();
@ -193,7 +177,6 @@ pub fn execute_search<'transaction>(
                            cur_ranking_rule_index,
                            ranking_rules[cur_ranking_rule_index],
                            &skipped_candidates.into_iter().collect(),
                            Instant::now(),
                        );
                        let candidates = candidates
                            .iter()
@ -234,7 +217,6 @@ pub fn execute_search<'transaction>(
            ranking_rules[cur_ranking_rule_index],
            &candidates[cur_ranking_rule_index],
            &next_bucket.candidates,
            Instant::now(),
        );
        assert!(candidates[cur_ranking_rule_index].is_superset(&next_bucket.candidates));
@ -255,7 +237,6 @@ pub fn execute_search<'transaction>(
            ranking_rules[cur_ranking_rule_index],
            &next_bucket.query,
            &candidates[cur_ranking_rule_index],
            Instant::now(),
        );
        ranking_rules[cur_ranking_rule_index].start_iteration(
            index,
@ -272,11 +253,11 @@ pub fn execute_search<'transaction>(
 #[cfg(test)]
 mod tests {
    use super::execute_search;
    // use crate::allocator::ALLOC;
    use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
    use crate::index::tests::TempIndex;
    use crate::new::db_cache::DatabaseCache;
    use crate::new::execute_search;
    use big_s::S;
    use heed::EnvOpenOptions;
    use maplit::hashset;
@ -284,8 +265,7 @@ mod tests {
    use std::io::{BufRead, BufReader, Cursor, Seek};
    use std::time::Instant;
    // use crate::new::logger::detailed::DetailedSearchLogger;
-    use crate::new::logger::{DefaultSearchLogger, SearchLogger};
+    use crate::new::logger::DefaultSearchLogger;
    use crate::new::make_query_graph;
    use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
    use crate::{Criterion, Index, Object, Search, TermsMatchingStrategy};
@ -321,17 +301,20 @@ mod tests {
            ]))
            .unwrap();
        let txn = index.read_txn().unwrap();
        let mut logger = DefaultSearchLogger;
        let mut db_cache = DatabaseCache::default();
-        let query_graph =
+        let results = execute_search(
-            make_query_graph(&index, &txn, &mut db_cache, "releases from poison by the government")
+            &index,
-                .unwrap();
+            &txn,
-        logger.initial_query(&query_graph, Instant::now());
+            &mut db_cache,
            "releases from poison by the government",
            None,
            0,
            50,
            &mut DefaultSearchLogger,
        )
        .unwrap();
        let results =
            execute_search(&index, &txn, &mut db_cache, &query_graph, None, 0, 50, &mut logger)
                .unwrap();
        println!("{results:?}")
    }
@ -352,21 +335,13 @@ mod tests {
        let mut db_cache = DatabaseCache::default();
        let query_graph = make_query_graph(
            &index,
            &txn,
            &mut db_cache,
            "which a the releases from poison by the government",
        )
        .unwrap();
        // let mut logger = crate::new::logger::detailed::DetailedSearchLogger::new("log");
        let results = execute_search(
            &index,
            &txn,
            &mut db_cache,
-            &query_graph,
+            "which a the releases from poison by the government",
            None,
            0,
            20,
@ -453,17 +428,13 @@ mod tests {
        let mut db_cache = DatabaseCache::default();
        let query_graph =
            make_query_graph(&index, &txn, &mut db_cache, "releases from poison by the government")
                .unwrap();
        let mut logger = crate::new::logger::detailed::DetailedSearchLogger::new("log");
        let results = execute_search(
            &index,
            &txn,
            &mut db_cache,
-            &query_graph,
+            "releases from poison by the government",
            None,
            0,
            20,
--- a/milli/src/search/new/words.rs
+++ b/milli/src/search/new/words.rs
@ -43,12 +43,9 @@ impl<'transaction> RankingRule<'transaction, QueryGraph> for Words {
        _parent_candidates: &RoaringBitmap,
        parent_query_graph: &QueryGraph,
    ) -> Result<()> {
        // println!("Words: start iteration");
        self.exhausted = false;
        self.query_graph = Some(parent_query_graph.clone());
        // TODO: a phrase can contain many positions, but represents a single node.
        // That's a problem.
        let positions_to_remove = match self.terms_matching_strategy {
            TermsMatchingStrategy::Last => {
                let mut all_positions = BTreeSet::new();
@ -60,11 +57,13 @@ impl<'transaction> RankingRule<'transaction, QueryGraph> for Words {
                        QueryNode::Deleted | QueryNode::Start | QueryNode::End => {}
                    }
                }
-                all_positions.into_iter().collect()
+                let mut r: Vec<i8> = all_positions.into_iter().collect();
                // don't remove the first term
                r.remove(0);
                r
            }
            TermsMatchingStrategy::All => vec![],
        };
        // println!("positions to remove: {positions_to_remove:?}");
        self.positions_to_remove = positions_to_remove;
        self.iterating = true;
        Ok(())
@ -78,7 +77,6 @@ impl<'transaction> RankingRule<'transaction, QueryGraph> for Words {
        logger: &mut dyn SearchLogger<QueryGraph>,
        universe: &RoaringBitmap,
    ) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
        // println!("Words: next bucket");
        assert!(self.iterating);
        assert!(universe.len() > 1);
@ -122,9 +120,9 @@ impl<'transaction> RankingRule<'transaction, QueryGraph> for Words {
        _db_cache: &mut DatabaseCache<'transaction>,
        _logger: &mut dyn SearchLogger<QueryGraph>,
    ) {
        // println!("Words: end iteration");
        self.iterating = false;
        self.exhausted = true;
        self.positions_to_remove = vec![];
        self.query_graph = None;
    }
 }