Merge branch 'main' into indexer-edition-2024

ManyTheFish 2024-11-06 15:19:18 +01:00
commit 10feeb88f2
1122 changed files with 6265 additions and 5265 deletions


@@ -0,0 +1,846 @@
use std::collections::{BTreeMap, HashMap, HashSet};
use std::fmt::Display;
use std::ops::ControlFlow;
use std::{fmt, mem};
use heed::types::Bytes;
use heed::BytesDecode;
use indexmap::IndexMap;
use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize};
use crate::error::UserError;
use crate::facet::FacetType;
use crate::heed_codec::facet::{
FacetGroupKeyCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, OrderedF64Codec,
};
use crate::heed_codec::{BytesRefCodec, StrRefCodec};
use crate::search::facet::facet_distribution_iter::{
count_iterate_over_facet_distribution, lexicographically_iterate_over_facet_distribution,
};
use crate::{FieldId, Index, Result};
/// The default number of values per facet that will
/// be fetched from the key-value store.
pub const DEFAULT_VALUES_PER_FACET: usize = 100;
/// Threshold on the number of candidates that makes
/// the system choose between one algorithm and another.
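///
/// Below this threshold, and when ordering lexicographically, the candidate
/// documents are scanned one by one; above it, the facet level trees are
/// traversed instead (see `FacetDistribution::facet_values`).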
const CANDIDATES_THRESHOLD: u64 = 3000;
/// How should we fetch the facets?
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum OrderBy {
/// By lexicographic order...
#[default]
Lexicographic,
/// Or by number of docids in common?
Count,
}
impl Display for OrderBy {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
OrderBy::Lexicographic => f.write_str("alphabetically"),
OrderBy::Count => f.write_str("by count"),
}
}
}
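/// Computes the facet distribution, i.e. the number of candidate documents
/// associated with each value of the requested facets.
///
/// A sketch of a typical call, assuming an open index and read transaction
/// (see the tests below for complete setups):
///
/// ```ignore
/// let distribution = FacetDistribution::new(&rtxn, &index)
///     .facets(std::iter::once(("colour", OrderBy::Count)))
///     .candidates(candidates)
///     .max_values_per_facet(10)
///     .execute()?;
/// ```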
pub struct FacetDistribution<'a> {
facets: Option<HashMap<String, OrderBy>>,
candidates: Option<RoaringBitmap>,
max_values_per_facet: usize,
default_order_by: OrderBy,
rtxn: &'a heed::RoTxn<'a>,
index: &'a Index,
}
impl<'a> FacetDistribution<'a> {
pub fn new(rtxn: &'a heed::RoTxn<'a>, index: &'a Index) -> FacetDistribution<'a> {
FacetDistribution {
facets: None,
candidates: None,
max_values_per_facet: DEFAULT_VALUES_PER_FACET,
default_order_by: OrderBy::default(),
rtxn,
index,
}
}
pub fn facets<I: IntoIterator<Item = (A, OrderBy)>, A: AsRef<str>>(
&mut self,
names_ordered_by: I,
) -> &mut Self {
self.facets = Some(
names_ordered_by
.into_iter()
.map(|(name, order_by)| (name.as_ref().to_string(), order_by))
.collect(),
);
self
}
pub fn max_values_per_facet(&mut self, max: usize) -> &mut Self {
self.max_values_per_facet = max;
self
}
pub fn default_order_by(&mut self, order_by: OrderBy) -> &mut Self {
self.default_order_by = order_by;
self
}
pub fn candidates(&mut self, candidates: RoaringBitmap) -> &mut Self {
self.candidates = Some(candidates);
self
}
/// There is a small number of candidates OR we ask for facet string values, so we
/// decide to iterate over the facet values of each candidate, one by one.
fn facet_distribution_from_documents(
&self,
field_id: FieldId,
facet_type: FacetType,
candidates: &RoaringBitmap,
distribution: &mut IndexMap<String, u64>,
) -> heed::Result<()> {
match facet_type {
FacetType::Number => {
let mut lexicographic_distribution = BTreeMap::new();
let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec();
let db = self.index.field_id_docid_facet_f64s;
for docid in candidates {
key_buffer.truncate(mem::size_of::<FieldId>());
key_buffer.extend_from_slice(&docid.to_be_bytes());
let iter = db
.remap_key_type::<Bytes>()
.prefix_iter(self.rtxn, &key_buffer)?
.remap_key_type::<FieldDocIdFacetF64Codec>();
for result in iter {
let ((_, _, value), ()) = result?;
*lexicographic_distribution.entry(value.to_string()).or_insert(0) += 1;
}
}
distribution.extend(
lexicographic_distribution
.into_iter()
.take(self.max_values_per_facet.saturating_sub(distribution.len())),
);
}
FacetType::String => {
let mut normalized_distribution = BTreeMap::new();
let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec();
let db = self.index.field_id_docid_facet_strings;
for docid in candidates {
key_buffer.truncate(mem::size_of::<FieldId>());
key_buffer.extend_from_slice(&docid.to_be_bytes());
let iter = db
.remap_key_type::<Bytes>()
.prefix_iter(self.rtxn, &key_buffer)?
.remap_key_type::<FieldDocIdFacetStringCodec>();
for result in iter {
let ((_, _, normalized_value), original_value) = result?;
let (_, count) = normalized_distribution
.entry(normalized_value)
.or_insert_with(|| (original_value, 0));
*count += 1;
// We'd like to break here once we have enough facet values, but we are collecting
// them by increasing docid, so higher-ranked facets could appear in later docids.
}
}
let iter = normalized_distribution
.into_iter()
.take(self.max_values_per_facet.saturating_sub(distribution.len()))
.map(|(_normalized, (original, count))| (original.to_string(), count));
distribution.extend(iter);
}
}
Ok(())
}
/// There are too many documents, so we use the facet levels to move through
/// the facet values and find the associated candidates and values.
fn facet_numbers_distribution_from_facet_levels(
&self,
field_id: FieldId,
candidates: &RoaringBitmap,
order_by: OrderBy,
distribution: &mut IndexMap<String, u64>,
) -> heed::Result<()> {
let search_function = match order_by {
OrderBy::Lexicographic => lexicographically_iterate_over_facet_distribution,
OrderBy::Count => count_iterate_over_facet_distribution,
};
search_function(
self.rtxn,
self.index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
field_id,
candidates,
|facet_key, nbr_docids, _| {
let facet_key = OrderedF64Codec::bytes_decode(facet_key).unwrap();
distribution.insert(facet_key.to_string(), nbr_docids);
if distribution.len() == self.max_values_per_facet {
Ok(ControlFlow::Break(()))
} else {
Ok(ControlFlow::Continue(()))
}
},
)
}
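/// Same as [`Self::facet_numbers_distribution_from_facet_levels`], but over
/// the facet strings; the original (non-normalized) string of each facet
/// value is fetched back from `field_id_docid_facet_strings`.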
fn facet_strings_distribution_from_facet_levels(
&self,
field_id: FieldId,
candidates: &RoaringBitmap,
order_by: OrderBy,
distribution: &mut IndexMap<String, u64>,
) -> heed::Result<()> {
let search_function = match order_by {
OrderBy::Lexicographic => lexicographically_iterate_over_facet_distribution,
OrderBy::Count => count_iterate_over_facet_distribution,
};
search_function(
self.rtxn,
self.index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
field_id,
candidates,
|facet_key, nbr_docids, any_docid| {
let facet_key = StrRefCodec::bytes_decode(facet_key).unwrap();
let key: (FieldId, _, &str) = (field_id, any_docid, facet_key);
let original_string = self
.index
.field_id_docid_facet_strings
.get(self.rtxn, &key)?
.unwrap()
.to_owned();
distribution.insert(original_string, nbr_docids);
if distribution.len() == self.max_values_per_facet {
Ok(ControlFlow::Break(()))
} else {
Ok(ControlFlow::Continue(()))
}
},
)
}
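/// Compute the distribution of a single field, dispatching between the
/// per-document scan and the facet-level iterators depending on the number
/// of candidates and the requested order.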
fn facet_values(
&self,
field_id: FieldId,
order_by: OrderBy,
) -> heed::Result<IndexMap<String, u64>> {
use FacetType::{Number, String};
let mut distribution = IndexMap::new();
match (order_by, &self.candidates) {
(OrderBy::Lexicographic, Some(cnd)) if cnd.len() <= CANDIDATES_THRESHOLD => {
// Classic search, candidates were specified, we must return facet values only related
// to those candidates. We also enter here for facet strings for performance reasons.
self.facet_distribution_from_documents(field_id, Number, cnd, &mut distribution)?;
self.facet_distribution_from_documents(field_id, String, cnd, &mut distribution)?;
}
_ => {
let universe;
let candidates = match &self.candidates {
Some(cnd) => cnd,
None => {
universe = self.index.documents_ids(self.rtxn)?;
&universe
}
};
self.facet_numbers_distribution_from_facet_levels(
field_id,
candidates,
order_by,
&mut distribution,
)?;
self.facet_strings_distribution_from_facet_levels(
field_id,
candidates,
order_by,
&mut distribution,
)?;
}
};
Ok(distribution)
}
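/// Compute the `(min, max)` numeric facet values of each requested field over
/// the current candidates. Returns an empty map when no candidates were set.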
pub fn compute_stats(&self) -> Result<BTreeMap<String, (f64, f64)>> {
let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
let filterable_fields = self.index.filterable_fields(self.rtxn)?;
let candidates = if let Some(candidates) = self.candidates.clone() {
candidates
} else {
return Ok(Default::default());
};
let fields = match &self.facets {
Some(facets) => {
let invalid_fields: HashSet<_> = facets
.iter()
.map(|(name, _)| name)
.filter(|facet| !crate::is_faceted(facet, &filterable_fields))
.collect();
if !invalid_fields.is_empty() {
return Err(UserError::InvalidFacetsDistribution {
invalid_facets_name: invalid_fields.into_iter().cloned().collect(),
valid_facets_name: filterable_fields.into_iter().collect(),
}
.into());
} else {
facets.iter().map(|(name, _)| name).cloned().collect()
}
}
None => filterable_fields,
};
let mut distribution = BTreeMap::new();
for (fid, name) in fields_ids_map.iter() {
if crate::is_faceted(name, &fields) {
let min_value = if let Some(min_value) = crate::search::facet::facet_min_value(
self.index,
self.rtxn,
fid,
candidates.clone(),
)? {
min_value
} else {
continue;
};
let max_value = if let Some(max_value) = crate::search::facet::facet_max_value(
self.index,
self.rtxn,
fid,
candidates.clone(),
)? {
max_value
} else {
continue;
};
distribution.insert(name.to_string(), (min_value, max_value));
}
}
Ok(distribution)
}
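/// Compute the facet distribution of each requested field, or of every
/// filterable field when none were requested.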
pub fn execute(&self) -> Result<BTreeMap<String, IndexMap<String, u64>>> {
let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
let filterable_fields = self.index.filterable_fields(self.rtxn)?;
let fields = match self.facets {
Some(ref facets) => {
let invalid_fields: HashSet<_> = facets
.iter()
.map(|(name, _)| name)
.filter(|facet| !crate::is_faceted(facet, &filterable_fields))
.collect();
if !invalid_fields.is_empty() {
return Err(UserError::InvalidFacetsDistribution {
invalid_facets_name: invalid_fields.into_iter().cloned().collect(),
valid_facets_name: filterable_fields.into_iter().collect(),
}
.into());
} else {
facets.iter().map(|(name, _)| name).cloned().collect()
}
}
None => filterable_fields,
};
let mut distribution = BTreeMap::new();
for (fid, name) in fields_ids_map.iter() {
if crate::is_faceted(name, &fields) {
let order_by = self
.facets
.as_ref()
.and_then(|facets| facets.get(name).copied())
.unwrap_or(self.default_order_by);
let values = self.facet_values(fid, order_by)?;
distribution.insert(name.to_string(), values);
}
}
Ok(distribution)
}
}
impl fmt::Debug for FacetDistribution<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let FacetDistribution {
facets,
candidates,
max_values_per_facet,
default_order_by,
rtxn: _,
index: _,
} = self;
f.debug_struct("FacetDistribution")
.field("facets", facets)
.field("candidates", candidates)
.field("max_values_per_facet", max_values_per_facet)
.field("default_order_by", default_order_by)
.finish()
}
}
#[cfg(test)]
mod tests {
use std::iter;
use big_s::S;
use maplit::hashset;
use crate::documents::documents_batch_reader_from_objects;
use crate::index::tests::TempIndex;
use crate::{milli_snap, FacetDistribution, OrderBy};
#[test]
fn few_candidates_few_facet_values() {
// All the tests here avoid using the code in `facet_distribution_iter` because there aren't
// enough candidates.
let mut index = TempIndex::new();
index.index_documents_config.autogenerate_docids = true;
index
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
.unwrap();
let documents = documents!([
{ "colour": "Blue" },
{ "colour": " blue" },
{ "colour": "RED" }
]);
index.add_documents(documents).unwrap();
let txn = index.read_txn().unwrap();
let map = FacetDistribution::new(&txn, &index)
.facets(iter::once(("colour", OrderBy::default())))
.execute()
.unwrap();
milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2, "RED": 1}}"###);
let map = FacetDistribution::new(&txn, &index)
.facets(iter::once(("colour", OrderBy::default())))
.candidates([0, 1, 2].iter().copied().collect())
.execute()
.unwrap();
milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2, "RED": 1}}"###);
let map = FacetDistribution::new(&txn, &index)
.facets(iter::once(("colour", OrderBy::default())))
.candidates([1, 2].iter().copied().collect())
.execute()
.unwrap();
// I think it would be fine if " blue" was "Blue" instead.
// We just need to get any non-normalised string, even if it's not in
// the candidates.
milli_snap!(format!("{map:?}"), @r###"{"colour": {" blue": 1, "RED": 1}}"###);
let map = FacetDistribution::new(&txn, &index)
.facets(iter::once(("colour", OrderBy::default())))
.candidates([2].iter().copied().collect())
.execute()
.unwrap();
milli_snap!(format!("{map:?}"), @r###"{"colour": {"RED": 1}}"###);
let map = FacetDistribution::new(&txn, &index)
.facets(iter::once(("colour", OrderBy::default())))
.candidates([0, 1, 2].iter().copied().collect())
.max_values_per_facet(1)
.execute()
.unwrap();
milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2}}"###);
let map = FacetDistribution::new(&txn, &index)
.facets(iter::once(("colour", OrderBy::Count)))
.candidates([0, 1, 2].iter().copied().collect())
.max_values_per_facet(1)
.execute()
.unwrap();
milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2}}"###);
}
#[test]
fn many_candidates_few_facet_values() {
let mut index = TempIndex::new_with_map_size(4096 * 10_000);
index.index_documents_config.autogenerate_docids = true;
index
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
.unwrap();
let facet_values = ["Red", "RED", " red ", "Blue", "BLUE"];
let mut documents = vec![];
for i in 0..10_000 {
let document = serde_json::json!({
"colour": facet_values[i % 5],
})
.as_object()
.unwrap()
.clone();
documents.push(document);
}
let documents = documents_batch_reader_from_objects(documents);
index.add_documents(documents).unwrap();
let txn = index.read_txn().unwrap();
let map = FacetDistribution::new(&txn, &index)
.facets(iter::once(("colour", OrderBy::default())))
.execute()
.unwrap();
milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 4000, "Red": 6000}}"###);
let map = FacetDistribution::new(&txn, &index)
.facets(iter::once(("colour", OrderBy::default())))
.max_values_per_facet(1)
.execute()
.unwrap();
milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 4000}}"###);
let map = FacetDistribution::new(&txn, &index)
.facets(iter::once(("colour", OrderBy::default())))
.candidates((0..10_000).collect())
.execute()
.unwrap();
milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 4000, "Red": 6000}}"###);
let map = FacetDistribution::new(&txn, &index)
.facets(iter::once(("colour", OrderBy::default())))
.candidates((0..5_000).collect())
.execute()
.unwrap();
milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2000, "Red": 3000}}"###);
let map = FacetDistribution::new(&txn, &index)
.facets(iter::once(("colour", OrderBy::default())))
.candidates((0..5_000).collect())
.execute()
.unwrap();
milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2000, "Red": 3000}}"###);
let map = FacetDistribution::new(&txn, &index)
.facets(iter::once(("colour", OrderBy::default())))
.candidates((0..5_000).collect())
.max_values_per_facet(1)
.execute()
.unwrap();
milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2000}}"###);
let map = FacetDistribution::new(&txn, &index)
.facets(iter::once(("colour", OrderBy::Count)))
.candidates((0..5_000).collect())
.max_values_per_facet(1)
.execute()
.unwrap();
milli_snap!(format!("{map:?}"), @r###"{"colour": {"Red": 3000}}"###);
}
#[test]
fn many_candidates_many_facet_values() {
let mut index = TempIndex::new_with_map_size(4096 * 10_000);
index.index_documents_config.autogenerate_docids = true;
index
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
.unwrap();
let facet_values = (0..1000).map(|x| format!("{x:x}")).collect::<Vec<_>>();
let mut documents = vec![];
for i in 0..10_000 {
let document = serde_json::json!({
"colour": facet_values[i % 1000],
})
.as_object()
.unwrap()
.clone();
documents.push(document);
}
let documents = documents_batch_reader_from_objects(documents);
index.add_documents(documents).unwrap();
let txn = index.read_txn().unwrap();
let map = FacetDistribution::new(&txn, &index)
.facets(iter::once(("colour", OrderBy::default())))
.execute()
.unwrap();
milli_snap!(format!("{map:?}"), "no_candidates", @"ac9229ed5964d893af96a7076e2f8af5");
let map = FacetDistribution::new(&txn, &index)
.facets(iter::once(("colour", OrderBy::default())))
.max_values_per_facet(2)
.execute()
.unwrap();
milli_snap!(format!("{map:?}"), "no_candidates_with_max_2", @r###"{"colour": {"0": 10, "1": 10}}"###);
let map = FacetDistribution::new(&txn, &index)
.facets(iter::once(("colour", OrderBy::default())))
.candidates((0..10_000).collect())
.execute()
.unwrap();
milli_snap!(format!("{map:?}"), "candidates_0_10_000", @"ac9229ed5964d893af96a7076e2f8af5");
let map = FacetDistribution::new(&txn, &index)
.facets(iter::once(("colour", OrderBy::default())))
.candidates((0..5_000).collect())
.execute()
.unwrap();
milli_snap!(format!("{map:?}"), "candidates_0_5_000", @"825f23a4090d05756f46176987b7d992");
}
#[test]
fn facet_stats() {
let mut index = TempIndex::new_with_map_size(4096 * 10_000);
index.index_documents_config.autogenerate_docids = true;
index
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
.unwrap();
let facet_values = (0..1000).collect::<Vec<_>>();
let mut documents = vec![];
for i in 0..1000 {
let document = serde_json::json!({
"colour": facet_values[i % 1000],
})
.as_object()
.unwrap()
.clone();
documents.push(document);
}
let documents = documents_batch_reader_from_objects(documents);
index.add_documents(documents).unwrap();
let txn = index.read_txn().unwrap();
let map = FacetDistribution::new(&txn, &index)
.facets(iter::once(("colour", OrderBy::default())))
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "no_candidates", @"{}");
let map = FacetDistribution::new(&txn, &index)
.facets(iter::once(("colour", OrderBy::default())))
.candidates((0..1000).collect())
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 999.0)}"###);
let map = FacetDistribution::new(&txn, &index)
.facets(iter::once(("colour", OrderBy::default())))
.candidates((217..777).collect())
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "candidates_217_777", @r###"{"colour": (217.0, 776.0)}"###);
}
#[test]
fn facet_stats_array() {
let mut index = TempIndex::new_with_map_size(4096 * 10_000);
index.index_documents_config.autogenerate_docids = true;
index
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
.unwrap();
let facet_values = (0..1000).collect::<Vec<_>>();
let mut documents = vec![];
for i in 0..1000 {
let document = serde_json::json!({
"colour": [facet_values[i % 1000], facet_values[i % 1000] + 1000],
})
.as_object()
.unwrap()
.clone();
documents.push(document);
}
let documents = documents_batch_reader_from_objects(documents);
index.add_documents(documents).unwrap();
let txn = index.read_txn().unwrap();
let map = FacetDistribution::new(&txn, &index)
.facets(iter::once(("colour", OrderBy::default())))
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "no_candidates", @"{}");
let map = FacetDistribution::new(&txn, &index)
.facets(iter::once(("colour", OrderBy::default())))
.candidates((0..1000).collect())
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 1999.0)}"###);
let map = FacetDistribution::new(&txn, &index)
.facets(iter::once(("colour", OrderBy::default())))
.candidates((217..777).collect())
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "candidates_217_777", @r###"{"colour": (217.0, 1776.0)}"###);
}
#[test]
fn facet_stats_mixed_array() {
let mut index = TempIndex::new_with_map_size(4096 * 10_000);
index.index_documents_config.autogenerate_docids = true;
index
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
.unwrap();
let facet_values = (0..1000).collect::<Vec<_>>();
let mut documents = vec![];
for i in 0..1000 {
let document = serde_json::json!({
"colour": [facet_values[i % 1000], format!("{}", facet_values[i % 1000] + 1000)],
})
.as_object()
.unwrap()
.clone();
documents.push(document);
}
let documents = documents_batch_reader_from_objects(documents);
index.add_documents(documents).unwrap();
let txn = index.read_txn().unwrap();
let map = FacetDistribution::new(&txn, &index)
.facets(iter::once(("colour", OrderBy::default())))
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "no_candidates", @"{}");
let map = FacetDistribution::new(&txn, &index)
.facets(iter::once(("colour", OrderBy::default())))
.candidates((0..1000).collect())
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 999.0)}"###);
let map = FacetDistribution::new(&txn, &index)
.facets(iter::once(("colour", OrderBy::default())))
.candidates((217..777).collect())
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "candidates_217_777", @r###"{"colour": (217.0, 776.0)}"###);
}
#[test]
fn facet_mixed_values() {
let mut index = TempIndex::new_with_map_size(4096 * 10_000);
index.index_documents_config.autogenerate_docids = true;
index
.update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
.unwrap();
let facet_values = (0..1000).collect::<Vec<_>>();
let mut documents = vec![];
for i in 0..1000 {
let document = if i % 2 == 0 {
serde_json::json!({
"colour": [facet_values[i % 1000], facet_values[i % 1000] + 1000],
})
} else {
serde_json::json!({
"colour": format!("{}", facet_values[i % 1000] + 10000),
})
};
let document = document.as_object().unwrap().clone();
documents.push(document);
}
let documents = documents_batch_reader_from_objects(documents);
index.add_documents(documents).unwrap();
let txn = index.read_txn().unwrap();
let map = FacetDistribution::new(&txn, &index)
.facets(iter::once(("colour", OrderBy::default())))
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "no_candidates", @"{}");
let map = FacetDistribution::new(&txn, &index)
.facets(iter::once(("colour", OrderBy::default())))
.candidates((0..1000).collect())
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 1998.0)}"###);
let map = FacetDistribution::new(&txn, &index)
.facets(iter::once(("colour", OrderBy::default())))
.candidates((217..777).collect())
.compute_stats()
.unwrap();
milli_snap!(format!("{map:?}"), "candidates_217_777", @r###"{"colour": (218.0, 1776.0)}"###);
}
}


@@ -0,0 +1,301 @@
use std::cmp::Reverse;
use std::collections::BinaryHeap;
use std::ops::ControlFlow;
use heed::Result;
use roaring::RoaringBitmap;
use super::{get_first_facet_value, get_highest_level};
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupLazyValueCodec, FacetGroupValueCodec,
};
use crate::heed_codec::BytesRefCodec;
use crate::{CboRoaringBitmapCodec, DocumentId};
/// Call the given closure on the facet distribution of the candidate documents.
///
/// The arguments to the closure are:
/// - the facet value, as a byte slice
/// - the number of documents among the candidates that contain this facet value
/// - the id of a document which contains the facet value. Note that this document
/// is not necessarily from the list of candidates; it is simply *any* document which
/// contains this facet value.
///
/// The return value of the closure is a `ControlFlow<()>` which indicates whether we should
/// keep iterating over the different facet values or stop.
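///
/// A sketch of a callback collecting the first ten facet counts, assuming an
/// open `rtxn`, a facet `db`, and a `candidates` bitmap (see the tests below
/// for complete setups):
///
/// ```ignore
/// let mut results = Vec::new();
/// lexicographically_iterate_over_facet_distribution(
///     &rtxn,
///     db,
///     0, // field id
///     &candidates,
///     |facet_value, count, _any_docid| {
///         results.push((facet_value.to_vec(), count));
///         Ok(if results.len() < 10 { ControlFlow::Continue(()) } else { ControlFlow::Break(()) })
///     },
/// )?;
/// ```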
pub fn lexicographically_iterate_over_facet_distribution<'t, CB>(
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16,
candidates: &RoaringBitmap,
callback: CB,
) -> Result<()>
where
CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
{
let db = db.remap_data_type::<FacetGroupLazyValueCodec>();
let mut fd = LexicographicFacetDistribution { rtxn, db, field_id, callback };
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
fd.iterate(candidates, highest_level, first_bound, usize::MAX)?;
Ok(())
} else {
Ok(())
}
}
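/// Call the given closure on the facet distribution of the candidate documents,
/// visiting the facet values by decreasing number of matching candidates rather
/// than in lexicographic order.
///
/// The closure contract is the same as for
/// [`lexicographically_iterate_over_facet_distribution`].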
pub fn count_iterate_over_facet_distribution<'t, CB>(
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16,
candidates: &RoaringBitmap,
mut callback: CB,
) -> Result<()>
where
CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
{
/// # Important
/// The order of the fields determines the order in which the facet values will be returned.
/// This struct is inserted in a BinaryHeap and popped later on.
#[derive(Debug, PartialOrd, Ord, PartialEq, Eq)]
struct LevelEntry<'t> {
/// The number of candidates in this entry.
count: u64,
/// The key level of the entry.
level: Reverse<u8>,
/// The left bound key.
left_bound: &'t [u8],
/// The number of keys we must look for after `left_bound`.
group_size: u8,
/// Any docid in the set of matching documents. Used to find the original facet string.
any_docid: u32,
}
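// Because `Ord` is derived, entries compare field by field: a larger `count`
// wins, and for equal counts the smaller `level` (thanks to `Reverse`) pops
// first from the max-heap, so resolved level-0 values are yielded before
// deeper groups with the same count are expanded.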
// Represents the list of keys that we must explore.
let mut heap = BinaryHeap::new();
let db = db.remap_data_type::<FacetGroupLazyValueCodec>();
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
// We first fill the heap with values from the highest level
let starting_key =
FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
for el in db.range(rtxn, &(&starting_key..))?.take(usize::MAX) {
let (key, value) = el?;
// The range is unbounded on the right and the group size for the highest level is MAX,
// so we need to check that we are not iterating over the next field id
if key.field_id != field_id {
break;
}
let intersection = CboRoaringBitmapCodec::intersection_with_serialized(
value.bitmap_bytes,
candidates,
)?;
let count = intersection.len();
if count != 0 {
heap.push(LevelEntry {
count,
level: Reverse(key.level),
left_bound: key.left_bound,
group_size: value.size,
any_docid: intersection.min().unwrap(),
});
}
}
while let Some(LevelEntry { count, level, left_bound, group_size, any_docid }) = heap.pop()
{
if let Reverse(0) = level {
match (callback)(left_bound, count, any_docid)? {
ControlFlow::Continue(_) => (),
ControlFlow::Break(_) => return Ok(()),
}
} else {
let starting_key = FacetGroupKey { field_id, level: level.0 - 1, left_bound };
for el in db.range(rtxn, &(&starting_key..))?.take(group_size as usize) {
let (key, value) = el?;
// The range is unbounded on the right and the group size for the highest level is MAX,
// so we need to check that we are not iterating over the next field id
if key.field_id != field_id {
break;
}
let intersection = CboRoaringBitmapCodec::intersection_with_serialized(
value.bitmap_bytes,
candidates,
)?;
let count = intersection.len();
if count != 0 {
heap.push(LevelEntry {
count,
level: Reverse(key.level),
left_bound: key.left_bound,
group_size: value.size,
any_docid: intersection.min().unwrap(),
});
}
}
}
}
}
Ok(())
}
/// Iterate over the facet values in lexicographic order.
struct LexicographicFacetDistribution<'t, CB>
where
CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
{
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupLazyValueCodec>,
field_id: u16,
callback: CB,
}
impl<'t, CB> LexicographicFacetDistribution<'t, CB>
where
CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
{
fn iterate_level_0(
&mut self,
candidates: &RoaringBitmap,
starting_bound: &'t [u8],
group_size: usize,
) -> Result<ControlFlow<()>> {
let starting_key =
FacetGroupKey { field_id: self.field_id, level: 0, left_bound: starting_bound };
let iter = self.db.range(self.rtxn, &(starting_key..))?.take(group_size);
for el in iter {
let (key, value) = el?;
// The range is unbounded on the right and the group size for the highest level is MAX,
// so we need to check that we are not iterating over the next field id
if key.field_id != self.field_id {
return Ok(ControlFlow::Break(()));
}
let docids_in_common = CboRoaringBitmapCodec::intersection_with_serialized(
value.bitmap_bytes,
candidates,
)?;
if !docids_in_common.is_empty() {
let any_docid_in_common = docids_in_common.min().unwrap();
match (self.callback)(key.left_bound, docids_in_common.len(), any_docid_in_common)?
{
ControlFlow::Continue(_) => (),
ControlFlow::Break(_) => return Ok(ControlFlow::Break(())),
}
}
}
Ok(ControlFlow::Continue(()))
}
fn iterate(
&mut self,
candidates: &RoaringBitmap,
level: u8,
starting_bound: &'t [u8],
group_size: usize,
) -> Result<ControlFlow<()>> {
if level == 0 {
return self.iterate_level_0(candidates, starting_bound, group_size);
}
let starting_key =
FacetGroupKey { field_id: self.field_id, level, left_bound: starting_bound };
let iter = self.db.range(self.rtxn, &(&starting_key..))?.take(group_size);
for el in iter {
let (key, value) = el?;
// The range is unbounded on the right and the group size for the highest level is MAX,
// so we need to check that we are not iterating over the next field id
if key.field_id != self.field_id {
return Ok(ControlFlow::Break(()));
}
let docids_in_common = CboRoaringBitmapCodec::intersection_with_serialized(
value.bitmap_bytes,
candidates,
)?;
if !docids_in_common.is_empty() {
let cf = self.iterate(
&docids_in_common,
level - 1,
key.left_bound,
value.size as usize,
)?;
match cf {
ControlFlow::Continue(_) => (),
ControlFlow::Break(_) => return Ok(ControlFlow::Break(())),
}
}
}
Ok(ControlFlow::Continue(()))
}
}
#[cfg(test)]
mod tests {
use std::ops::ControlFlow;
use heed::BytesDecode;
use roaring::RoaringBitmap;
use super::lexicographically_iterate_over_facet_distribution;
use crate::heed_codec::facet::OrderedF64Codec;
use crate::milli_snap;
use crate::search::facet::tests::{get_random_looking_index, get_simple_index};
#[test]
fn filter_distribution_all() {
let indexes = [get_simple_index(), get_random_looking_index()];
for (i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let candidates = (0..=255).collect::<RoaringBitmap>();
let mut results = String::new();
lexicographically_iterate_over_facet_distribution(
&txn,
index.content,
0,
&candidates,
|facet, count, _| {
let facet = OrderedF64Codec::bytes_decode(facet).unwrap();
results.push_str(&format!("{facet}: {count}\n"));
Ok(ControlFlow::Continue(()))
},
)
.unwrap();
milli_snap!(results, i);
txn.commit().unwrap();
}
}
#[test]
fn filter_distribution_all_stop_early() {
let indexes = [get_simple_index(), get_random_looking_index()];
for (i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let candidates = (0..=255).collect::<RoaringBitmap>();
let mut results = String::new();
let mut nbr_facets = 0;
lexicographically_iterate_over_facet_distribution(
&txn,
index.content,
0,
&candidates,
|facet, count, _| {
let facet = OrderedF64Codec::bytes_decode(facet).unwrap();
if nbr_facets == 100 {
Ok(ControlFlow::Break(()))
} else {
nbr_facets += 1;
results.push_str(&format!("{facet}: {count}\n"));
Ok(ControlFlow::Continue(()))
}
},
)
.unwrap();
milli_snap!(results, i);
txn.commit().unwrap();
}
}
}


@@ -0,0 +1,688 @@
use std::ops::{Bound, RangeBounds};
use heed::BytesEncode;
use roaring::RoaringBitmap;
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupLazyValueCodec, FacetGroupValueCodec,
};
use crate::heed_codec::BytesRefCodec;
use crate::{CboRoaringBitmapCodec, Result};
/// Find all the document ids for which the given field contains a value contained within
/// the two bounds.
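///
/// A sketch of a bounded lookup, assuming an open `rtxn` and a facet `db`
/// keyed by `OrderedF64Codec` (see the tests below for complete setups):
///
/// ```ignore
/// let mut docids = RoaringBitmap::new();
/// find_docids_of_facet_within_bounds::<OrderedF64Codec>(
///     &rtxn,
///     db,
///     0, // field id
///     &Bound::Included(0.0),
///     &Bound::Excluded(10.0),
///     None, // no universe restriction
///     &mut docids,
/// )?;
/// ```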
pub fn find_docids_of_facet_within_bounds<'t, BoundCodec>(
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<BoundCodec>, FacetGroupValueCodec>,
field_id: u16,
left: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
right: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
universe: Option<&RoaringBitmap>,
docids: &mut RoaringBitmap,
) -> Result<()>
where
BoundCodec: for<'a> BytesEncode<'a>,
for<'a> <BoundCodec as BytesEncode<'a>>::EItem: Sized,
{
let inner;
let left = match left {
Bound::Included(left) => {
inner = BoundCodec::bytes_encode(left).map_err(heed::Error::Encoding)?;
Bound::Included(inner.as_ref())
}
Bound::Excluded(left) => {
inner = BoundCodec::bytes_encode(left).map_err(heed::Error::Encoding)?;
Bound::Excluded(inner.as_ref())
}
Bound::Unbounded => Bound::Unbounded,
};
let inner;
let right = match right {
Bound::Included(right) => {
inner = BoundCodec::bytes_encode(right).map_err(heed::Error::Encoding)?;
Bound::Included(inner.as_ref())
}
Bound::Excluded(right) => {
inner = BoundCodec::bytes_encode(right).map_err(heed::Error::Encoding)?;
Bound::Excluded(inner.as_ref())
}
Bound::Unbounded => Bound::Unbounded,
};
let db = db.remap_types::<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupLazyValueCodec>();
let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, universe, docids };
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(starting_left_bound) =
get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)?
{
let rightmost_bound =
Bound::Included(get_last_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)?.unwrap()); // will not fail because get_first_facet_value succeeded
let group_size = usize::MAX;
f.run(highest_level, starting_left_bound, rightmost_bound, group_size)?;
Ok(())
} else {
Ok(())
}
}
/// Fetch the document ids that have a facet with a value between the two given bounds
struct FacetRangeSearch<'t, 'b, 'bitmap> {
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupLazyValueCodec>,
field_id: u16,
left: Bound<&'b [u8]>,
right: Bound<&'b [u8]>,
/// The subset of document ids that are useful for this search.
/// Great performance optimizations can be achieved by only fetching values matching this subset.
universe: Option<&'bitmap RoaringBitmap>,
docids: &'bitmap mut RoaringBitmap,
}
impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
fn run_level_0(&mut self, starting_left_bound: &'t [u8], group_size: usize) -> Result<()> {
let left_key =
FacetGroupKey { field_id: self.field_id, level: 0, left_bound: starting_left_bound };
let iter = self.db.range(self.rtxn, &(left_key..))?.take(group_size);
for el in iter {
let (key, value) = el?;
// the right side of the iter range is unbounded, so we need to make sure that we are not iterating
// on the next field id
if key.field_id != self.field_id {
return Ok(());
}
let should_skip = {
match self.left {
Bound::Included(left) => left > key.left_bound,
Bound::Excluded(left) => left >= key.left_bound,
Bound::Unbounded => false,
}
};
if should_skip {
continue;
}
let should_stop = {
match self.right {
Bound::Included(right) => right < key.left_bound,
Bound::Excluded(right) => right <= key.left_bound,
Bound::Unbounded => false,
}
};
if should_stop {
break;
}
if RangeBounds::<&[u8]>::contains(&(self.left, self.right), &key.left_bound) {
*self.docids |= match self.universe {
Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
value.bitmap_bytes,
universe,
)?,
None => CboRoaringBitmapCodec::deserialize_from(value.bitmap_bytes)?,
};
}
}
Ok(())
}
/// Recursive part of the algorithm for level > 0.
///
/// It works by visiting a slice of a level and checking whether the range associated
/// with each visited element is contained within the bounds.
///
/// 1. So long as the element's range is less than the left bound, we do nothing and keep iterating
/// 2. If the element's range is fully contained by the bounds, then all of its docids are added to
/// the roaring bitmap.
/// 3. If the element's range merely intersects the bounds, then we call the algorithm recursively
/// on the children of the element from the level below.
/// 4. If the element's range is greater than the right bound, we do nothing and stop iterating.
/// Note that the right bound is found through either the `left_bound` of the *next* element,
/// or from the `rightmost_bound` argument
///
/// ## Arguments
/// - `level`: the level being visited
/// - `starting_left_bound`: the left_bound of the first element to visit
/// - `rightmost_bound`: the right bound of the last element that should be visited
/// - `group_size`: the number of elements that should be visited
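///
/// For instance (illustrative values), with the bounds `Included(5)` and
/// `Excluded(20)`, and a level-1 slice whose elements cover `[0, 10)`,
/// `[10, 20)` and `[20, 30)`: the first element only partially overlaps the
/// bounds, so we recurse into its children; the second is fully contained, so
/// its docids are added wholesale; and the third starts at the right bound,
/// so we stop.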
fn run(
&mut self,
level: u8,
starting_left_bound: &'t [u8],
rightmost_bound: Bound<&'t [u8]>,
group_size: usize,
) -> Result<()> {
if level == 0 {
return self.run_level_0(starting_left_bound, group_size);
}
let left_key =
FacetGroupKey { field_id: self.field_id, level, left_bound: starting_left_bound };
let mut iter = self.db.range(self.rtxn, &(left_key..))?.take(group_size);
// We iterate over the range while keeping in memory the previous value
let (mut previous_key, mut previous_value) = iter.next().unwrap()?;
for el in iter {
let (next_key, next_value) = el?;
// the right of the iter range is potentially unbounded (e.g. if `group_size` is usize::MAX),
// so we need to make sure that we are not iterating on the next field id
if next_key.field_id != self.field_id {
break;
}
// now, do we skip, stop, or visit?
let should_skip = {
match self.left {
Bound::Included(left) => left >= next_key.left_bound,
Bound::Excluded(left) => left >= next_key.left_bound,
Bound::Unbounded => false,
}
};
if should_skip {
previous_key = next_key;
previous_value = next_value;
continue;
}
// should we stop?
// We should if the search range doesn't include any
// element from the previous key or its successors
let should_stop = {
match self.right {
Bound::Included(right) => right < previous_key.left_bound,
Bound::Excluded(right) => right <= previous_key.left_bound,
Bound::Unbounded => false,
}
};
if should_stop {
return Ok(());
}
// should we take the whole thing, without recursing down?
let should_take_whole_group = {
let left_condition = match self.left {
Bound::Included(left) => previous_key.left_bound >= left,
Bound::Excluded(left) => previous_key.left_bound > left,
Bound::Unbounded => true,
};
let right_condition = match self.right {
Bound::Included(right) => next_key.left_bound <= right,
Bound::Excluded(right) => next_key.left_bound <= right,
Bound::Unbounded => true,
};
left_condition && right_condition
};
if should_take_whole_group {
*self.docids |= match self.universe {
Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
previous_value.bitmap_bytes,
universe,
)?,
None => CboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?,
};
previous_key = next_key;
previous_value = next_value;
continue;
}
// from here, we should visit the children of the previous element and
// call the function recursively
let level = level - 1;
let starting_left_bound = previous_key.left_bound;
let rightmost_bound = Bound::Excluded(next_key.left_bound);
let group_size = previous_value.size as usize;
self.run(level, starting_left_bound, rightmost_bound, group_size)?;
previous_key = next_key;
previous_value = next_value;
}
// previous_key/previous_value are the last element's key/value
// now, do we skip, stop, or visit?
let should_skip = {
match (self.left, rightmost_bound) {
(Bound::Included(left), Bound::Included(right)) => left > right,
(Bound::Included(left), Bound::Excluded(right)) => left >= right,
(Bound::Excluded(left), Bound::Included(right) | Bound::Excluded(right)) => {
left >= right
}
(Bound::Unbounded, _) => false,
(_, Bound::Unbounded) => false, // should never run?
}
};
if should_skip {
return Ok(());
}
// should we stop?
// We should if the search range doesn't include any
// element from the previous key or its successors
let should_stop = {
match self.right {
Bound::Included(right) => right < previous_key.left_bound,
Bound::Excluded(right) => right <= previous_key.left_bound,
Bound::Unbounded => false,
}
};
if should_stop {
return Ok(());
}
// should we take the whole thing, without recursing down?
let should_take_whole_group = {
let left_condition = match self.left {
Bound::Included(left) => previous_key.left_bound >= left,
Bound::Excluded(left) => previous_key.left_bound > left,
Bound::Unbounded => true,
};
let right_condition = match (self.right, rightmost_bound) {
(Bound::Included(right), Bound::Included(rightmost)) => {
// we need to stay within the bound ..=right
// the element's range goes to ..=rightmost
// so the element fits entirely within the bound if rightmost <= right
rightmost <= right
}
(Bound::Included(right), Bound::Excluded(rightmost)) => {
// we need to stay within the bound ..=right
// the element's range goes to ..rightmost
// so the element fits entirely within the bound if rightmost <= right
rightmost <= right
}
(Bound::Excluded(right), Bound::Included(rightmost)) => {
// we need to stay within the bound ..right
// the element's range goes to ..=rightmost
// so the element fits entirely within the bound if rightmost < right
rightmost < right
}
(Bound::Excluded(right), Bound::Excluded(rightmost)) => {
// we need to stay within the bound ..right
// the element's range goes to ..rightmost
// so the element fits entirely within the bound if rightmost <= right
rightmost <= right
}
(Bound::Unbounded, _) => {
// we need to stay within the bound ..inf
// so the element always fits entirely within the bound
true
}
(_, Bound::Unbounded) => {
// we need to stay within a finite bound
// but the element's range goes to ..inf
// so the element never fits entirely within the bound
false
}
};
left_condition && right_condition
};
if should_take_whole_group {
*self.docids |= match self.universe {
Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
previous_value.bitmap_bytes,
universe,
)?,
None => CboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?,
};
} else {
let level = level - 1;
let starting_left_bound = previous_key.left_bound;
let group_size = previous_value.size as usize;
self.run(level, starting_left_bound, rightmost_bound, group_size)?;
}
Ok(())
}
}
#[cfg(test)]
mod tests {
use std::ops::Bound;
use roaring::RoaringBitmap;
use super::find_docids_of_facet_within_bounds;
use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
use crate::milli_snap;
use crate::search::facet::tests::{
get_random_looking_index, get_random_looking_index_with_multiple_field_ids,
get_simple_index, get_simple_index_with_multiple_field_ids,
};
use crate::snapshot_tests::display_bitmap;
#[test]
fn random_looking_index_snap() {
let index = get_random_looking_index();
milli_snap!(format!("{index}"), @"3256c76a7c1b768a013e78d5fa6e9ff9");
}
#[test]
fn random_looking_index_with_multiple_field_ids_snap() {
let index = get_random_looking_index_with_multiple_field_ids();
milli_snap!(format!("{index}"), @"c3e5fe06a8f1c404ed4935b32c90a89b");
}
#[test]
fn simple_index_snap() {
let index = get_simple_index();
milli_snap!(format!("{index}"), @"5dbfa134cc44abeb3ab6242fc182e48e");
}
#[test]
fn simple_index_with_multiple_field_ids_snap() {
let index = get_simple_index_with_multiple_field_ids();
milli_snap!(format!("{index}"), @"a4893298218f682bc76357f46777448c");
}
#[test]
fn filter_range_increasing() {
let indexes = [
get_simple_index(),
get_random_looking_index(),
get_simple_index_with_multiple_field_ids(),
get_random_looking_index_with_multiple_field_ids(),
];
for (i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let mut results = String::new();
for i in 0..=255 {
let i = i as f64;
let start = Bound::Included(0.);
let end = Bound::Included(i);
let mut docids = RoaringBitmap::new();
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
#[allow(clippy::format_push_string)]
results.push_str(&format!("0 <= . <= {i} : {}\n", display_bitmap(&docids)));
}
milli_snap!(results, format!("included_{i}"));
let mut results = String::new();
for i in 0..=255 {
let i = i as f64;
let start = Bound::Excluded(0.);
let end = Bound::Excluded(i);
let mut docids = RoaringBitmap::new();
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
#[allow(clippy::format_push_string)]
results.push_str(&format!("0 < . < {i} : {}\n", display_bitmap(&docids)));
}
milli_snap!(results, format!("excluded_{i}"));
txn.commit().unwrap();
}
}
#[test]
fn filter_range_decreasing() {
let indexes = [
get_simple_index(),
get_random_looking_index(),
get_simple_index_with_multiple_field_ids(),
get_random_looking_index_with_multiple_field_ids(),
];
for (i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let mut results = String::new();
for i in (0..=255).rev() {
let i = i as f64;
let start = Bound::Included(i);
let end = Bound::Included(255.);
let mut docids = RoaringBitmap::new();
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
results.push_str(&format!("{i} <= . <= 255 : {}\n", display_bitmap(&docids)));
}
milli_snap!(results, format!("included_{i}"));
let mut results = String::new();
for i in (0..=255).rev() {
let i = i as f64;
let start = Bound::Excluded(i);
let end = Bound::Excluded(255.);
let mut docids = RoaringBitmap::new();
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
results.push_str(&format!("{i} < . < 255 : {}\n", display_bitmap(&docids)));
}
milli_snap!(results, format!("excluded_{i}"));
txn.commit().unwrap();
}
}
#[test]
fn filter_range_pinch() {
let indexes = [
get_simple_index(),
get_random_looking_index(),
get_simple_index_with_multiple_field_ids(),
get_random_looking_index_with_multiple_field_ids(),
];
for (i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let mut results = String::new();
for i in (0..=128).rev() {
let i = i as f64;
let start = Bound::Included(i);
let end = Bound::Included(255. - i);
let mut docids = RoaringBitmap::new();
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
results.push_str(&format!(
"{i} <= . <= {r} : {docids}\n",
r = 255. - i,
docids = display_bitmap(&docids)
));
}
milli_snap!(results, format!("included_{i}"));
let mut results = String::new();
for i in (0..=128).rev() {
let i = i as f64;
let start = Bound::Excluded(i);
let end = Bound::Excluded(255. - i);
let mut docids = RoaringBitmap::new();
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
results.push_str(&format!(
"{i} < . < {r} {docids}\n",
r = 255. - i,
docids = display_bitmap(&docids)
));
}
milli_snap!(results, format!("excluded_{i}"));
txn.commit().unwrap();
}
}
#[test]
fn filter_range_unbounded() {
let indexes = [
get_simple_index(),
get_random_looking_index(),
get_simple_index_with_multiple_field_ids(),
get_random_looking_index_with_multiple_field_ids(),
];
for (i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let mut results = String::new();
for i in 0..=255 {
let i = i as f64;
let start = Bound::Included(i);
let end = Bound::Unbounded;
let mut docids = RoaringBitmap::new();
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
#[allow(clippy::format_push_string)]
results.push_str(&format!(">= {i}: {}\n", display_bitmap(&docids)));
}
milli_snap!(results, format!("start_from_included_{i}"));
let mut results = String::new();
for i in 0..=255 {
let i = i as f64;
let start = Bound::Unbounded;
let end = Bound::Included(i);
let mut docids = RoaringBitmap::new();
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
#[allow(clippy::format_push_string)]
results.push_str(&format!("<= {i}: {}\n", display_bitmap(&docids)));
}
milli_snap!(results, format!("end_at_included_{i}"));
let mut docids = RoaringBitmap::new();
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0,
&Bound::Unbounded,
&Bound::Unbounded,
None,
&mut docids,
)
.unwrap();
milli_snap!(
&format!("all field_id 0: {}\n", display_bitmap(&docids)),
format!("unbounded_field_id_0_{i}")
);
let mut docids = RoaringBitmap::new();
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
1,
&Bound::Unbounded,
&Bound::Unbounded,
None,
&mut docids,
)
.unwrap();
milli_snap!(
&format!("all field_id 1: {}\n", display_bitmap(&docids)),
format!("unbounded_field_id_1_{i}")
);
drop(txn);
}
}
#[test]
fn filter_range_exact() {
let indexes = [
get_simple_index(),
get_random_looking_index(),
get_simple_index_with_multiple_field_ids(),
get_random_looking_index_with_multiple_field_ids(),
];
for (i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let mut results_0 = String::new();
let mut results_1 = String::new();
for i in 0..=255 {
let i = i as f64;
let start = Bound::Included(i);
let end = Bound::Included(i);
let mut docids = RoaringBitmap::new();
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0,
&start,
&end,
None,
&mut docids,
)
.unwrap();
#[allow(clippy::format_push_string)]
results_0.push_str(&format!("{i}: {}\n", display_bitmap(&docids)));
let mut docids = RoaringBitmap::new();
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
1,
&start,
&end,
None,
&mut docids,
)
.unwrap();
#[allow(clippy::format_push_string)]
results_1.push_str(&format!("{i}: {}\n", display_bitmap(&docids)));
}
milli_snap!(results_0, format!("field_id_0_exact_{i}"));
milli_snap!(results_1, format!("field_id_1_exact_{i}"));
drop(txn);
}
}
}


@@ -0,0 +1,230 @@
use heed::Result;
use roaring::RoaringBitmap;
use super::{get_first_facet_value, get_highest_level};
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
};
use crate::heed_codec::BytesRefCodec;
/// Return an iterator which iterates over the given candidate documents in
/// ascending order of their facet value for the given field id.
///
/// The documents returned by the iterator are grouped by the facet values that
/// determined their rank. For example, given the documents:
///
/// ```text
/// 0: { "colour": ["blue", "green"] }
/// 1: { "colour": ["blue", "red"] }
/// 2: { "colour": ["orange", "red"] }
/// 3: { "colour": ["green", "red"] }
/// 4: { "colour": ["blue", "orange", "red"] }
/// ```
/// Then calling the function on the candidates `[0, 2, 3, 4]` will return an iterator
/// over the following elements:
/// ```text
/// [0, 4] // corresponds to all the documents within the candidates that have the facet value "blue"
/// [3] // same for "green"
/// [2] // same for "orange"
/// END
/// ```
/// Note that once a document id is returned by the iterator, it is never returned again.
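///
/// A sketch of a call, assuming an open `rtxn` and a facet `db` (see the
/// tests below for complete setups):
///
/// ```ignore
/// for result in ascending_facet_sort(&rtxn, db, 0, candidates)? {
///     let (docids, left_bound) = result?;
///     // `left_bound` contains the raw bytes of the facet value shared by `docids`
/// }
/// ```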
pub fn ascending_facet_sort<'t>(
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16,
candidates: RoaringBitmap,
) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX);
Ok(itertools::Either::Left(AscendingFacetSort {
rtxn,
db,
field_id,
stack: vec![(candidates, iter)],
}))
} else {
Ok(itertools::Either::Right(std::iter::empty()))
}
}
struct AscendingFacetSort<'t, 'e> {
rtxn: &'t heed::RoTxn<'e>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16,
#[allow(clippy::type_complexity)]
stack: Vec<(
RoaringBitmap,
std::iter::Take<heed::RoRange<'t, FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>>,
)>,
}
impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> {
type Item = Result<(RoaringBitmap, &'t [u8])>;
fn next(&mut self) -> Option<Self::Item> {
'outer: loop {
let (documents_ids, deepest_iter) = self.stack.last_mut()?;
for result in deepest_iter {
let (
FacetGroupKey { level, left_bound, field_id },
FacetGroupValue { size: group_size, mut bitmap },
) = result.unwrap();
// The range is unbounded on the right and the group size for the highest level is MAX,
// so we need to check that we are not iterating over the next field id
if field_id != self.field_id {
return None;
}
// If the last iterator found an empty set of documents it means
// that we already found all the documents in the sub-level iterations,
// so we can pop this level iterator.
if documents_ids.is_empty() {
// break out of the for loop into the end of the 'outer loop, which
// pops the stack
break;
}
bitmap &= &*documents_ids;
if !bitmap.is_empty() {
*documents_ids -= &bitmap;
if level == 0 {
// Since the level is 0, the left_bound is the exact value.
return Some(Ok((bitmap, left_bound)));
}
let starting_key_below =
FacetGroupKey { field_id: self.field_id, level: level - 1, left_bound };
let iter = match self.db.range(self.rtxn, &(starting_key_below..)) {
Ok(iter) => iter,
Err(e) => return Some(Err(e)),
}
.take(group_size as usize);
self.stack.push((bitmap, iter));
continue 'outer;
}
}
self.stack.pop();
}
}
}
#[cfg(test)]
mod tests {
use roaring::RoaringBitmap;
use crate::milli_snap;
use crate::search::facet::facet_sort_ascending::ascending_facet_sort;
use crate::search::facet::tests::{
get_random_looking_index, get_random_looking_string_index_with_multiple_field_ids,
get_simple_index, get_simple_string_index_with_multiple_field_ids,
};
use crate::snapshot_tests::display_bitmap;
#[test]
fn filter_sort_ascending() {
let indexes = [get_simple_index(), get_random_looking_index()];
for (i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let candidates = (200..=300).collect::<RoaringBitmap>();
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 0, candidates).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
milli_snap!(results, i);
txn.commit().unwrap();
}
}
#[test]
fn filter_sort_ascending_multiple_field_ids() {
let indexes = [
get_simple_string_index_with_multiple_field_ids(),
get_random_looking_string_index_with_multiple_field_ids(),
];
for (i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let candidates = (200..=300).collect::<RoaringBitmap>();
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
milli_snap!(results, format!("{i}-0"));
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 1, candidates).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
milli_snap!(results, format!("{i}-1"));
txn.commit().unwrap();
}
}
#[test]
fn filter_sort_ascending_with_no_candidates() {
let indexes = [
get_simple_string_index_with_multiple_field_ids(),
get_random_looking_string_index_with_multiple_field_ids(),
];
for index in indexes {
let txn = index.env.read_txn().unwrap();
let candidates = RoaringBitmap::new();
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
assert!(results.is_empty());
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 1, candidates).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
assert!(results.is_empty());
txn.commit().unwrap();
}
}
#[test]
fn filter_sort_ascending_with_inexisting_field_id() {
let indexes = [
get_simple_string_index_with_multiple_field_ids(),
get_random_looking_string_index_with_multiple_field_ids(),
];
for index in indexes {
let txn = index.env.read_txn().unwrap();
let candidates = RoaringBitmap::new();
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 3, candidates.clone()).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
assert!(results.is_empty());
txn.commit().unwrap();
}
}
}

@@ -0,0 +1,244 @@
use std::ops::Bound;
use heed::Result;
use roaring::RoaringBitmap;
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
};
use crate::heed_codec::BytesRefCodec;
/// See the documentation for [`ascending_facet_sort`](super::ascending_facet_sort).
///
/// This function does the same thing, but in the opposite order.
pub fn descending_facet_sort<'t>(
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16,
candidates: RoaringBitmap,
) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
let last_bound = get_last_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)?.unwrap();
let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound };
let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX);
Ok(itertools::Either::Left(DescendingFacetSort {
rtxn,
db,
field_id,
stack: vec![(candidates, iter, Bound::Included(last_bound))],
}))
} else {
Ok(itertools::Either::Right(std::iter::empty()))
}
}
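// Illustrative sketch, not part of the original change: draining the sorted
// iterator into a flat list of docids, from the highest facet value down to
// the lowest. The arguments are the same ones `descending_facet_sort` takes.
#[allow(dead_code)]
fn collect_descending<'t>(
    rtxn: &'t heed::RoTxn<'t>,
    db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
    field_id: u16,
    candidates: RoaringBitmap,
) -> Result<Vec<u32>> {
    let mut docids = Vec::new();
    for result in descending_facet_sort(rtxn, db, field_id, candidates)? {
        // Each item pairs the docids sharing a facet value with its raw left bound.
        let (bitmap, _left_bound) = result?;
        docids.extend(bitmap);
    }
    Ok(docids)
}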
struct DescendingFacetSort<'t> {
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
field_id: u16,
#[allow(clippy::type_complexity)]
stack: Vec<(
RoaringBitmap,
std::iter::Take<
heed::RoRevRange<'t, FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
>,
Bound<&'t [u8]>,
)>,
}
impl<'t> Iterator for DescendingFacetSort<'t> {
type Item = Result<(RoaringBitmap, &'t [u8])>;
fn next(&mut self) -> Option<Self::Item> {
'outer: loop {
let (documents_ids, deepest_iter, right_bound) = self.stack.last_mut()?;
for result in deepest_iter.by_ref() {
let (
FacetGroupKey { level, left_bound, field_id },
FacetGroupValue { size: group_size, mut bitmap },
) = result.unwrap();
// Defensive check: make sure we never iterate over keys
// that belong to the next field id.
if field_id != self.field_id {
return None;
}
// If the remaining set of candidate documents is empty, it means
// we already found all of them in the sub-level iterations,
// so we can pop this level iterator.
if documents_ids.is_empty() {
break;
}
bitmap &= &*documents_ids;
if !bitmap.is_empty() {
*documents_ids -= &bitmap;
if level == 0 {
// Since we're at level 0, the left_bound is the exact value.
return Some(Ok((bitmap, left_bound)));
}
let starting_key_below =
FacetGroupKey { field_id, level: level - 1, left_bound };
let end_key_below = match *right_bound {
Bound::Included(right) => Bound::Included(FacetGroupKey {
field_id,
level: level - 1,
left_bound: right,
}),
Bound::Excluded(right) => Bound::Excluded(FacetGroupKey {
field_id,
level: level - 1,
left_bound: right,
}),
Bound::Unbounded => Bound::Unbounded,
};
let prev_right_bound = *right_bound;
*right_bound = Bound::Excluded(left_bound);
let iter = match self
.db
.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>()
.rev_range(self.rtxn, &(Bound::Included(starting_key_below), end_key_below))
{
Ok(iter) => iter,
Err(e) => return Some(Err(e)),
}
.take(group_size as usize);
self.stack.push((bitmap, iter, prev_right_bound));
continue 'outer;
}
*right_bound = Bound::Excluded(left_bound);
}
self.stack.pop();
}
}
}
#[cfg(test)]
mod tests {
use roaring::RoaringBitmap;
use crate::heed_codec::facet::FacetGroupKeyCodec;
use crate::heed_codec::BytesRefCodec;
use crate::milli_snap;
use crate::search::facet::facet_sort_descending::descending_facet_sort;
use crate::search::facet::tests::{
get_random_looking_index, get_random_looking_string_index_with_multiple_field_ids,
get_simple_index, get_simple_index_with_multiple_field_ids,
get_simple_string_index_with_multiple_field_ids,
};
use crate::snapshot_tests::display_bitmap;
#[test]
fn filter_sort_descending() {
let indexes = [
get_simple_index(),
get_random_looking_index(),
get_simple_index_with_multiple_field_ids(),
];
for (i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let candidates = (200..=300).collect::<RoaringBitmap>();
let mut results = String::new();
let db = index.content.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
milli_snap!(results, i);
txn.commit().unwrap();
}
}
#[test]
fn filter_sort_descending_multiple_field_ids() {
let indexes = [
get_simple_string_index_with_multiple_field_ids(),
get_random_looking_string_index_with_multiple_field_ids(),
];
for (i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let candidates = (200..=300).collect::<RoaringBitmap>();
let mut results = String::new();
let db = index.content.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
let iter = descending_facet_sort(&txn, db, 0, candidates.clone()).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
milli_snap!(results, format!("{i}-0"));
let mut results = String::new();
let iter = descending_facet_sort(&txn, db, 1, candidates).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
milli_snap!(results, format!("{i}-1"));
txn.commit().unwrap();
}
}
#[test]
fn filter_sort_descending_with_no_candidates() {
let indexes = [
get_simple_string_index_with_multiple_field_ids(),
get_random_looking_string_index_with_multiple_field_ids(),
];
for index in indexes {
let txn = index.env.read_txn().unwrap();
let candidates = RoaringBitmap::new();
let mut results = String::new();
let iter = descending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
assert!(results.is_empty());
let mut results = String::new();
let iter = descending_facet_sort(&txn, index.content, 1, candidates).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
assert!(results.is_empty());
txn.commit().unwrap();
}
}
#[test]
fn filter_sort_descending_with_inexisting_field_id() {
let indexes = [
get_simple_string_index_with_multiple_field_ids(),
get_random_looking_string_index_with_multiple_field_ids(),
];
for index in indexes {
let txn = index.env.read_txn().unwrap();
let candidates = RoaringBitmap::new();
let mut results = String::new();
let iter = descending_facet_sort(&txn, index.content, 3, candidates.clone()).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
assert!(results.is_empty());
txn.commit().unwrap();
}
}
}

File diff suppressed because it is too large

@@ -0,0 +1,227 @@
pub use facet_sort_ascending::ascending_facet_sort;
pub use facet_sort_descending::descending_facet_sort;
use heed::types::{Bytes, DecodeIgnore};
use heed::{BytesDecode, RoTxn};
use roaring::RoaringBitmap;
pub use self::facet_distribution::{FacetDistribution, OrderBy, DEFAULT_VALUES_PER_FACET};
pub use self::filter::{BadGeoError, Filter};
pub use self::search::{FacetValueHit, SearchForFacetValues};
use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
use crate::heed_codec::BytesRefCodec;
use crate::{Index, Result};
mod facet_distribution;
mod facet_distribution_iter;
mod facet_range_search;
mod facet_sort_ascending;
mod facet_sort_descending;
mod filter;
mod search;
fn facet_extreme_value<'t>(
mut extreme_it: impl Iterator<Item = heed::Result<(RoaringBitmap, &'t [u8])>> + 't,
) -> Result<Option<f64>> {
let extreme_value =
if let Some(extreme_value) = extreme_it.next() { extreme_value } else { return Ok(None) };
let (_, extreme_value) = extreme_value?;
OrderedF64Codec::bytes_decode(extreme_value)
.map(Some)
.map_err(heed::Error::Decoding)
.map_err(Into::into)
}
pub fn facet_min_value<'t>(
index: &'t Index,
rtxn: &'t heed::RoTxn<'t>,
field_id: u16,
candidates: RoaringBitmap,
) -> Result<Option<f64>> {
let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
let it = ascending_facet_sort(rtxn, db, field_id, candidates)?;
facet_extreme_value(it)
}
pub fn facet_max_value<'t>(
index: &'t Index,
rtxn: &'t heed::RoTxn<'t>,
field_id: u16,
candidates: RoaringBitmap,
) -> Result<Option<f64>> {
let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
let it = descending_facet_sort(rtxn, db, field_id, candidates)?;
facet_extreme_value(it)
}
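/// Hypothetical helper, shown only as an illustration of how the two
/// functions above compose: fetch both extremes of a numeric facet at once.
#[allow(dead_code)]
pub fn facet_value_range<'t>(
    index: &'t Index,
    rtxn: &'t heed::RoTxn<'t>,
    field_id: u16,
    candidates: RoaringBitmap,
) -> Result<Option<(f64, f64)>> {
    let min = facet_min_value(index, rtxn, field_id, candidates.clone())?;
    let max = facet_max_value(index, rtxn, field_id, candidates)?;
    // Both sides are `Some` as soon as one facet value exists for this field.
    Ok(min.zip(max))
}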
/// Get the first facet value in the facet database
pub(crate) fn get_first_facet_value<'t, BoundCodec, DC>(
txn: &'t RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>,
field_id: u16,
) -> heed::Result<Option<BoundCodec::DItem>>
where
BoundCodec: BytesDecode<'t>,
{
let mut level0prefix = vec![];
level0prefix.extend_from_slice(&field_id.to_be_bytes());
level0prefix.push(0);
let mut level0_iter_forward =
db.remap_types::<Bytes, DecodeIgnore>().prefix_iter(txn, level0prefix.as_slice())?;
if let Some(first) = level0_iter_forward.next() {
let (first_key, _) = first?;
let first_key = FacetGroupKeyCodec::<BoundCodec>::bytes_decode(first_key)
.map_err(heed::Error::Decoding)?;
Ok(Some(first_key.left_bound))
} else {
Ok(None)
}
}
/// Get the last facet value in the facet database
pub(crate) fn get_last_facet_value<'t, BoundCodec, DC>(
txn: &'t RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>,
field_id: u16,
) -> heed::Result<Option<BoundCodec::DItem>>
where
BoundCodec: BytesDecode<'t>,
{
let mut level0prefix = vec![];
level0prefix.extend_from_slice(&field_id.to_be_bytes());
level0prefix.push(0);
let mut level0_iter_backward =
db.remap_types::<Bytes, DecodeIgnore>().rev_prefix_iter(txn, level0prefix.as_slice())?;
if let Some(last) = level0_iter_backward.next() {
let (last_key, _) = last?;
let last_key = FacetGroupKeyCodec::<BoundCodec>::bytes_decode(last_key)
.map_err(heed::Error::Decoding)?;
Ok(Some(last_key.left_bound))
} else {
Ok(None)
}
}
/// Get the height of the highest level in the facet database
pub(crate) fn get_highest_level<'t, DC>(
txn: &'t RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>,
field_id: u16,
) -> heed::Result<u8> {
let field_id_prefix = &field_id.to_be_bytes();
Ok(db
.remap_types::<Bytes, DecodeIgnore>()
.rev_prefix_iter(txn, field_id_prefix)?
.next()
.map(|el| {
let (key, _) = el.unwrap();
let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key).unwrap();
key.level
})
.unwrap_or(0))
}
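// Not in the original change: a small sketch of the key-layout assumption the
// helpers above share. Facet keys start with the big-endian field id followed
// by the level byte, so the level-0 prefix for a field is built like this.
#[allow(dead_code)]
fn level0_prefix(field_id: u16) -> Vec<u8> {
    let mut prefix = Vec::with_capacity(3);
    prefix.extend_from_slice(&field_id.to_be_bytes()); // 2 bytes, big-endian
    prefix.push(0); // level 0
    prefix
}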
#[cfg(test)]
pub(crate) mod tests {
use rand::{Rng, SeedableRng};
use roaring::RoaringBitmap;
use crate::heed_codec::facet::OrderedF64Codec;
use crate::heed_codec::StrRefCodec;
use crate::update::facet::test_helpers::FacetIndex;
pub fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
let mut txn = index.env.write_txn().unwrap();
for i in 0..256u16 {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(i as u32);
index.insert(&mut txn, 0, &(i as f64), &bitmap);
}
txn.commit().unwrap();
index
}
pub fn get_random_looking_index() -> FacetIndex<OrderedF64Codec> {
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
let mut txn = index.env.write_txn().unwrap();
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
for key in std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128) {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(key);
bitmap.insert(key + 100);
index.insert(&mut txn, 0, &(key as f64), &bitmap);
}
txn.commit().unwrap();
index
}
pub fn get_simple_index_with_multiple_field_ids() -> FacetIndex<OrderedF64Codec> {
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
let mut txn = index.env.write_txn().unwrap();
for fid in 0..2 {
for i in 0..256u16 {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(i as u32);
index.insert(&mut txn, fid, &(i as f64), &bitmap);
}
}
txn.commit().unwrap();
index
}
pub fn get_random_looking_index_with_multiple_field_ids() -> FacetIndex<OrderedF64Codec> {
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
let mut txn = index.env.write_txn().unwrap();
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
let keys =
std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::<Vec<u32>>();
for fid in 0..2 {
for &key in &keys {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(key);
bitmap.insert(key + 100);
index.insert(&mut txn, fid, &(key as f64), &bitmap);
}
}
txn.commit().unwrap();
index
}
pub fn get_simple_string_index_with_multiple_field_ids() -> FacetIndex<StrRefCodec> {
let index = FacetIndex::<StrRefCodec>::new(4, 8, 5);
let mut txn = index.env.write_txn().unwrap();
for fid in 0..2 {
for i in 0..256u16 {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(i as u32);
if i % 2 == 0 {
index.insert(&mut txn, fid, &format!("{i}").as_str(), &bitmap);
} else {
index.insert(&mut txn, fid, &"", &bitmap);
}
}
}
txn.commit().unwrap();
index
}
pub fn get_random_looking_string_index_with_multiple_field_ids() -> FacetIndex<StrRefCodec> {
let index = FacetIndex::<StrRefCodec>::new(4, 8, 5);
let mut txn = index.env.write_txn().unwrap();
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
let keys =
std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::<Vec<u32>>();
for fid in 0..2 {
for &key in &keys {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(key);
bitmap.insert(key + 100);
if key % 2 == 0 {
index.insert(&mut txn, fid, &format!("{key}").as_str(), &bitmap);
} else {
index.insert(&mut txn, fid, &"", &bitmap);
}
}
}
txn.commit().unwrap();
index
}
}

@@ -0,0 +1,358 @@
use std::cmp::{Ordering, Reverse};
use std::collections::BinaryHeap;
use std::ops::ControlFlow;
use charabia::normalizer::NormalizerOption;
use charabia::{Language, Normalize, StrDetection, Token};
use fst::automaton::{Automaton, Str};
use fst::{IntoStreamer, Streamer};
use roaring::RoaringBitmap;
use tracing::error;
use crate::error::UserError;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
use crate::search::build_dfa;
use crate::{DocumentId, FieldId, OrderBy, Result, Search};
/// The maximum number of values per facet returned by the facet search route.
const DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET: usize = 100;
pub struct SearchForFacetValues<'a> {
query: Option<String>,
facet: String,
search_query: Search<'a>,
max_values: usize,
is_hybrid: bool,
locales: Option<Vec<Language>>,
}
impl<'a> SearchForFacetValues<'a> {
pub fn new(
facet: String,
search_query: Search<'a>,
is_hybrid: bool,
) -> SearchForFacetValues<'a> {
SearchForFacetValues {
query: None,
facet,
search_query,
max_values: DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET,
is_hybrid,
locales: None,
}
}
pub fn query(&mut self, query: impl Into<String>) -> &mut Self {
self.query = Some(query.into());
self
}
pub fn max_values(&mut self, max: usize) -> &mut Self {
self.max_values = max;
self
}
pub fn locales(&mut self, locales: Vec<Language>) -> &mut Self {
self.locales = Some(locales);
self
}
fn one_original_value_of(
&self,
field_id: FieldId,
facet_str: &str,
any_docid: DocumentId,
) -> Result<Option<String>> {
let index = self.search_query.index;
let rtxn = self.search_query.rtxn;
let key: (FieldId, _, &str) = (field_id, any_docid, facet_str);
Ok(index.field_id_docid_facet_strings.get(rtxn, &key)?.map(|v| v.to_owned()))
}
pub fn execute(&self) -> Result<Vec<FacetValueHit>> {
let index = self.search_query.index;
let rtxn = self.search_query.rtxn;
let filterable_fields = index.filterable_fields(rtxn)?;
if !filterable_fields.contains(&self.facet) {
let (valid_fields, hidden_fields) =
index.remove_hidden_fields(rtxn, filterable_fields)?;
return Err(UserError::InvalidFacetSearchFacetName {
field: self.facet.clone(),
valid_fields,
hidden_fields,
}
.into());
}
let fields_ids_map = index.fields_ids_map(rtxn)?;
let fid = match fields_ids_map.id(&self.facet) {
Some(fid) => fid,
// we return an empty list of results when the attribute has been
// set as filterable but no document contains this field (yet).
None => return Ok(Vec::new()),
};
let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &fid)? {
Some(fst) => fst,
None => return Ok(Vec::new()),
};
let search_candidates = self.search_query.execute_for_candidates(
self.is_hybrid
|| self
.search_query
.semantic
.as_ref()
.and_then(|semantic| semantic.vector.as_ref())
.is_some(),
)?;
let mut results = match index.sort_facet_values_by(rtxn)?.get(&self.facet) {
OrderBy::Lexicographic => ValuesCollection::by_lexicographic(self.max_values),
OrderBy::Count => ValuesCollection::by_count(self.max_values),
};
match self.query.as_ref() {
Some(query) => {
let query = normalize_facet_string(query, self.locales.as_deref());
let query = query.as_ref();
let authorize_typos = self.search_query.index.authorize_typos(rtxn)?;
let field_authorizes_typos =
!self.search_query.index.exact_attributes_ids(rtxn)?.contains(&fid);
if authorize_typos && field_authorizes_typos {
let exact_words_fst = self.search_query.index.exact_words(rtxn)?;
if exact_words_fst.map_or(false, |fst| fst.contains(query)) {
if fst.contains(query) {
self.fetch_original_facets_using_normalized(
fid,
query,
query,
&search_candidates,
&mut results,
)?;
}
} else {
let one_typo = self.search_query.index.min_word_len_one_typo(rtxn)?;
let two_typos = self.search_query.index.min_word_len_two_typos(rtxn)?;
let is_prefix = true;
let automaton = if query.len() < one_typo as usize {
build_dfa(query, 0, is_prefix)
} else if query.len() < two_typos as usize {
build_dfa(query, 1, is_prefix)
} else {
build_dfa(query, 2, is_prefix)
};
let mut stream = fst.search(automaton).into_stream();
while let Some(facet_value) = stream.next() {
let value = std::str::from_utf8(facet_value)?;
if self
.fetch_original_facets_using_normalized(
fid,
value,
query,
&search_candidates,
&mut results,
)?
.is_break()
{
break;
}
}
}
} else {
let automaton = Str::new(query).starts_with();
let mut stream = fst.search(automaton).into_stream();
while let Some(facet_value) = stream.next() {
let value = std::str::from_utf8(facet_value)?;
if self
.fetch_original_facets_using_normalized(
fid,
value,
query,
&search_candidates,
&mut results,
)?
.is_break()
{
break;
}
}
}
}
None => {
let prefix = FacetGroupKey { field_id: fid, level: 0, left_bound: "" };
for result in index.facet_id_string_docids.prefix_iter(rtxn, &prefix)? {
let (FacetGroupKey { left_bound, .. }, FacetGroupValue { bitmap, .. }) =
result?;
let count = search_candidates.intersection_len(&bitmap);
if count != 0 {
let value = self
.one_original_value_of(fid, left_bound, bitmap.min().unwrap())?
.unwrap_or_else(|| left_bound.to_string());
if results.insert(FacetValueHit { value, count }).is_break() {
break;
}
}
}
}
}
Ok(results.into_sorted_vec())
}
fn fetch_original_facets_using_normalized(
&self,
fid: FieldId,
value: &str,
query: &str,
search_candidates: &RoaringBitmap,
results: &mut ValuesCollection,
) -> Result<ControlFlow<()>> {
let index = self.search_query.index;
let rtxn = self.search_query.rtxn;
let database = index.facet_id_normalized_string_strings;
let key = (fid, value);
let original_strings = match database.get(rtxn, &key)? {
Some(original_strings) => original_strings,
None => {
error!("the facet value is missing from the facet database: {key:?}");
return Ok(ControlFlow::Continue(()));
}
};
for original in original_strings {
let key = FacetGroupKey { field_id: fid, level: 0, left_bound: original.as_str() };
let docids = match index.facet_id_string_docids.get(rtxn, &key)? {
Some(FacetGroupValue { bitmap, .. }) => bitmap,
None => {
error!("the facet value is missing from the facet database: {key:?}");
return Ok(ControlFlow::Continue(()));
}
};
let count = search_candidates.intersection_len(&docids);
if count != 0 {
let value = self
.one_original_value_of(fid, &original, docids.min().unwrap())?
.unwrap_or_else(|| query.to_string());
if results.insert(FacetValueHit { value, count }).is_break() {
break;
}
}
}
Ok(ControlFlow::Continue(()))
}
}
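// Usage sketch (hypothetical facet name and values, not part of this commit):
// a typical facet-value search over an already-configured `Search`.
//
//     let mut facet_search = SearchForFacetValues::new("genre".to_string(), search, false);
//     facet_search.query("rom").max_values(10);
//     for FacetValueHit { value, count } in facet_search.execute()? {
//         println!("{value}: {count}");
//     }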
#[derive(Debug, Clone, serde::Serialize, PartialEq)]
pub struct FacetValueHit {
/// The original facet value
pub value: String,
/// The number of documents associated to this facet
pub count: u64,
}
impl PartialOrd for FacetValueHit {
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
Some(self.cmp(other))
}
}
impl Ord for FacetValueHit {
fn cmp(&self, other: &Self) -> Ordering {
self.count.cmp(&other.count).then_with(|| self.value.cmp(&other.value))
}
}
impl Eq for FacetValueHit {}
/// A wrapper type that collects the best facet values by
/// lexicographic or number of associated values.
enum ValuesCollection {
/// Keeps the top values according to the lexicographic order.
Lexicographic { max: usize, content: Vec<FacetValueHit> },
/// Keeps the top values according to the number of values associated to them.
///
/// Note that it is a max heap and we need to move the smallest counts
/// to the top to be able to pop them when we reach the max_values limit.
Count { max: usize, content: BinaryHeap<Reverse<FacetValueHit>> },
}
impl ValuesCollection {
pub fn by_lexicographic(max: usize) -> Self {
ValuesCollection::Lexicographic { max, content: Vec::new() }
}
pub fn by_count(max: usize) -> Self {
ValuesCollection::Count { max, content: BinaryHeap::new() }
}
pub fn insert(&mut self, value: FacetValueHit) -> ControlFlow<()> {
match self {
ValuesCollection::Lexicographic { max, content } => {
if content.len() < *max {
content.push(value);
if content.len() < *max {
return ControlFlow::Continue(());
}
}
ControlFlow::Break(())
}
ValuesCollection::Count { max, content } => {
if content.len() == *max {
// Peeking gives us the worst value in the list as
// this is a max-heap and we reversed it.
let Some(mut peek) = content.peek_mut() else { return ControlFlow::Break(()) };
if peek.0.count <= value.count {
// Replace the current worst value in the heap
// with the new one we received that is better.
*peek = Reverse(value);
}
} else {
content.push(Reverse(value));
}
ControlFlow::Continue(())
}
}
}
/// Returns the list of facet values in descending order of, either,
/// count or lexicographic order of the value depending on the type.
pub fn into_sorted_vec(self) -> Vec<FacetValueHit> {
match self {
ValuesCollection::Lexicographic { content, .. } => content.into_iter().collect(),
ValuesCollection::Count { content, .. } => {
// Convert the heap into a vec of hits by removing the Reverse wrapper.
// Hits come out in the right order: `into_sorted_vec` returns ascending
// order, which the Reverse wrapper turns into descending order of count.
content.into_sorted_vec().into_iter().map(|Reverse(hit)| hit).collect()
}
}
}
}
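// Not part of the original change: a minimal sketch of the top-k behaviour of
// the count-based collection, assuming distinct counts; with max = 2 the
// lowest count is evicted and results come back in descending count order.
#[cfg(test)]
mod values_collection_sketch {
    use super::*;

    #[test]
    fn keeps_top_counts() {
        let mut collection = ValuesCollection::by_count(2);
        for (value, count) in [("a", 1), ("b", 5), ("c", 3)] {
            let _ = collection.insert(FacetValueHit { value: value.to_string(), count });
        }
        let hits = collection.into_sorted_vec();
        // "a" (count 1) was evicted when "c" (count 3) arrived.
        assert_eq!(hits.len(), 2);
        assert_eq!(hits[0].count, 5);
        assert_eq!(hits[1].count, 3);
    }
}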
fn normalize_facet_string(facet_string: &str, locales: Option<&[Language]>) -> String {
let options = NormalizerOption { lossy: true, ..Default::default() };
let mut detection = StrDetection::new(facet_string, locales);
// Detect the language of the facet string only if several locales are explicitly provided.
let language = match locales {
Some(&[language]) => Some(language),
Some(multiple_locales) if multiple_locales.len() > 1 => detection.language(),
_ => None,
};
let token = Token {
lemma: std::borrow::Cow::Borrowed(facet_string),
script: detection.script(),
language,
..Default::default()
};
token.normalize(&options).lemma.into_owned()
}
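// Behaviour sketch (illustrative values, not part of this commit): with a
// single explicit locale the detection step is skipped and the locale is used
// as-is; lossy normalization typically lowercases and strips diacritics.
//
//     let normalized = normalize_facet_string("Déjà Vu", Some(&[Language::Fra]));
//     // e.g. "deja vu"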

@@ -0,0 +1,260 @@
---
source: milli/src/search/facet/facet_distribution_iter.rs
---
0: 1
1: 1
2: 1
3: 1
4: 1
5: 1
6: 1
7: 1
8: 1
9: 1
10: 1
11: 1
12: 1
13: 1
14: 1
15: 1
16: 1
17: 1
18: 1
19: 1
20: 1
21: 1
22: 1
23: 1
24: 1
25: 1
26: 1
27: 1
28: 1
29: 1
30: 1
31: 1
32: 1
33: 1
34: 1
35: 1
36: 1
37: 1
38: 1
39: 1
40: 1
41: 1
42: 1
43: 1
44: 1
45: 1
46: 1
47: 1
48: 1
49: 1
50: 1
51: 1
52: 1
53: 1
54: 1
55: 1
56: 1
57: 1
58: 1
59: 1
60: 1
61: 1
62: 1
63: 1
64: 1
65: 1
66: 1
67: 1
68: 1
69: 1
70: 1
71: 1
72: 1
73: 1
74: 1
75: 1
76: 1
77: 1
78: 1
79: 1
80: 1
81: 1
82: 1
83: 1
84: 1
85: 1
86: 1
87: 1
88: 1
89: 1
90: 1
91: 1
92: 1
93: 1
94: 1
95: 1
96: 1
97: 1
98: 1
99: 1
100: 1
101: 1
102: 1
103: 1
104: 1
105: 1
106: 1
107: 1
108: 1
109: 1
110: 1
111: 1
112: 1
113: 1
114: 1
115: 1
116: 1
117: 1
118: 1
119: 1
120: 1
121: 1
122: 1
123: 1
124: 1
125: 1
126: 1
127: 1
128: 1
129: 1
130: 1
131: 1
132: 1
133: 1
134: 1
135: 1
136: 1
137: 1
138: 1
139: 1
140: 1
141: 1
142: 1
143: 1
144: 1
145: 1
146: 1
147: 1
148: 1
149: 1
150: 1
151: 1
152: 1
153: 1
154: 1
155: 1
156: 1
157: 1
158: 1
159: 1
160: 1
161: 1
162: 1
163: 1
164: 1
165: 1
166: 1
167: 1
168: 1
169: 1
170: 1
171: 1
172: 1
173: 1
174: 1
175: 1
176: 1
177: 1
178: 1
179: 1
180: 1
181: 1
182: 1
183: 1
184: 1
185: 1
186: 1
187: 1
188: 1
189: 1
190: 1
191: 1
192: 1
193: 1
194: 1
195: 1
196: 1
197: 1
198: 1
199: 1
200: 1
201: 1
202: 1
203: 1
204: 1
205: 1
206: 1
207: 1
208: 1
209: 1
210: 1
211: 1
212: 1
213: 1
214: 1
215: 1
216: 1
217: 1
218: 1
219: 1
220: 1
221: 1
222: 1
223: 1
224: 1
225: 1
226: 1
227: 1
228: 1
229: 1
230: 1
231: 1
232: 1
233: 1
234: 1
235: 1
236: 1
237: 1
238: 1
239: 1
240: 1
241: 1
242: 1
243: 1
244: 1
245: 1
246: 1
247: 1
248: 1
249: 1
250: 1
251: 1
252: 1
253: 1
254: 1
255: 1

@@ -0,0 +1,105 @@
---
source: milli/src/search/facet/facet_distribution_iter.rs
---
3: 2
5: 2
6: 2
9: 2
10: 2
11: 2
14: 2
18: 2
19: 2
24: 2
26: 2
28: 2
29: 2
32: 2
33: 2
35: 2
36: 2
37: 2
38: 2
39: 2
41: 2
46: 2
47: 2
49: 2
52: 2
53: 2
55: 2
59: 2
61: 2
64: 2
68: 2
71: 2
74: 2
75: 2
76: 2
81: 2
83: 2
85: 2
86: 2
88: 2
90: 2
91: 2
92: 2
98: 2
99: 2
101: 2
102: 2
103: 2
107: 2
111: 2
115: 2
119: 2
123: 2
124: 2
130: 2
131: 2
133: 2
135: 2
136: 2
137: 2
139: 2
141: 2
143: 2
144: 2
147: 2
150: 2
156: 1
158: 1
160: 1
162: 1
163: 1
164: 1
167: 1
169: 1
173: 1
177: 1
178: 1
179: 1
181: 1
182: 1
186: 1
189: 1
192: 1
193: 1
195: 1
197: 1
205: 1
206: 1
207: 1
208: 1
209: 1
210: 1
216: 1
219: 1
220: 1
223: 1
226: 1
235: 1
236: 1
238: 1
243: 1

@@ -0,0 +1,104 @@
---
source: milli/src/search/facet/facet_distribution_iter.rs
---
0: 1
1: 1
2: 1
3: 1
4: 1
5: 1
6: 1
7: 1
8: 1
9: 1
10: 1
11: 1
12: 1
13: 1
14: 1
15: 1
16: 1
17: 1
18: 1
19: 1
20: 1
21: 1
22: 1
23: 1
24: 1
25: 1
26: 1
27: 1
28: 1
29: 1
30: 1
31: 1
32: 1
33: 1
34: 1
35: 1
36: 1
37: 1
38: 1
39: 1
40: 1
41: 1
42: 1
43: 1
44: 1
45: 1
46: 1
47: 1
48: 1
49: 1
50: 1
51: 1
52: 1
53: 1
54: 1
55: 1
56: 1
57: 1
58: 1
59: 1
60: 1
61: 1
62: 1
63: 1
64: 1
65: 1
66: 1
67: 1
68: 1
69: 1
70: 1
71: 1
72: 1
73: 1
74: 1
75: 1
76: 1
77: 1
78: 1
79: 1
80: 1
81: 1
82: 1
83: 1
84: 1
85: 1
86: 1
87: 1
88: 1
89: 1
90: 1
91: 1
92: 1
93: 1
94: 1
95: 1
96: 1
97: 1
98: 1
99: 1

@@ -0,0 +1,104 @@
---
source: milli/src/search/facet/facet_distribution_iter.rs
---
3: 2
5: 2
6: 2
9: 2
10: 2
11: 2
14: 2
18: 2
19: 2
24: 2
26: 2
28: 2
29: 2
32: 2
33: 2
35: 2
36: 2
37: 2
38: 2
39: 2
41: 2
46: 2
47: 2
49: 2
52: 2
53: 2
55: 2
59: 2
61: 2
64: 2
68: 2
71: 2
74: 2
75: 2
76: 2
81: 2
83: 2
85: 2
86: 2
88: 2
90: 2
91: 2
92: 2
98: 2
99: 2
101: 2
102: 2
103: 2
107: 2
111: 2
115: 2
119: 2
123: 2
124: 2
130: 2
131: 2
133: 2
135: 2
136: 2
137: 2
139: 2
141: 2
143: 2
144: 2
147: 2
150: 2
156: 1
158: 1
160: 1
162: 1
163: 1
164: 1
167: 1
169: 1
173: 1
177: 1
178: 1
179: 1
181: 1
182: 1
186: 1
189: 1
192: 1
193: 1
195: 1
197: 1
205: 1
206: 1
207: 1
208: 1
209: 1
210: 1
216: 1
219: 1
220: 1
223: 1
226: 1
235: 1
236: 1
238: 1

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
adf484f467a31ee9460dec539621938a

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
c9939aa4977fcd4bfd35852e102dbc82

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
adf484f467a31ee9460dec539621938a

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
c9939aa4977fcd4bfd35852e102dbc82

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
618738d28ff1386b6e93d171a5acb08f

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
ffb62ab3eef55c2254c13dc0f4099849

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
618738d28ff1386b6e93d171a5acb08f

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
ffb62ab3eef55c2254c13dc0f4099849

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
9c25261cec7275cb5cfd85835904d023

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
2f97f18c15e915853e4df879be6e1f63

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
9c25261cec7275cb5cfd85835904d023

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
2f97f18c15e915853e4df879be6e1f63

@@ -0,0 +1,260 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
0: []
1: []
2: []
3: []
4: []
5: []
6: []
7: []
8: []
9: []
10: []
11: []
12: []
13: []
14: []
15: []
16: []
17: []
18: []
19: []
20: []
21: []
22: []
23: []
24: []
25: []
26: []
27: []
28: []
29: []
30: []
31: []
32: []
33: []
34: []
35: []
36: []
37: []
38: []
39: []
40: []
41: []
42: []
43: []
44: []
45: []
46: []
47: []
48: []
49: []
50: []
51: []
52: []
53: []
54: []
55: []
56: []
57: []
58: []
59: []
60: []
61: []
62: []
63: []
64: []
65: []
66: []
67: []
68: []
69: []
70: []
71: []
72: []
73: []
74: []
75: []
76: []
77: []
78: []
79: []
80: []
81: []
82: []
83: []
84: []
85: []
86: []
87: []
88: []
89: []
90: []
91: []
92: []
93: []
94: []
95: []
96: []
97: []
98: []
99: []
100: []
101: []
102: []
103: []
104: []
105: []
106: []
107: []
108: []
109: []
110: []
111: []
112: []
113: []
114: []
115: []
116: []
117: []
118: []
119: []
120: []
121: []
122: []
123: []
124: []
125: []
126: []
127: []
128: []
129: []
130: []
131: []
132: []
133: []
134: []
135: []
136: []
137: []
138: []
139: []
140: []
141: []
142: []
143: []
144: []
145: []
146: []
147: []
148: []
149: []
150: []
151: []
152: []
153: []
154: []
155: []
156: []
157: []
158: []
159: []
160: []
161: []
162: []
163: []
164: []
165: []
166: []
167: []
168: []
169: []
170: []
171: []
172: []
173: []
174: []
175: []
176: []
177: []
178: []
179: []
180: []
181: []
182: []
183: []
184: []
185: []
186: []
187: []
188: []
189: []
190: []
191: []
192: []
193: []
194: []
195: []
196: []
197: []
198: []
199: []
200: []
201: []
202: []
203: []
204: []
205: []
206: []
207: []
208: []
209: []
210: []
211: []
212: []
213: []
214: []
215: []
216: []
217: []
218: []
219: []
220: []
221: []
222: []
223: []
224: []
225: []
226: []
227: []
228: []
229: []
230: []
231: []
232: []
233: []
234: []
235: []
236: []
237: []
238: []
239: []
240: []
241: []
242: []
243: []
244: []
245: []
246: []
247: []
248: []
249: []
250: []
251: []
252: []
253: []
254: []
255: []

@@ -0,0 +1,260 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
0: []
1: []
2: []
3: []
4: []
5: []
6: []
7: []
8: []
9: []
10: []
11: []
12: []
13: []
14: []
15: []
16: []
17: []
18: []
19: []
20: []
21: []
22: []
23: []
24: []
25: []
26: []
27: []
28: []
29: []
30: []
31: []
32: []
33: []
34: []
35: []
36: []
37: []
38: []
39: []
40: []
41: []
42: []
43: []
44: []
45: []
46: []
47: []
48: []
49: []
50: []
51: []
52: []
53: []
54: []
55: []
56: []
57: []
58: []
59: []
60: []
61: []
62: []
63: []
64: []
65: []
66: []
67: []
68: []
69: []
70: []
71: []
72: []
73: []
74: []
75: []
76: []
77: []
78: []
79: []
80: []
81: []
82: []
83: []
84: []
85: []
86: []
87: []
88: []
89: []
90: []
91: []
92: []
93: []
94: []
95: []
96: []
97: []
98: []
99: []
100: []
101: []
102: []
103: []
104: []
105: []
106: []
107: []
108: []
109: []
110: []
111: []
112: []
113: []
114: []
115: []
116: []
117: []
118: []
119: []
120: []
121: []
122: []
123: []
124: []
125: []
126: []
127: []
128: []
129: []
130: []
131: []
132: []
133: []
134: []
135: []
136: []
137: []
138: []
139: []
140: []
141: []
142: []
143: []
144: []
145: []
146: []
147: []
148: []
149: []
150: []
151: []
152: []
153: []
154: []
155: []
156: []
157: []
158: []
159: []
160: []
161: []
162: []
163: []
164: []
165: []
166: []
167: []
168: []
169: []
170: []
171: []
172: []
173: []
174: []
175: []
176: []
177: []
178: []
179: []
180: []
181: []
182: []
183: []
184: []
185: []
186: []
187: []
188: []
189: []
190: []
191: []
192: []
193: []
194: []
195: []
196: []
197: []
198: []
199: []
200: []
201: []
202: []
203: []
204: []
205: []
206: []
207: []
208: []
209: []
210: []
211: []
212: []
213: []
214: []
215: []
216: []
217: []
218: []
219: []
220: []
221: []
222: []
223: []
224: []
225: []
226: []
227: []
228: []
229: []
230: []
231: []
232: []
233: []
234: []
235: []
236: []
237: []
238: []
239: []
240: []
241: []
242: []
243: []
244: []
245: []
246: []
247: []
248: []
249: []
250: []
251: []
252: []
253: []
254: []
255: []

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
9c25261cec7275cb5cfd85835904d023

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
2f97f18c15e915853e4df879be6e1f63

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
e849066b0e43d5c456f086c552372afc

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
8cc5e82995b0443b660f419bb9ea2e85

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
e849066b0e43d5c456f086c552372afc

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
8cc5e82995b0443b660f419bb9ea2e85

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
73b48005dc57b04f0939bbf21a68dab6

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
3c23d35627667dcee98468bfdecf09d3

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
73b48005dc57b04f0939bbf21a68dab6

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
3c23d35627667dcee98468bfdecf09d3

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
c3f8b0b858a4820a508b25b42328cedd

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
38a42f5dc25e99d7a5312a63ce94ed30

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
c3f8b0b858a4820a508b25b42328cedd

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
38a42f5dc25e99d7a5312a63ce94ed30

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
2049930204498b323885c91de88e44ca

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
7f0ca8c0fc6494f3dba46e8eb9699045

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
2049930204498b323885c91de88e44ca

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
7f0ca8c0fc6494f3dba46e8eb9699045

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
ad8fc873747aaf1d3590e7ccab735985

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
7c6cc88697da835d33877b2df41fa1cb

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
ad8fc873747aaf1d3590e7ccab735985

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
7c6cc88697da835d33877b2df41fa1cb

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
9a8c7343b4735d37704748cabcd51ff2

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
898a7dc25a1441bc3e7e2a8a62d99090

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
9a8c7343b4735d37704748cabcd51ff2

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
898a7dc25a1441bc3e7e2a8a62d99090

@@ -0,0 +1,5 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
all field_id 0: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, ]

@@ -0,0 +1,5 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
all field_id 0: [3, 5, 6, 9, 10, 11, 14, 18, 19, 24, 26, 28, 29, 32, 33, 35, 36, 37, 38, 39, 41, 46, 47, 49, 52, 53, 55, 59, 61, 64, 68, 71, 74, 75, 76, 81, 83, 85, 86, 88, 90, 91, 92, 98, 99, 101, 102, 103, 105, 106, 107, 109, 110, 111, 114, 115, 118, 119, 123, 124, 126, 128, 129, 130, 131, 132, 133, 135, 136, 137, 138, 139, 141, 143, 144, 146, 147, 149, 150, 152, 153, 155, 156, 158, 159, 160, 161, 162, 163, 164, 167, 168, 169, 171, 173, 174, 175, 176, 177, 178, 179, 181, 182, 183, 185, 186, 188, 189, 190, 191, 192, 193, 195, 197, 198, 199, 201, 202, 203, 205, 206, 207, 208, 209, 210, 211, 215, 216, 219, 220, 223, 224, 226, 230, 231, 233, 235, 236, 237, 238, 239, 241, 243, 244, 247, 250, 256, 258, 260, 262, 263, 264, 267, 269, 273, 277, 278, 279, 281, 282, 286, 289, 292, 293, 295, 297, 305, 306, 307, 308, 309, 310, 316, 319, 320, 323, 326, 335, 336, 338, 343, ]

@@ -0,0 +1,5 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
all field_id 0: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, ]

@@ -0,0 +1,5 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
all field_id 0: [3, 5, 6, 9, 10, 11, 14, 18, 19, 24, 26, 28, 29, 32, 33, 35, 36, 37, 38, 39, 41, 46, 47, 49, 52, 53, 55, 59, 61, 64, 68, 71, 74, 75, 76, 81, 83, 85, 86, 88, 90, 91, 92, 98, 99, 101, 102, 103, 105, 106, 107, 109, 110, 111, 114, 115, 118, 119, 123, 124, 126, 128, 129, 130, 131, 132, 133, 135, 136, 137, 138, 139, 141, 143, 144, 146, 147, 149, 150, 152, 153, 155, 156, 158, 159, 160, 161, 162, 163, 164, 167, 168, 169, 171, 173, 174, 175, 176, 177, 178, 179, 181, 182, 183, 185, 186, 188, 189, 190, 191, 192, 193, 195, 197, 198, 199, 201, 202, 203, 205, 206, 207, 208, 209, 210, 211, 215, 216, 219, 220, 223, 224, 226, 230, 231, 233, 235, 236, 237, 238, 239, 241, 243, 244, 247, 250, 256, 258, 260, 262, 263, 264, 267, 269, 273, 277, 278, 279, 281, 282, 286, 289, 292, 293, 295, 297, 305, 306, 307, 308, 309, 310, 316, 319, 320, 323, 326, 335, 336, 338, 343, ]

@@ -0,0 +1,5 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
all field_id 1: []

@@ -0,0 +1,5 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
all field_id 1: []

@@ -0,0 +1,5 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
all field_id 1: [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53, 54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71, 72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89, 90, 91, 92, 93, 94, 95, 96, 97, 98, 99, 100, 101, 102, 103, 104, 105, 106, 107, 108, 109, 110, 111, 112, 113, 114, 115, 116, 117, 118, 119, 120, 121, 122, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135, 136, 137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150, 151, 152, 153, 154, 155, 156, 157, 158, 159, 160, 161, 162, 163, 164, 165, 166, 167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180, 181, 182, 183, 184, 185, 186, 187, 188, 189, 190, 191, 192, 193, 194, 195, 196, 197, 198, 199, 200, 201, 202, 203, 204, 205, 206, 207, 208, 209, 210, 211, 212, 213, 214, 215, 216, 217, 218, 219, 220, 221, 222, 223, 224, 225, 226, 227, 228, 229, 230, 231, 232, 233, 234, 235, 236, 237, 238, 239, 240, 241, 242, 243, 244, 245, 246, 247, 248, 249, 250, 251, 252, 253, 254, 255, ]

@@ -0,0 +1,5 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
all field_id 1: [3, 5, 6, 9, 10, 11, 14, 18, 19, 24, 26, 28, 29, 32, 33, 35, 36, 37, 38, 39, 41, 46, 47, 49, 52, 53, 55, 59, 61, 64, 68, 71, 74, 75, 76, 81, 83, 85, 86, 88, 90, 91, 92, 98, 99, 101, 102, 103, 105, 106, 107, 109, 110, 111, 114, 115, 118, 119, 123, 124, 126, 128, 129, 130, 131, 132, 133, 135, 136, 137, 138, 139, 141, 143, 144, 146, 147, 149, 150, 152, 153, 155, 156, 158, 159, 160, 161, 162, 163, 164, 167, 168, 169, 171, 173, 174, 175, 176, 177, 178, 179, 181, 182, 183, 185, 186, 188, 189, 190, 191, 192, 193, 195, 197, 198, 199, 201, 202, 203, 205, 206, 207, 208, 209, 210, 211, 215, 216, 219, 220, 223, 224, 226, 230, 231, 233, 235, 236, 237, 238, 239, 241, 243, 244, 247, 250, 256, 258, 260, 262, 263, 264, 267, 269, 273, 277, 278, 279, 281, 282, 286, 289, 292, 293, 295, 297, 305, 306, 307, 308, 309, 310, 316, 319, 320, 323, 326, 335, 336, 338, 343, ]

@@ -0,0 +1,60 @@
---
source: milli/src/search/facet/facet_sort_ascending.rs
---
[200, ]
[201, ]
[202, ]
[203, ]
[204, ]
[205, ]
[206, ]
[207, ]
[208, ]
[209, ]
[210, ]
[211, ]
[212, ]
[213, ]
[214, ]
[215, ]
[216, ]
[217, ]
[218, ]
[219, ]
[220, ]
[221, ]
[222, ]
[223, ]
[224, ]
[225, ]
[226, ]
[227, ]
[228, ]
[229, ]
[230, ]
[231, ]
[232, ]
[233, ]
[234, ]
[235, ]
[236, ]
[237, ]
[238, ]
[239, ]
[240, ]
[241, ]
[242, ]
[243, ]
[244, ]
[245, ]
[246, ]
[247, ]
[248, ]
[249, ]
[250, ]
[251, ]
[252, ]
[253, ]
[254, ]
[255, ]

@@ -0,0 +1,54 @@
---
source: milli/src/search/facet/facet_sort_ascending.rs
---
[201, ]
[202, ]
[203, ]
[207, ]
[211, ]
[215, ]
[219, ]
[223, ]
[224, ]
[230, ]
[231, ]
[233, ]
[235, ]
[236, ]
[237, ]
[239, ]
[241, ]
[243, ]
[244, ]
[247, ]
[250, ]
[256, ]
[258, ]
[260, ]
[262, ]
[263, ]
[264, ]
[267, ]
[269, ]
[273, ]
[277, ]
[278, ]
[279, ]
[281, ]
[282, ]
[286, ]
[289, ]
[292, ]
[293, ]
[295, ]
[297, ]
[205, ]
[206, ]
[208, ]
[209, ]
[210, ]
[216, ]
[220, ]
[226, ]
[238, ]

@@ -0,0 +1,33 @@
---
source: milli/src/search/facet/facet_sort_ascending.rs
---
[201, 203, 205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233, 235, 237, 239, 241, 243, 245, 247, 249, 251, 253, 255, ]
[200, ]
[202, ]
[204, ]
[206, ]
[208, ]
[210, ]
[212, ]
[214, ]
[216, ]
[218, ]
[220, ]
[222, ]
[224, ]
[226, ]
[228, ]
[230, ]
[232, ]
[234, ]
[236, ]
[238, ]
[240, ]
[242, ]
[244, ]
[246, ]
[248, ]
[250, ]
[252, ]
[254, ]

@@ -0,0 +1,33 @@
---
source: milli/src/search/facet/facet_sort_ascending.rs
---
[201, 203, 205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233, 235, 237, 239, 241, 243, 245, 247, 249, 251, 253, 255, ]
[200, ]
[202, ]
[204, ]
[206, ]
[208, ]
[210, ]
[212, ]
[214, ]
[216, ]
[218, ]
[220, ]
[222, ]
[224, ]
[226, ]
[228, ]
[230, ]
[232, ]
[234, ]
[236, ]
[238, ]
[240, ]
[242, ]
[244, ]
[246, ]
[248, ]
[250, ]
[252, ]
[254, ]

@@ -0,0 +1,27 @@
---
source: milli/src/search/facet/facet_sort_ascending.rs
---
[201, 203, 205, 207, 209, 211, 215, 219, 223, 231, 233, 235, 237, 239, 241, 243, 247, 263, 267, 269, 273, 277, 279, 281, 289, 293, 295, 297, ]
[202, ]
[224, ]
[230, ]
[236, ]
[244, ]
[250, ]
[256, ]
[258, ]
[260, ]
[262, ]
[264, ]
[278, ]
[282, ]
[286, ]
[292, ]
[206, ]
[208, ]
[210, ]
[216, ]
[220, ]
[226, ]
[238, ]

@@ -0,0 +1,27 @@
---
source: milli/src/search/facet/facet_sort_ascending.rs
---
[201, 203, 205, 207, 209, 211, 215, 219, 223, 231, 233, 235, 237, 239, 241, 243, 247, 263, 267, 269, 273, 277, 279, 281, 289, 293, 295, 297, ]
[202, ]
[224, ]
[230, ]
[236, ]
[244, ]
[250, ]
[256, ]
[258, ]
[260, ]
[262, ]
[264, ]
[278, ]
[282, ]
[286, ]
[292, ]
[206, ]
[208, ]
[210, ]
[216, ]
[220, ]
[226, ]
[238, ]

@@ -0,0 +1,60 @@
---
source: milli/src/search/facet/facet_sort_descending.rs
---
[255, ]
[254, ]
[253, ]
[252, ]
[251, ]
[250, ]
[249, ]
[248, ]
[247, ]
[246, ]
[245, ]
[244, ]
[243, ]
[242, ]
[241, ]
[240, ]
[239, ]
[238, ]
[237, ]
[236, ]
[235, ]
[234, ]
[233, ]
[232, ]
[231, ]
[230, ]
[229, ]
[228, ]
[227, ]
[226, ]
[225, ]
[224, ]
[223, ]
[222, ]
[221, ]
[220, ]
[219, ]
[218, ]
[217, ]
[216, ]
[215, ]
[214, ]
[213, ]
[212, ]
[211, ]
[210, ]
[209, ]
[208, ]
[207, ]
[206, ]
[205, ]
[204, ]
[203, ]
[202, ]
[201, ]
[200, ]

@@ -0,0 +1,54 @@
---
source: milli/src/search/facet/facet_sort_descending.rs
---
[243, ]
[238, ]
[236, ]
[235, ]
[226, ]
[223, ]
[220, ]
[219, ]
[216, ]
[210, ]
[209, ]
[208, ]
[207, ]
[206, ]
[205, ]
[297, ]
[295, ]
[293, ]
[292, ]
[289, ]
[286, ]
[282, ]
[281, ]
[279, ]
[278, ]
[277, ]
[273, ]
[269, ]
[267, ]
[264, ]
[263, ]
[262, ]
[260, ]
[258, ]
[256, ]
[250, ]
[247, ]
[244, ]
[241, ]
[239, ]
[237, ]
[233, ]
[231, ]
[230, ]
[224, ]
[215, ]
[211, ]
[203, ]
[202, ]
[201, ]

@@ -0,0 +1,60 @@
---
source: milli/src/search/facet/facet_sort_descending.rs
---
[255, ]
[254, ]
[253, ]
[252, ]
[251, ]
[250, ]
[249, ]
[248, ]
[247, ]
[246, ]
[245, ]
[244, ]
[243, ]
[242, ]
[241, ]
[240, ]
[239, ]
[238, ]
[237, ]
[236, ]
[235, ]
[234, ]
[233, ]
[232, ]
[231, ]
[230, ]
[229, ]
[228, ]
[227, ]
[226, ]
[225, ]
[224, ]
[223, ]
[222, ]
[221, ]
[220, ]
[219, ]
[218, ]
[217, ]
[216, ]
[215, ]
[214, ]
[213, ]
[212, ]
[211, ]
[210, ]
[209, ]
[208, ]
[207, ]
[206, ]
[205, ]
[204, ]
[203, ]
[202, ]
[201, ]
[200, ]

@@ -0,0 +1,33 @@
---
source: milli/src/search/facet/facet_sort_descending.rs
---
[254, ]
[252, ]
[250, ]
[248, ]
[246, ]
[244, ]
[242, ]
[240, ]
[238, ]
[236, ]
[234, ]
[232, ]
[230, ]
[228, ]
[226, ]
[224, ]
[222, ]
[220, ]
[218, ]
[216, ]
[214, ]
[212, ]
[210, ]
[208, ]
[206, ]
[204, ]
[202, ]
[200, ]
[201, 203, 205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233, 235, 237, 239, 241, 243, 245, 247, 249, 251, 253, 255, ]

@@ -0,0 +1,33 @@
---
source: milli/src/search/facet/facet_sort_descending.rs
---
[254, ]
[252, ]
[250, ]
[248, ]
[246, ]
[244, ]
[242, ]
[240, ]
[238, ]
[236, ]
[234, ]
[232, ]
[230, ]
[228, ]
[226, ]
[224, ]
[222, ]
[220, ]
[218, ]
[216, ]
[214, ]
[212, ]
[210, ]
[208, ]
[206, ]
[204, ]
[202, ]
[200, ]
[201, 203, 205, 207, 209, 211, 213, 215, 217, 219, 221, 223, 225, 227, 229, 231, 233, 235, 237, 239, 241, 243, 245, 247, 249, 251, 253, 255, ]

@@ -0,0 +1,27 @@
---
source: milli/src/search/facet/facet_sort_descending.rs
---
[238, ]
[236, ]
[226, ]
[220, ]
[216, ]
[210, ]
[208, ]
[206, ]
[292, ]
[286, ]
[282, ]
[278, ]
[264, ]
[262, ]
[260, ]
[258, ]
[256, ]
[250, ]
[244, ]
[230, ]
[224, ]
[202, ]
[201, 203, 205, 207, 209, 211, 215, 219, 223, 231, 233, 235, 237, 239, 241, 243, 247, 263, 267, 269, 273, 277, 279, 281, 289, 293, 295, 297, ]

@@ -0,0 +1,27 @@
---
source: milli/src/search/facet/facet_sort_descending.rs
---
[238, ]
[236, ]
[226, ]
[220, ]
[216, ]
[210, ]
[208, ]
[206, ]
[292, ]
[286, ]
[282, ]
[278, ]
[264, ]
[262, ]
[260, ]
[258, ]
[256, ]
[250, ]
[244, ]
[230, ]
[224, ]
[202, ]
[201, 203, 205, 207, 209, 211, 215, 219, 223, 231, 233, 235, 237, 239, 241, 243, 247, 263, 267, 269, 273, 277, 279, 281, 289, 293, 295, 297, ]

@@ -0,0 +1,187 @@
/// This mod is necessary until https://github.com/BurntSushi/fst/pull/137 gets merged.
/// All credits for this code go to BurntSushi.
use fst::Automaton;
pub struct StartsWith<A>(pub A);
/// The `Automaton` state for `StartsWith<A>`.
pub struct StartsWithState<A: Automaton>(pub StartsWithStateKind<A>);
impl<A: Automaton> Clone for StartsWithState<A>
where
A::State: Clone,
{
fn clone(&self) -> Self {
Self(self.0.clone())
}
}
/// The inner state of a `StartsWithState<A>`.
pub enum StartsWithStateKind<A: Automaton> {
/// Sink state that is reached when the automaton has matched the prefix.
Done,
/// State in which the automaton is while it hasn't matched the prefix.
Running(A::State),
}
impl<A: Automaton> Clone for StartsWithStateKind<A>
where
A::State: Clone,
{
fn clone(&self) -> Self {
match self {
StartsWithStateKind::Done => StartsWithStateKind::Done,
StartsWithStateKind::Running(inner) => StartsWithStateKind::Running(inner.clone()),
}
}
}
impl<A: Automaton> Automaton for StartsWith<A> {
type State = StartsWithState<A>;
fn start(&self) -> StartsWithState<A> {
StartsWithState({
let inner = self.0.start();
if self.0.is_match(&inner) {
StartsWithStateKind::Done
} else {
StartsWithStateKind::Running(inner)
}
})
}
fn is_match(&self, state: &StartsWithState<A>) -> bool {
match state.0 {
StartsWithStateKind::Done => true,
StartsWithStateKind::Running(_) => false,
}
}
fn can_match(&self, state: &StartsWithState<A>) -> bool {
match state.0 {
StartsWithStateKind::Done => true,
StartsWithStateKind::Running(ref inner) => self.0.can_match(inner),
}
}
fn will_always_match(&self, state: &StartsWithState<A>) -> bool {
match state.0 {
StartsWithStateKind::Done => true,
StartsWithStateKind::Running(_) => false,
}
}
fn accept(&self, state: &StartsWithState<A>, byte: u8) -> StartsWithState<A> {
StartsWithState(match state.0 {
StartsWithStateKind::Done => StartsWithStateKind::Done,
StartsWithStateKind::Running(ref inner) => {
let next_inner = self.0.accept(inner, byte);
if self.0.is_match(&next_inner) {
StartsWithStateKind::Done
} else {
StartsWithStateKind::Running(next_inner)
}
}
})
}
}
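// Usage sketch (not part of the original change): wrapping `fst::automaton::Str`
// so the whole automaton matches any key that begins with the given prefix.
//
//     use fst::automaton::Str;
//     let starts_with_cat = StartsWith(Str::new("cat"));
//     // `set.search(starts_with_cat)` then streams every key prefixed by "cat".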
/// An automaton that matches when one of its component automata match.
#[derive(Clone, Debug)]
pub struct Union<A, B>(pub A, pub B);
/// The `Automaton` state for `Union<A, B>`.
pub struct UnionState<A: Automaton, B: Automaton>(pub A::State, pub B::State);
impl<A: Automaton, B: Automaton> Clone for UnionState<A, B>
where
A::State: Clone,
B::State: Clone,
{
fn clone(&self) -> Self {
Self(self.0.clone(), self.1.clone())
}
}
impl<A: Automaton, B: Automaton> Automaton for Union<A, B> {
type State = UnionState<A, B>;
fn start(&self) -> UnionState<A, B> {
UnionState(self.0.start(), self.1.start())
}
fn is_match(&self, state: &UnionState<A, B>) -> bool {
self.0.is_match(&state.0) || self.1.is_match(&state.1)
}
fn can_match(&self, state: &UnionState<A, B>) -> bool {
self.0.can_match(&state.0) || self.1.can_match(&state.1)
}
fn will_always_match(&self, state: &UnionState<A, B>) -> bool {
self.0.will_always_match(&state.0) || self.1.will_always_match(&state.1)
}
fn accept(&self, state: &UnionState<A, B>, byte: u8) -> UnionState<A, B> {
UnionState(self.0.accept(&state.0, byte), self.1.accept(&state.1, byte))
}
}
/// An automaton that matches when both of its component automata match.
#[derive(Clone, Debug)]
pub struct Intersection<A, B>(pub A, pub B);
/// The `Automaton` state for `Intersection<A, B>`.
pub struct IntersectionState<A: Automaton, B: Automaton>(pub A::State, pub B::State);
impl<A: Automaton, B: Automaton> Clone for IntersectionState<A, B>
where
A::State: Clone,
B::State: Clone,
{
fn clone(&self) -> Self {
Self(self.0.clone(), self.1.clone())
}
}
impl<A: Automaton, B: Automaton> Automaton for Intersection<A, B> {
type State = IntersectionState<A, B>;
fn start(&self) -> IntersectionState<A, B> {
IntersectionState(self.0.start(), self.1.start())
}
fn is_match(&self, state: &IntersectionState<A, B>) -> bool {
self.0.is_match(&state.0) && self.1.is_match(&state.1)
}
fn can_match(&self, state: &IntersectionState<A, B>) -> bool {
self.0.can_match(&state.0) && self.1.can_match(&state.1)
}
fn will_always_match(&self, state: &IntersectionState<A, B>) -> bool {
self.0.will_always_match(&state.0) && self.1.will_always_match(&state.1)
}
fn accept(&self, state: &IntersectionState<A, B>, byte: u8) -> IntersectionState<A, B> {
IntersectionState(self.0.accept(&state.0, byte), self.1.accept(&state.1, byte))
}
}
/// An automaton that matches exactly when the automaton it wraps does not.
#[derive(Clone, Debug)]
pub struct Complement<A>(pub A);
/// The `Automaton` state for `Complement<A>`.
pub struct ComplementState<A: Automaton>(pub A::State);
impl<A: Automaton> Clone for ComplementState<A>
where
A::State: Clone,
{
fn clone(&self) -> Self {
Self(self.0.clone())
}
}
impl<A: Automaton> Automaton for Complement<A> {
type State = ComplementState<A>;
fn start(&self) -> ComplementState<A> {
ComplementState(self.0.start())
}
fn is_match(&self, state: &ComplementState<A>) -> bool {
!self.0.is_match(&state.0)
}
fn can_match(&self, state: &ComplementState<A>) -> bool {
!self.0.will_always_match(&state.0)
}
fn will_always_match(&self, state: &ComplementState<A>) -> bool {
!self.0.can_match(&state.0)
}
fn accept(&self, state: &ComplementState<A>, byte: u8) -> ComplementState<A> {
ComplementState(self.0.accept(&state.0, byte))
}
}
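// Combined sketch (illustrative): the combinators above compose like ordinary
// set operations. Assuming `exact_abc` is some automaton matching exactly
// "abc", this matches every key that starts with "a" except "abc" itself.
//
//     use fst::automaton::Str;
//     let automaton = Intersection(StartsWith(Str::new("a")), Complement(exact_abc));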

@@ -0,0 +1,289 @@
use std::cmp::Ordering;
use itertools::Itertools;
use roaring::RoaringBitmap;
use crate::score_details::{ScoreDetails, ScoreValue, ScoringStrategy};
use crate::search::SemanticSearch;
use crate::{MatchingWords, Result, Search, SearchResult};
struct ScoreWithRatioResult {
matching_words: MatchingWords,
candidates: RoaringBitmap,
document_scores: Vec<(u32, ScoreWithRatio)>,
degraded: bool,
used_negative_operator: bool,
}
type ScoreWithRatio = (Vec<ScoreDetails>, f32);
#[tracing::instrument(level = "trace", skip_all, target = "search::hybrid")]
fn compare_scores(
&(ref left_scores, left_ratio): &ScoreWithRatio,
&(ref right_scores, right_ratio): &ScoreWithRatio,
) -> Ordering {
let mut left_it = ScoreDetails::score_values(left_scores.iter());
let mut right_it = ScoreDetails::score_values(right_scores.iter());
loop {
let left = left_it.next();
let right = right_it.next();
match (left, right) {
(None, None) => return Ordering::Equal,
(None, Some(_)) => return Ordering::Less,
(Some(_), None) => return Ordering::Greater,
(Some(ScoreValue::Score(left)), Some(ScoreValue::Score(right))) => {
let left = left * left_ratio as f64;
let right = right * right_ratio as f64;
if (left - right).abs() <= f64::EPSILON {
continue;
}
return left.partial_cmp(&right).unwrap();
}
(Some(ScoreValue::Sort(left)), Some(ScoreValue::Sort(right))) => {
match left.partial_cmp(right).unwrap() {
Ordering::Equal => continue,
order => return order,
}
}
(Some(ScoreValue::GeoSort(left)), Some(ScoreValue::GeoSort(right))) => {
match left.partial_cmp(right).unwrap() {
Ordering::Equal => continue,
order => return order,
}
}
(Some(ScoreValue::Score(x)), Some(_)) => {
return if x == 0. { Ordering::Less } else { Ordering::Greater }
}
(Some(_), Some(ScoreValue::Score(x))) => {
return if x == 0. { Ordering::Greater } else { Ordering::Less }
}
// this combination should be unreachable: geo sort and regular sort are never compared
(Some(ScoreValue::GeoSort(_)), Some(ScoreValue::Sort(_)))
| (Some(ScoreValue::Sort(_)), Some(ScoreValue::GeoSort(_))) => {
unreachable!("Unexpected geo and sort comparison")
}
}
}
}
impl ScoreWithRatioResult {
fn new(results: SearchResult, ratio: f32) -> Self {
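        // Tag every document score with the weight (ratio) of this search so that
        // `compare_scores` can weight them during the merge.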
let document_scores = results
.documents_ids
.into_iter()
.zip(results.document_scores.into_iter().map(|scores| (scores, ratio)))
.collect();
Self {
matching_words: results.matching_words,
candidates: results.candidates,
document_scores,
degraded: results.degraded,
used_negative_operator: results.used_negative_operator,
}
}
#[tracing::instrument(level = "trace", skip_all, target = "search::hybrid")]
fn merge(
vector_results: Self,
keyword_results: Self,
from: usize,
length: usize,
) -> (SearchResult, u32) {
#[derive(Clone, Copy)]
enum ResultSource {
Semantic,
Keyword,
}
let mut semantic_hit_count = 0;
let mut documents_ids = Vec::with_capacity(
vector_results.document_scores.len() + keyword_results.document_scores.len(),
);
let mut document_scores = Vec::with_capacity(
vector_results.document_scores.len() + keyword_results.document_scores.len(),
);
let mut documents_seen = RoaringBitmap::new();
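        // Merge the two score-sorted lists, preferring the higher weighted score at
        // each step, and keep only the first occurrence of every document id.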
for ((docid, (main_score, _sub_score)), source) in vector_results
.document_scores
.into_iter()
.zip(std::iter::repeat(ResultSource::Semantic))
.merge_by(
keyword_results
.document_scores
.into_iter()
.zip(std::iter::repeat(ResultSource::Keyword)),
|((_, left), _), ((_, right), _)| {
// the first value is the one with the greatest score
compare_scores(left, right).is_ge()
},
)
// remove documents we already saw
.filter(|((docid, _), _)| documents_seen.insert(*docid))
// start skipping **after** the filter
.skip(from)
// take **after** skipping
.take(length)
{
if let ResultSource::Semantic = source {
semantic_hit_count += 1;
}
documents_ids.push(docid);
// TODO: pass both scores to documents_score in some way?
document_scores.push(main_score);
}
(
SearchResult {
matching_words: keyword_results.matching_words,
candidates: vector_results.candidates | keyword_results.candidates,
documents_ids,
document_scores,
degraded: vector_results.degraded | keyword_results.degraded,
used_negative_operator: vector_results.used_negative_operator
| keyword_results.used_negative_operator,
},
semantic_hit_count,
)
}
}
impl<'a> Search<'a> {
#[tracing::instrument(level = "trace", skip_all, target = "search::hybrid")]
pub fn execute_hybrid(&self, semantic_ratio: f32) -> Result<(SearchResult, Option<u32>)> {
// TODO: find a classier way to achieve this than resetting the vector and query params
// create separate keyword and semantic searches
let mut search = Search {
query: self.query.clone(),
filter: self.filter.clone(),
offset: 0,
limit: self.limit + self.offset,
sort_criteria: self.sort_criteria.clone(),
distinct: self.distinct.clone(),
searchable_attributes: self.searchable_attributes,
geo_strategy: self.geo_strategy,
terms_matching_strategy: self.terms_matching_strategy,
scoring_strategy: ScoringStrategy::Detailed,
words_limit: self.words_limit,
exhaustive_number_hits: self.exhaustive_number_hits,
rtxn: self.rtxn,
index: self.index,
semantic: self.semantic.clone(),
time_budget: self.time_budget.clone(),
ranking_score_threshold: self.ranking_score_threshold,
locales: self.locales.clone(),
};
let semantic = search.semantic.take();
let keyword_results = search.execute()?;
// completely skip semantic search if the results of the keyword search are good enough
if self.results_good_enough(&keyword_results, semantic_ratio) {
return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
}
// placeholder search (no query): there is nothing to embed, so no vector search
let Some(query) = search.query.take() else {
return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
};
// no embedder, no semantic search
let Some(SemanticSearch { vector, embedder_name, embedder, quantized }) = semantic else {
return Ok(return_keyword_results(self.limit, self.offset, keyword_results));
};
let vector_query = match vector {
Some(vector_query) => vector_query,
None => {
// attempt to embed the vector
let span = tracing::trace_span!(target: "search::hybrid", "embed_one");
let _entered = span.enter();
match embedder.embed_one(query) {
Ok(embedding) => embedding,
Err(error) => {
tracing::error!(error=%error, "Embedding failed");
return Ok((keyword_results, Some(0)));
}
}
}
};
search.semantic =
Some(SemanticSearch { vector: Some(vector_query), embedder_name, embedder, quantized });
// TODO: would be better to have two distinct functions at this point
let vector_results = search.execute()?;
let keyword_results = ScoreWithRatioResult::new(keyword_results, 1.0 - semantic_ratio);
let vector_results = ScoreWithRatioResult::new(vector_results, semantic_ratio);
let (merge_results, semantic_hit_count) =
ScoreWithRatioResult::merge(vector_results, keyword_results, self.offset, self.limit);
assert!(merge_results.documents_ids.len() <= self.limit);
Ok((merge_results, Some(semantic_hit_count)))
}
fn results_good_enough(&self, keyword_results: &SearchResult, semantic_ratio: f32) -> bool {
// A result is good enough if its weighted keyword score is above this threshold,
// e.g. a raw score > 0.9 with a semantic ratio of 0.5 (0.9 * 0.5 = 0.45)
const GOOD_ENOUGH_SCORE: f64 = 0.45;
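        // e.g. with a semantic ratio of 0.2, the keyword weight is 0.8, so a raw
        // keyword score must reach 0.45 / 0.8 = 0.5625 to be considered good enough.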
// 1. we check that we got a sufficient number of results
if keyword_results.document_scores.len() < self.limit + self.offset {
return false;
}
// 2. and that all results have a good enough score.
// we need to check every result because, due to sort-like rules, they are not necessarily in relevancy order
for score in &keyword_results.document_scores {
let score = ScoreDetails::global_score(score.iter());
if score * ((1.0 - semantic_ratio) as f64) < GOOD_ENOUGH_SCORE {
return false;
}
}
true
}
}
fn return_keyword_results(
limit: usize,
offset: usize,
SearchResult {
matching_words,
candidates,
mut documents_ids,
mut document_scores,
degraded,
used_negative_operator,
}: SearchResult,
) -> (SearchResult, Option<u32>) {
let (documents_ids, document_scores) = if offset >= documents_ids.len() ||
// technically redundant, because documents_ids.len() == document_scores.len();
// kept as defensive programming
offset >= document_scores.len()
{
(vec![], vec![])
} else {
// apply the pagination in place: rotate_left(offset) moves the first `offset`
// entries to the back (cannot panic: offset < len was checked above), then keep
// at most `limit` of the remaining entries, without resurrecting the rotated-off
// prefix when fewer than `offset + limit` results are available.
let kept = limit.min(documents_ids.len() - offset);
documents_ids.rotate_left(offset);
documents_ids.truncate(kept);
document_scores.rotate_left(offset);
document_scores.truncate(kept);
(documents_ids, document_scores)
};
(
SearchResult {
matching_words,
candidates,
documents_ids,
document_scores,
degraded,
used_negative_operator,
},
Some(0),
)
}

@@ -0,0 +1,413 @@
use std::fmt;
use std::sync::Arc;
use charabia::Language;
use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
use once_cell::sync::Lazy;
use roaring::bitmap::RoaringBitmap;
pub use self::facet::{FacetDistribution, Filter, OrderBy, DEFAULT_VALUES_PER_FACET};
pub use self::new::matches::{FormatOptions, MatchBounds, MatcherBuilder, MatchingWords};
use self::new::{execute_vector_search, PartialSearchResult};
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::vector::Embedder;
use crate::{
execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, Error, Index,
Result, SearchContext, TimeBudget, UserError,
};
// Building these factories is not free.
static LEVDIST0: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(0, true));
static LEVDIST1: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(1, true));
static LEVDIST2: Lazy<LevBuilder> = Lazy::new(|| LevBuilder::new(2, true));
pub mod facet;
mod fst_utils;
pub mod hybrid;
pub mod new;
pub mod similar;
#[derive(Debug, Clone)]
pub struct SemanticSearch {
vector: Option<Vec<f32>>,
embedder_name: String,
embedder: Arc<Embedder>,
quantized: bool,
}
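/// Builder for a search request. A minimal sketch of the intended usage (an open
/// index and read transaction are assumed to exist):
///
/// ```ignore
/// let mut search = Search::new(&rtxn, &index);
/// search.query("kafka").offset(0).limit(20);
/// let SearchResult { documents_ids, .. } = search.execute()?;
/// ```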
pub struct Search<'a> {
query: Option<String>,
// this should be linked to the String in the query
filter: Option<Filter<'a>>,
offset: usize,
limit: usize,
sort_criteria: Option<Vec<AscDesc>>,
distinct: Option<String>,
searchable_attributes: Option<&'a [String]>,
geo_strategy: new::GeoSortStrategy,
terms_matching_strategy: TermsMatchingStrategy,
scoring_strategy: ScoringStrategy,
words_limit: usize,
exhaustive_number_hits: bool,
rtxn: &'a heed::RoTxn<'a>,
index: &'a Index,
semantic: Option<SemanticSearch>,
time_budget: TimeBudget,
ranking_score_threshold: Option<f64>,
locales: Option<Vec<Language>>,
}
impl<'a> Search<'a> {
pub fn new(rtxn: &'a heed::RoTxn<'a>, index: &'a Index) -> Search<'a> {
Search {
query: None,
filter: None,
offset: 0,
limit: 20,
sort_criteria: None,
distinct: None,
searchable_attributes: None,
geo_strategy: new::GeoSortStrategy::default(),
terms_matching_strategy: TermsMatchingStrategy::default(),
scoring_strategy: Default::default(),
exhaustive_number_hits: false,
words_limit: 10,
rtxn,
index,
semantic: None,
locales: None,
time_budget: TimeBudget::max(),
ranking_score_threshold: None,
}
}
pub fn query(&mut self, query: impl Into<String>) -> &mut Search<'a> {
self.query = Some(query.into());
self
}
pub fn semantic(
&mut self,
embedder_name: String,
embedder: Arc<Embedder>,
quantized: bool,
vector: Option<Vec<f32>>,
) -> &mut Search<'a> {
self.semantic = Some(SemanticSearch { embedder_name, embedder, quantized, vector });
self
}
pub fn offset(&mut self, offset: usize) -> &mut Search<'a> {
self.offset = offset;
self
}
pub fn limit(&mut self, limit: usize) -> &mut Search<'a> {
self.limit = limit;
self
}
pub fn sort_criteria(&mut self, criteria: Vec<AscDesc>) -> &mut Search<'a> {
self.sort_criteria = Some(criteria);
self
}
pub fn distinct(&mut self, distinct: String) -> &mut Search<'a> {
self.distinct = Some(distinct);
self
}
pub fn searchable_attributes(&mut self, searchable: &'a [String]) -> &mut Search<'a> {
self.searchable_attributes = Some(searchable);
self
}
pub fn terms_matching_strategy(&mut self, value: TermsMatchingStrategy) -> &mut Search<'a> {
self.terms_matching_strategy = value;
self
}
pub fn scoring_strategy(&mut self, value: ScoringStrategy) -> &mut Search<'a> {
self.scoring_strategy = value;
self
}
pub fn words_limit(&mut self, value: usize) -> &mut Search<'a> {
self.words_limit = value;
self
}
pub fn filter(&mut self, condition: Filter<'a>) -> &mut Search<'a> {
self.filter = Some(condition);
self
}
#[cfg(test)]
pub fn geo_sort_strategy(&mut self, strategy: new::GeoSortStrategy) -> &mut Search<'a> {
self.geo_strategy = strategy;
self
}
/// Forces the search to exhaustively compute the number of candidates,
/// this will increase the search time but allows finite pagination.
pub fn exhaustive_number_hits(&mut self, exhaustive_number_hits: bool) -> &mut Search<'a> {
self.exhaustive_number_hits = exhaustive_number_hits;
self
}
pub fn time_budget(&mut self, time_budget: TimeBudget) -> &mut Search<'a> {
self.time_budget = time_budget;
self
}
pub fn ranking_score_threshold(&mut self, ranking_score_threshold: f64) -> &mut Search<'a> {
self.ranking_score_threshold = Some(ranking_score_threshold);
self
}
pub fn locales(&mut self, locales: Vec<Language>) -> &mut Search<'a> {
self.locales = Some(locales);
self
}
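    /// Return only the universe of candidates, without ranking them. A vector search
    /// ranks the whole filtered universe, so computing the filter is enough; a
    /// keyword search has to be executed to know its candidates.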
pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> {
if has_vector_search {
let ctx = SearchContext::new(self.index, self.rtxn)?;
filtered_universe(ctx.index, ctx.txn, &self.filter)
} else {
Ok(self.execute()?.candidates)
}
}
pub fn execute(&self) -> Result<SearchResult> {
let mut ctx = SearchContext::new(self.index, self.rtxn)?;
if let Some(searchable_attributes) = self.searchable_attributes {
ctx.attributes_to_search_on(searchable_attributes)?;
}
if let Some(distinct) = &self.distinct {
let filterable_fields = ctx.index.filterable_fields(ctx.txn)?;
if !crate::is_faceted(distinct, &filterable_fields) {
let (valid_fields, hidden_fields) =
ctx.index.remove_hidden_fields(ctx.txn, filterable_fields)?;
return Err(Error::UserError(UserError::InvalidDistinctAttribute {
field: distinct.clone(),
valid_fields,
hidden_fields,
}));
}
}
let universe = filtered_universe(ctx.index, ctx.txn, &self.filter)?;
let PartialSearchResult {
located_query_terms,
candidates,
documents_ids,
document_scores,
degraded,
used_negative_operator,
} = match self.semantic.as_ref() {
Some(SemanticSearch { vector: Some(vector), embedder_name, embedder, quantized }) => {
execute_vector_search(
&mut ctx,
vector,
self.scoring_strategy,
universe,
&self.sort_criteria,
&self.distinct,
self.geo_strategy,
self.offset,
self.limit,
embedder_name,
embedder,
*quantized,
self.time_budget.clone(),
self.ranking_score_threshold,
)?
}
_ => execute_search(
&mut ctx,
self.query.as_deref(),
self.terms_matching_strategy,
self.scoring_strategy,
self.exhaustive_number_hits,
universe,
&self.sort_criteria,
&self.distinct,
self.geo_strategy,
self.offset,
self.limit,
Some(self.words_limit),
&mut DefaultSearchLogger,
&mut DefaultSearchLogger,
self.time_budget.clone(),
self.ranking_score_threshold,
self.locales.as_ref(),
)?,
};
// consume context and located_query_terms to build MatchingWords.
let matching_words = match located_query_terms {
Some(located_query_terms) => MatchingWords::new(ctx, located_query_terms),
None => MatchingWords::default(),
};
Ok(SearchResult {
matching_words,
candidates,
document_scores,
documents_ids,
degraded,
used_negative_operator,
})
}
}
impl fmt::Debug for Search<'_> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let Search {
query,
filter,
offset,
limit,
sort_criteria,
distinct,
searchable_attributes,
geo_strategy: _,
terms_matching_strategy,
scoring_strategy,
words_limit,
exhaustive_number_hits,
rtxn: _,
index: _,
semantic,
time_budget,
ranking_score_threshold,
locales,
} = self;
f.debug_struct("Search")
.field("query", query)
.field("vector", &"[...]")
.field("filter", filter)
.field("offset", offset)
.field("limit", limit)
.field("sort_criteria", sort_criteria)
.field("distinct", distinct)
.field("searchable_attributes", searchable_attributes)
.field("terms_matching_strategy", terms_matching_strategy)
.field("scoring_strategy", scoring_strategy)
.field("exhaustive_number_hits", exhaustive_number_hits)
.field("words_limit", words_limit)
.field(
"semantic.embedder_name",
&semantic.as_ref().map(|semantic| &semantic.embedder_name),
)
.field("time_budget", time_budget)
.field("ranking_score_threshold", ranking_score_threshold)
.field("locales", locales)
.finish()
}
}
#[derive(Default, Debug)]
pub struct SearchResult {
pub matching_words: MatchingWords,
pub candidates: RoaringBitmap,
pub documents_ids: Vec<DocumentId>,
pub document_scores: Vec<Vec<ScoreDetails>>,
pub degraded: bool,
pub used_negative_operator: bool,
}
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum TermsMatchingStrategy {
// remove last word first
Last,
// all words are mandatory
All,
// remove more frequent word first
Frequency,
}
impl Default for TermsMatchingStrategy {
fn default() -> Self {
Self::Last
}
}
fn get_first(s: &str) -> &str {
match s.chars().next() {
Some(c) => &s[..c.len_utf8()],
None => panic!("unexpected empty query"),
}
}
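/// Build a Levenshtein DFA matching `word` with up to `typos` edits (capped at 2),
/// optionally matching any continuation of the word when `is_prefix` is true.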
pub fn build_dfa(word: &str, typos: u8, is_prefix: bool) -> DFA {
let lev = match typos {
0 => &LEVDIST0,
1 => &LEVDIST1,
_ => &LEVDIST2,
};
if is_prefix {
lev.build_prefix_dfa(word)
} else {
lev.build_dfa(word)
}
}
#[cfg(test)]
mod test {
#[allow(unused_imports)]
use super::*;
#[cfg(feature = "japanese")]
#[cfg(not(feature = "chinese-pinyin"))]
#[test]
fn test_kanji_language_detection() {
use crate::index::tests::TempIndex;
let index = TempIndex::new();
index
.add_documents(documents!([
{ "id": 0, "title": "The quick (\"brown\") fox can't jump 32.3 feet, right? Brr, it's 29.3°F!" },
{ "id": 1, "title": "東京のお寿司。" },
{ "id": 2, "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" }
]))
.unwrap();
let txn = index.write_txn().unwrap();
let mut search = Search::new(&txn, &index);
search.query("東京");
let SearchResult { documents_ids, .. } = search.execute().unwrap();
assert_eq!(documents_ids, vec![1]);
}
#[cfg(feature = "korean")]
#[test]
fn test_hangul_language_detection() {
use crate::index::tests::TempIndex;
let index = TempIndex::new();
index
.add_documents(documents!([
{ "id": 0, "title": "The quick (\"brown\") fox can't jump 32.3 feet, right? Brr, it's 29.3°F!" },
{ "id": 1, "title": "김밥먹을래。" },
{ "id": 2, "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" }
]))
.unwrap();
let txn = index.write_txn().unwrap();
let mut search = Search::new(&txn, &index);
search.query("김밥");
let SearchResult { documents_ids, .. } = search.execute().unwrap();
assert_eq!(documents_ids, vec![1]);
}
}

@@ -0,0 +1,366 @@
use roaring::RoaringBitmap;
use super::logger::SearchLogger;
use super::ranking_rules::{BoxRankingRule, RankingRuleQueryTrait};
use super::SearchContext;
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::search::new::distinct::{apply_distinct_rule, distinct_single_docid, DistinctOutput};
use crate::{Result, TimeBudget};
pub struct BucketSortOutput {
pub docids: Vec<u32>,
pub scores: Vec<Vec<ScoreDetails>>,
pub all_candidates: RoaringBitmap,
pub degraded: bool,
}
// TODO: would probably be good to regroup some of these inside of a struct?
#[allow(clippy::too_many_arguments)]
#[tracing::instrument(level = "trace", skip_all, target = "search::bucket_sort")]
pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
ctx: &mut SearchContext<'ctx>,
mut ranking_rules: Vec<BoxRankingRule<'ctx, Q>>,
query: &Q,
distinct: Option<&str>,
universe: &RoaringBitmap,
from: usize,
length: usize,
scoring_strategy: ScoringStrategy,
logger: &mut dyn SearchLogger<Q>,
time_budget: TimeBudget,
ranking_score_threshold: Option<f64>,
) -> Result<BucketSortOutput> {
logger.initial_query(query);
logger.ranking_rules(&ranking_rules);
logger.initial_universe(universe);
let distinct_field = match distinct {
Some(distinct) => Some(distinct),
None => ctx.index.distinct_field(ctx.txn)?,
};
let distinct_fid = if let Some(field) = distinct_field {
ctx.index.fields_ids_map(ctx.txn)?.id(field)
} else {
None
};
if universe.len() < from as u64 {
return Ok(BucketSortOutput {
docids: vec![],
scores: vec![],
all_candidates: universe.clone(),
degraded: false,
});
}
if ranking_rules.is_empty() {
if let Some(distinct_fid) = distinct_fid {
let mut excluded = RoaringBitmap::new();
let mut results = vec![];
for docid in universe.iter() {
if results.len() >= from + length {
break;
}
if excluded.contains(docid) {
continue;
}
distinct_single_docid(ctx.index, ctx.txn, distinct_fid, docid, &mut excluded)?;
results.push(docid);
}
let mut all_candidates = universe - excluded;
all_candidates.extend(results.iter().copied());
// drain the skipped elements from the results
// this **must** be done **after** writing all the results into `all_candidates` to ensure
// that e.g. estimatedTotalHits stays correct.
if results.len() >= from {
results.drain(..from);
} else {
results.clear();
}
return Ok(BucketSortOutput {
scores: vec![Default::default(); results.len()],
docids: results,
all_candidates,
degraded: false,
});
} else {
let docids: Vec<u32> = universe.iter().skip(from).take(length).collect();
return Ok(BucketSortOutput {
scores: vec![Default::default(); docids.len()],
docids,
all_candidates: universe.clone(),
degraded: false,
});
};
}
let ranking_rules_len = ranking_rules.len();
logger.start_iteration_ranking_rule(0, ranking_rules[0].as_ref(), query, universe);
ranking_rules[0].start_iteration(ctx, logger, universe, query)?;
let mut ranking_rule_scores: Vec<ScoreDetails> = vec![];
let mut ranking_rule_universes: Vec<RoaringBitmap> =
vec![RoaringBitmap::default(); ranking_rules_len];
ranking_rule_universes[0].clone_from(universe);
let mut cur_ranking_rule_index = 0;
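    // Invariants of the loop below:
    // - `ranking_rule_universes[i]` holds the documents that rule `i` still has to sort;
    // - `ranking_rule_scores` stacks one score per ranking rule currently entered, so
    //   its length follows `cur_ranking_rule_index`.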
/// Finish iterating over the current ranking rule, yielding
/// control to the parent (or finishing the search if not possible).
/// Update the universes accordingly and inform the logger.
macro_rules! back {
() => {
// FIXME: temporarily disabled assert: see <https://github.com/meilisearch/meilisearch/pull/4013>
// assert!(
// ranking_rule_universes[cur_ranking_rule_index].is_empty(),
// "The ranking rule {} did not sort its bucket exhaustively",
// ranking_rules[cur_ranking_rule_index].id()
// );
logger.end_iteration_ranking_rule(
cur_ranking_rule_index,
ranking_rules[cur_ranking_rule_index].as_ref(),
&ranking_rule_universes[cur_ranking_rule_index],
);
ranking_rule_universes[cur_ranking_rule_index].clear();
ranking_rules[cur_ranking_rule_index].end_iteration(ctx, logger);
if cur_ranking_rule_index == 0 {
break;
} else {
cur_ranking_rule_index -= 1;
}
if ranking_rule_scores.len() > cur_ranking_rule_index {
ranking_rule_scores.pop();
}
};
}
let mut all_candidates = universe.clone();
let mut valid_docids = vec![];
let mut valid_scores = vec![];
let mut cur_offset = 0usize;
macro_rules! maybe_add_to_results {
($candidates:expr) => {
maybe_add_to_results(
ctx,
from,
length,
logger,
&mut valid_docids,
&mut valid_scores,
&mut all_candidates,
&mut ranking_rule_universes,
&mut ranking_rules,
cur_ranking_rule_index,
&mut cur_offset,
distinct_fid,
&ranking_rule_scores,
$candidates,
)?;
};
}
while valid_docids.len() < length {
if time_budget.exceeded() {
loop {
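                // The time budget is exceeded: unwind the ranking rules, flushing each
                // remaining universe as a single `Skipped`-scored bucket so the results
                // are still filled up to `length`, then return with `degraded: true`.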
let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]);
ranking_rule_scores.push(ScoreDetails::Skipped);
// remove candidates from the universe without adding them to result if their score is below the threshold
if let Some(ranking_score_threshold) = ranking_score_threshold {
let current_score = ScoreDetails::global_score(ranking_rule_scores.iter());
if current_score < ranking_score_threshold {
all_candidates -= bucket | &ranking_rule_universes[cur_ranking_rule_index];
back!();
continue;
}
}
maybe_add_to_results!(bucket);
ranking_rule_scores.pop();
if cur_ranking_rule_index == 0 {
break;
}
back!();
}
return Ok(BucketSortOutput {
scores: valid_scores,
docids: valid_docids,
all_candidates,
degraded: true,
});
}
// The universe for this bucket is empty (or a lone document we don't need to rank
// when scoring is skipped), so there is nothing to sort; go back to the parent ranking rule.
if ranking_rule_universes[cur_ranking_rule_index].is_empty()
|| (scoring_strategy == ScoringStrategy::Skip
&& ranking_rule_universes[cur_ranking_rule_index].len() == 1)
{
let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]);
maybe_add_to_results!(bucket);
back!();
continue;
}
let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket(
ctx,
logger,
&ranking_rule_universes[cur_ranking_rule_index],
)?
else {
back!();
continue;
};
ranking_rule_scores.push(next_bucket.score);
logger.next_bucket_ranking_rule(
cur_ranking_rule_index,
ranking_rules[cur_ranking_rule_index].as_ref(),
&ranking_rule_universes[cur_ranking_rule_index],
&next_bucket.candidates,
);
debug_assert!(
ranking_rule_universes[cur_ranking_rule_index].is_superset(&next_bucket.candidates)
);
// remove candidates from the universe without adding them to result if their score is below the threshold
if let Some(ranking_score_threshold) = ranking_score_threshold {
let current_score = ScoreDetails::global_score(ranking_rule_scores.iter());
if current_score < ranking_score_threshold {
all_candidates -=
next_bucket.candidates | &ranking_rule_universes[cur_ranking_rule_index];
back!();
continue;
}
}
ranking_rule_universes[cur_ranking_rule_index] -= &next_bucket.candidates;
if cur_ranking_rule_index == ranking_rules_len - 1
|| (scoring_strategy == ScoringStrategy::Skip && next_bucket.candidates.len() <= 1)
|| cur_offset + (next_bucket.candidates.len() as usize) < from
{
maybe_add_to_results!(next_bucket.candidates);
ranking_rule_scores.pop();
continue;
}
cur_ranking_rule_index += 1;
ranking_rule_universes[cur_ranking_rule_index].clone_from(&next_bucket.candidates);
logger.start_iteration_ranking_rule(
cur_ranking_rule_index,
ranking_rules[cur_ranking_rule_index].as_ref(),
&next_bucket.query,
&ranking_rule_universes[cur_ranking_rule_index],
);
ranking_rules[cur_ranking_rule_index].start_iteration(
ctx,
logger,
&next_bucket.candidates,
&next_bucket.query,
)?;
}
Ok(BucketSortOutput {
docids: valid_docids,
scores: valid_scores,
all_candidates,
degraded: false,
})
}
/// Add the candidates to the results. Take `distinct`, `from`, `length`, and `cur_offset`
/// into account and inform the logger.
#[allow(clippy::too_many_arguments)]
fn maybe_add_to_results<'ctx, Q: RankingRuleQueryTrait>(
ctx: &mut SearchContext<'ctx>,
from: usize,
length: usize,
logger: &mut dyn SearchLogger<Q>,
valid_docids: &mut Vec<u32>,
valid_scores: &mut Vec<Vec<ScoreDetails>>,
all_candidates: &mut RoaringBitmap,
ranking_rule_universes: &mut [RoaringBitmap],
ranking_rules: &mut [BoxRankingRule<'ctx, Q>],
cur_ranking_rule_index: usize,
cur_offset: &mut usize,
distinct_fid: Option<u16>,
ranking_rule_scores: &[ScoreDetails],
candidates: RoaringBitmap,
) -> Result<()> {
// First apply the distinct rule on the candidates, reducing the universes if necessary
let candidates = if let Some(distinct_fid) = distinct_fid {
let DistinctOutput { remaining, excluded } =
apply_distinct_rule(ctx, distinct_fid, &candidates)?;
for universe in ranking_rule_universes.iter_mut() {
*universe -= &excluded;
*all_candidates -= &excluded;
}
remaining
} else {
candidates.clone()
};
*all_candidates |= &candidates;
// if the candidates are empty, there is nothing to do;
if candidates.is_empty() {
return Ok(());
}
// if we still haven't reached the first document to return
if *cur_offset < from {
// and if no document from this bucket can be returned
if *cur_offset + (candidates.len() as usize) < from {
// then just skip the bucket
logger.skip_bucket_ranking_rule(
cur_ranking_rule_index,
ranking_rules[cur_ranking_rule_index].as_ref(),
&candidates,
);
} else {
// otherwise, skip some of the documents and add some of the rest, in order of ids
let candidates_vec = candidates.iter().collect::<Vec<_>>();
let (skipped_candidates, candidates) = candidates_vec.split_at(from - *cur_offset);
logger.skip_bucket_ranking_rule(
cur_ranking_rule_index,
ranking_rules[cur_ranking_rule_index].as_ref(),
&skipped_candidates.iter().collect(),
);
let candidates =
candidates.iter().take(length - valid_docids.len()).copied().collect::<Vec<_>>();
logger.add_to_results(&candidates);
valid_docids.extend_from_slice(&candidates);
valid_scores
.extend(std::iter::repeat(ranking_rule_scores.to_owned()).take(candidates.len()));
}
} else {
// if we have passed the offset already, add some of the documents (up to the limit)
let candidates = candidates.iter().take(length - valid_docids.len()).collect::<Vec<u32>>();
logger.add_to_results(&candidates);
valid_docids.extend_from_slice(&candidates);
valid_scores
.extend(std::iter::repeat(ranking_rule_scores.to_owned()).take(candidates.len()));
}
*cur_offset += candidates.len() as usize;
Ok(())
}

@@ -0,0 +1,719 @@
use std::borrow::Cow;
use std::collections::hash_map::Entry;
use std::hash::Hash;
use fxhash::FxHashMap;
use grenad::MergeFunction;
use heed::types::Bytes;
use heed::{BytesEncode, Database, RoTxn};
use roaring::RoaringBitmap;
use super::interner::Interned;
use super::Word;
use crate::heed_codec::{BytesDecodeOwned, StrBEU16Codec};
use crate::proximity::ProximityPrecision;
use crate::update::MergeCboRoaringBitmaps;
use crate::{
CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, Result, SearchContext, U8StrStrCodec,
};
/// A cache storing pointers to values in the LMDB databases.
///
/// Used for performance reasons only. By using this cache, we avoid performing a
/// database lookup and instead get a direct reference to the value using a fast
/// local HashMap lookup.
#[derive(Default)]
pub struct DatabaseCache<'ctx> {
pub word_pair_proximity_docids:
FxHashMap<(u8, Interned<String>, Interned<String>), Option<Cow<'ctx, [u8]>>>,
pub word_prefix_pair_proximity_docids:
FxHashMap<(u8, Interned<String>, Interned<String>), Option<RoaringBitmap>>,
pub prefix_word_pair_proximity_docids:
FxHashMap<(u8, Interned<String>, Interned<String>), Option<Cow<'ctx, [u8]>>>,
pub word_docids: FxHashMap<Interned<String>, Option<Cow<'ctx, [u8]>>>,
pub exact_word_docids: FxHashMap<Interned<String>, Option<Cow<'ctx, [u8]>>>,
pub word_prefix_docids: FxHashMap<Interned<String>, Option<Cow<'ctx, [u8]>>>,
pub exact_word_prefix_docids: FxHashMap<Interned<String>, Option<Cow<'ctx, [u8]>>>,
pub words_fst: Option<fst::Set<Cow<'ctx, [u8]>>>,
pub word_position_docids: FxHashMap<(Interned<String>, u16), Option<Cow<'ctx, [u8]>>>,
pub word_prefix_position_docids: FxHashMap<(Interned<String>, u16), Option<Cow<'ctx, [u8]>>>,
pub word_positions: FxHashMap<Interned<String>, Vec<u16>>,
pub word_prefix_positions: FxHashMap<Interned<String>, Vec<u16>>,
pub word_fid_docids: FxHashMap<(Interned<String>, u16), Option<Cow<'ctx, [u8]>>>,
pub word_prefix_fid_docids: FxHashMap<(Interned<String>, u16), Option<Cow<'ctx, [u8]>>>,
pub word_fids: FxHashMap<Interned<String>, Vec<u16>>,
pub word_prefix_fids: FxHashMap<Interned<String>, Vec<u16>>,
}
impl<'ctx> DatabaseCache<'ctx> {
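    /// Get a bitmap from `db`, caching the raw LMDB bytes under `cache_key` so that
    /// subsequent lookups skip the database. When a `universe` is provided, the
    /// serialized bitmap is intersected with it while decoding.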
fn get_value<'v, K1, KC>(
txn: &'ctx RoTxn<'_>,
cache_key: K1,
db_key: &'v KC::EItem,
cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>,
universe: Option<&RoaringBitmap>,
db: Database<KC, Bytes>,
) -> Result<Option<RoaringBitmap>>
where
K1: Copy + Eq + Hash,
KC: BytesEncode<'v>,
{
if let Entry::Vacant(entry) = cache.entry(cache_key) {
let bitmap_ptr = db.get(txn, db_key)?.map(Cow::Borrowed);
entry.insert(bitmap_ptr);
}
let bitmap_bytes = match cache.get(&cache_key).unwrap() {
Some(Cow::Borrowed(bytes)) => bytes,
Some(Cow::Owned(bytes)) => bytes.as_slice(),
None => return Ok(None),
};
match (bitmap_bytes, universe) {
(bytes, Some(universe)) => {
CboRoaringBitmapCodec::intersection_with_serialized(bytes, universe)
.map(Some)
.map_err(Into::into)
}
(bytes, None) => CboRoaringBitmapCodec::bytes_decode_owned(bytes)
.map(Some)
.map_err(heed::Error::Decoding)
.map_err(Into::into),
}
}
fn get_value_length<'v, K1, KC>(
txn: &'ctx RoTxn<'_>,
cache_key: K1,
db_key: &'v KC::EItem,
cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>,
db: Database<KC, Bytes>,
) -> Result<Option<u64>>
where
K1: Copy + Eq + Hash,
KC: BytesEncode<'v>,
{
if let Entry::Vacant(entry) = cache.entry(cache_key) {
let bitmap_ptr = db.get(txn, db_key)?.map(Cow::Borrowed);
entry.insert(bitmap_ptr);
}
let bitmap_bytes = match cache.get(&cache_key).unwrap() {
Some(Cow::Borrowed(bytes)) => bytes,
Some(Cow::Owned(bytes)) => bytes.as_slice(),
None => return Ok(None),
};
CboRoaringBitmapLenCodec::bytes_decode_owned(bitmap_bytes)
.map(Some)
.map_err(heed::Error::Decoding)
.map_err(Into::into)
}
fn get_value_from_keys<'v, K1, KC, MF>(
txn: &'ctx RoTxn<'_>,
cache_key: K1,
db_keys: &'v [KC::EItem],
cache: &mut FxHashMap<K1, Option<Cow<'ctx, [u8]>>>,
db: Database<KC, Bytes>,
universe: Option<&RoaringBitmap>,
merger: MF,
) -> Result<Option<RoaringBitmap>>
where
K1: Copy + Eq + Hash,
KC: BytesEncode<'v>,
KC::EItem: Sized,
MF: MergeFunction,
crate::Error: From<MF::Error>,
{
if let Entry::Vacant(entry) = cache.entry(cache_key) {
let bitmap_ptr: Option<Cow<'ctx, [u8]>> = match db_keys {
[] => None,
[key] => db.get(txn, key)?.map(Cow::Borrowed),
keys => {
let bitmaps = keys
.iter()
.filter_map(|key| db.get(txn, key).transpose())
.map(|v| v.map(Cow::Borrowed))
.collect::<std::result::Result<Vec<Cow<'_, [u8]>>, _>>()?;
if bitmaps.is_empty() {
None
} else {
Some(merger.merge(&[], &bitmaps[..])?)
}
}
};
entry.insert(bitmap_ptr);
}
let bitmap_bytes = match cache.get(&cache_key).unwrap() {
Some(Cow::Borrowed(bytes)) => bytes,
Some(Cow::Owned(bytes)) => bytes.as_slice(),
None => return Ok(None),
};
match (bitmap_bytes, universe) {
(bytes, Some(universe)) => {
CboRoaringBitmapCodec::intersection_with_serialized(bytes, universe)
.map(Some)
.map_err(Into::into)
}
(bytes, None) => CboRoaringBitmapCodec::bytes_decode_owned(bytes)
.map(Some)
.map_err(heed::Error::Decoding)
.map_err(Into::into),
}
}
}
impl<'ctx> SearchContext<'ctx> {
pub fn get_words_fst(&mut self) -> Result<fst::Set<Cow<'ctx, [u8]>>> {
if let Some(fst) = self.db_cache.words_fst.clone() {
Ok(fst)
} else {
let fst = self.index.words_fst(self.txn)?;
self.db_cache.words_fst = Some(fst.clone());
Ok(fst)
}
}
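    /// Fetch the docids containing the given word. Original query words are looked up
    /// in both the exact and the tolerant databases and the two bitmaps are unioned;
    /// derived words (from typos or prefixes) only ever come from the tolerant database.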
pub fn word_docids(
&mut self,
universe: Option<&RoaringBitmap>,
word: Word,
) -> Result<Option<RoaringBitmap>> {
match word {
Word::Original(word) => {
let exact = self.get_db_exact_word_docids(universe, word)?;
let tolerant = self.get_db_word_docids(universe, word)?;
Ok(match (exact, tolerant) {
(None, None) => None,
(None, Some(tolerant)) => Some(tolerant),
(Some(exact), None) => Some(exact),
(Some(exact), Some(tolerant)) => {
let mut both = exact;
both |= tolerant;
Some(both)
}
})
}
Word::Derived(word) => self.get_db_word_docids(universe, word),
}
}
/// Retrieve or insert the given value in the `word_docids` database.
fn get_db_word_docids(
&mut self,
universe: Option<&RoaringBitmap>,
word: Interned<String>,
) -> Result<Option<RoaringBitmap>> {
match &self.restricted_fids {
Some(restricted_fids) => {
let interned = self.word_interner.get(word).as_str();
let keys: Vec<_> =
restricted_fids.tolerant.iter().map(|(fid, _)| (interned, *fid)).collect();
DatabaseCache::get_value_from_keys(
self.txn,
word,
&keys[..],
&mut self.db_cache.word_docids,
self.index.word_fid_docids.remap_data_type::<Bytes>(),
universe,
MergeCboRoaringBitmaps,
)
}
None => DatabaseCache::get_value(
self.txn,
word,
self.word_interner.get(word).as_str(),
&mut self.db_cache.word_docids,
universe,
self.index.word_docids.remap_data_type::<Bytes>(),
),
}
}
fn get_db_exact_word_docids(
&mut self,
universe: Option<&RoaringBitmap>,
word: Interned<String>,
) -> Result<Option<RoaringBitmap>> {
match &self.restricted_fids {
Some(restricted_fids) => {
let interned = self.word_interner.get(word).as_str();
let keys: Vec<_> =
restricted_fids.exact.iter().map(|(fid, _)| (interned, *fid)).collect();
DatabaseCache::get_value_from_keys(
self.txn,
word,
&keys[..],
&mut self.db_cache.exact_word_docids,
self.index.word_fid_docids.remap_data_type::<Bytes>(),
universe,
MergeCboRoaringBitmaps,
)
}
None => DatabaseCache::get_value(
self.txn,
word,
self.word_interner.get(word).as_str(),
&mut self.db_cache.exact_word_docids,
universe,
self.index.exact_word_docids.remap_data_type::<Bytes>(),
),
}
}
pub fn word_prefix_docids(
&mut self,
universe: Option<&RoaringBitmap>,
prefix: Word,
) -> Result<Option<RoaringBitmap>> {
match prefix {
Word::Original(prefix) => {
let exact = self.get_db_exact_word_prefix_docids(universe, prefix)?;
let tolerant = self.get_db_word_prefix_docids(universe, prefix)?;
Ok(match (exact, tolerant) {
(None, None) => None,
(None, Some(tolerant)) => Some(tolerant),
(Some(exact), None) => Some(exact),
(Some(exact), Some(tolerant)) => {
let mut both = exact;
both |= tolerant;
Some(both)
}
})
}
Word::Derived(prefix) => self.get_db_word_prefix_docids(universe, prefix),
}
}
/// Retrieve or insert the given value in the `word_prefix_docids` database.
fn get_db_word_prefix_docids(
&mut self,
universe: Option<&RoaringBitmap>,
prefix: Interned<String>,
) -> Result<Option<RoaringBitmap>> {
match &self.restricted_fids {
Some(restricted_fids) => {
let interned = self.word_interner.get(prefix).as_str();
let keys: Vec<_> =
restricted_fids.tolerant.iter().map(|(fid, _)| (interned, *fid)).collect();
DatabaseCache::get_value_from_keys(
self.txn,
prefix,
&keys[..],
&mut self.db_cache.word_prefix_docids,
self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
universe,
MergeCboRoaringBitmaps,
)
}
None => DatabaseCache::get_value(
self.txn,
prefix,
self.word_interner.get(prefix).as_str(),
&mut self.db_cache.word_prefix_docids,
universe,
self.index.word_prefix_docids.remap_data_type::<Bytes>(),
),
}
}
fn get_db_exact_word_prefix_docids(
&mut self,
universe: Option<&RoaringBitmap>,
prefix: Interned<String>,
) -> Result<Option<RoaringBitmap>> {
match &self.restricted_fids {
Some(restricted_fids) => {
let interned = self.word_interner.get(prefix).as_str();
let keys: Vec<_> =
restricted_fids.exact.iter().map(|(fid, _)| (interned, *fid)).collect();
DatabaseCache::get_value_from_keys(
self.txn,
prefix,
&keys[..],
&mut self.db_cache.exact_word_prefix_docids,
self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
universe,
MergeCboRoaringBitmaps,
)
}
None => DatabaseCache::get_value(
self.txn,
prefix,
self.word_interner.get(prefix).as_str(),
&mut self.db_cache.exact_word_prefix_docids,
universe,
self.index.exact_word_prefix_docids.remap_data_type::<Bytes>(),
),
}
}
pub fn get_db_word_pair_proximity_docids(
&mut self,
universe: Option<&RoaringBitmap>,
word1: Interned<String>,
word2: Interned<String>,
proximity: u8,
) -> Result<Option<RoaringBitmap>> {
match self.index.proximity_precision(self.txn)?.unwrap_or_default() {
ProximityPrecision::ByAttribute => {
// Force the proximity to 0 because, with ByAttribute precision, there are
// only 2 possible distances:
// 1. words in the same attribute: in that case the DB contains (0, word1, word2)
// 2. words in different attributes: no DB entry for these two words.
let proximity = 0;
let docids = if let Some(docids) =
self.db_cache.word_pair_proximity_docids.get(&(proximity, word1, word2))
{
docids
.as_ref()
.map(|d| CboRoaringBitmapCodec::bytes_decode_owned(d))
.transpose()
.map_err(heed::Error::Decoding)?
} else {
// Compute the distance at the attribute level and store it in the cache.
let fids = self.index.searchable_fields_ids(self.txn)?;
let mut docids = RoaringBitmap::new();
for fid in fids {
// for each field, intersect left word bitmap and right word bitmap,
// then merge the result in a global bitmap before storing it in the cache.
let word1_docids = self.get_db_word_fid_docids(universe, word1, fid)?;
let word2_docids = self.get_db_word_fid_docids(universe, word2, fid)?;
if let (Some(word1_docids), Some(word2_docids)) =
(word1_docids, word2_docids)
{
docids |= word1_docids & word2_docids;
}
}
let encoded = CboRoaringBitmapCodec::bytes_encode(&docids)
.map(Cow::into_owned)
.map(Cow::Owned)
.map(Some)
.map_err(heed::Error::Decoding)?;
self.db_cache
.word_pair_proximity_docids
.insert((proximity, word1, word2), encoded);
Some(docids)
};
Ok(docids)
}
ProximityPrecision::ByWord => DatabaseCache::get_value(
self.txn,
(proximity, word1, word2),
&(
proximity,
self.word_interner.get(word1).as_str(),
self.word_interner.get(word2).as_str(),
),
&mut self.db_cache.word_pair_proximity_docids,
universe,
self.index.word_pair_proximity_docids.remap_data_type::<Bytes>(),
),
}
}
pub fn get_db_word_pair_proximity_docids_len(
&mut self,
universe: Option<&RoaringBitmap>,
word1: Interned<String>,
word2: Interned<String>,
proximity: u8,
) -> Result<Option<u64>> {
match self.index.proximity_precision(self.txn)?.unwrap_or_default() {
ProximityPrecision::ByAttribute => Ok(self
.get_db_word_pair_proximity_docids(universe, word1, word2, proximity)?
.map(|d| d.len())),
ProximityPrecision::ByWord => DatabaseCache::get_value_length::<_, _>(
self.txn,
(proximity, word1, word2),
&(
proximity,
self.word_interner.get(word1).as_str(),
self.word_interner.get(word2).as_str(),
),
&mut self.db_cache.word_pair_proximity_docids,
self.index.word_pair_proximity_docids.remap_data_type::<Bytes>(),
),
}
}
pub fn get_db_word_prefix_pair_proximity_docids(
&mut self,
universe: Option<&RoaringBitmap>,
word1: Interned<String>,
prefix2: Interned<String>,
mut proximity: u8,
) -> Result<Option<RoaringBitmap>> {
let proximity_precision = self.index.proximity_precision(self.txn)?.unwrap_or_default();
if proximity_precision == ProximityPrecision::ByAttribute {
// Force the proximity to 0 because, with ByAttribute precision, there are
// only 2 possible distances:
// 1. words in the same attribute: in that case the DB contains (0, word1, word2)
// 2. words in different attributes: no DB entry for these two words.
proximity = 0;
}
let docids = if let Some(docids) =
self.db_cache.word_prefix_pair_proximity_docids.get(&(proximity, word1, prefix2))
{
docids.clone()
} else {
let prefix_docids = match proximity_precision {
ProximityPrecision::ByAttribute => {
// Compute the distance at the attribute level and store it in the cache.
let fids = self.index.searchable_fields_ids(self.txn)?;
let mut prefix_docids = RoaringBitmap::new();
// for each field, intersect left word bitmap and right word bitmap,
// then merge the result in a global bitmap before storing it in the cache.
for fid in fids {
let word1_docids = self.get_db_word_fid_docids(universe, word1, fid)?;
let prefix2_docids =
self.get_db_word_prefix_fid_docids(universe, prefix2, fid)?;
if let (Some(word1_docids), Some(prefix2_docids)) =
(word1_docids, prefix2_docids)
{
prefix_docids |= word1_docids & prefix2_docids;
}
}
prefix_docids
}
ProximityPrecision::ByWord => {
// compute docids using prefix iter and store the result in the cache.
let key = U8StrStrCodec::bytes_encode(&(
proximity,
self.word_interner.get(word1).as_str(),
self.word_interner.get(prefix2).as_str(),
))
.unwrap()
.into_owned();
let mut prefix_docids = RoaringBitmap::new();
let remap_key_type = self
.index
.word_pair_proximity_docids
.remap_key_type::<Bytes>()
.prefix_iter(self.txn, &key)?;
for result in remap_key_type {
let (_, docids) = result?;
prefix_docids |= docids;
}
prefix_docids
}
};
self.db_cache
.word_prefix_pair_proximity_docids
.insert((proximity, word1, prefix2), Some(prefix_docids.clone()));
Some(prefix_docids)
};
Ok(docids)
}
pub fn get_db_prefix_word_pair_proximity_docids(
&mut self,
universe: Option<&RoaringBitmap>,
left_prefix: Interned<String>,
right: Interned<String>,
proximity: u8,
) -> Result<Option<RoaringBitmap>> {
// only accept exact matches on reverted positions
self.get_db_word_pair_proximity_docids(universe, left_prefix, right, proximity)
}
pub fn get_db_word_fid_docids(
&mut self,
universe: Option<&RoaringBitmap>,
word: Interned<String>,
fid: u16,
) -> Result<Option<RoaringBitmap>> {
// if the requested fid isn't in the restricted list, return None.
if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) {
return Ok(None);
}
DatabaseCache::get_value(
self.txn,
(word, fid),
&(self.word_interner.get(word).as_str(), fid),
&mut self.db_cache.word_fid_docids,
universe,
self.index.word_fid_docids.remap_data_type::<Bytes>(),
)
}
pub fn get_db_word_prefix_fid_docids(
&mut self,
universe: Option<&RoaringBitmap>,
word_prefix: Interned<String>,
fid: u16,
) -> Result<Option<RoaringBitmap>> {
// if the requested fid isn't in the restricted list, return None.
if self.restricted_fids.as_ref().map_or(false, |fids| !fids.contains(&fid)) {
return Ok(None);
}
DatabaseCache::get_value(
self.txn,
(word_prefix, fid),
&(self.word_interner.get(word_prefix).as_str(), fid),
&mut self.db_cache.word_prefix_fid_docids,
universe,
self.index.word_prefix_fid_docids.remap_data_type::<Bytes>(),
)
}
pub fn get_db_word_fids(&mut self, word: Interned<String>) -> Result<Vec<u16>> {
let fids = match self.db_cache.word_fids.entry(word) {
Entry::Occupied(fids) => fids.get().clone(),
Entry::Vacant(entry) => {
let mut key = self.word_interner.get(word).as_bytes().to_owned();
key.push(0);
let mut fids = vec![];
let remap_key_type = self
.index
.word_fid_docids
.remap_types::<Bytes, Bytes>()
.prefix_iter(self.txn, &key)?
.remap_key_type::<StrBEU16Codec>();
for result in remap_key_type {
let ((_, fid), value) = result?;
// filling other caches to avoid searching for them again
self.db_cache.word_fid_docids.insert((word, fid), Some(Cow::Borrowed(value)));
fids.push(fid);
}
entry.insert(fids.clone());
fids
}
};
Ok(fids)
}
pub fn get_db_word_prefix_fids(&mut self, word_prefix: Interned<String>) -> Result<Vec<u16>> {
let fids = match self.db_cache.word_prefix_fids.entry(word_prefix) {
Entry::Occupied(fids) => fids.get().clone(),
Entry::Vacant(entry) => {
let mut key = self.word_interner.get(word_prefix).as_bytes().to_owned();
key.push(0);
let mut fids = vec![];
let remap_key_type = self
.index
.word_prefix_fid_docids
.remap_types::<Bytes, Bytes>()
.prefix_iter(self.txn, &key)?
.remap_key_type::<StrBEU16Codec>();
for result in remap_key_type {
let ((_, fid), value) = result?;
// filling other caches to avoid searching for them again
self.db_cache
.word_prefix_fid_docids
.insert((word_prefix, fid), Some(Cow::Borrowed(value)));
fids.push(fid);
}
entry.insert(fids.clone());
fids
}
};
Ok(fids)
}
pub fn get_db_word_position_docids(
&mut self,
universe: Option<&RoaringBitmap>,
word: Interned<String>,
position: u16,
) -> Result<Option<RoaringBitmap>> {
DatabaseCache::get_value(
self.txn,
(word, position),
&(self.word_interner.get(word).as_str(), position),
&mut self.db_cache.word_position_docids,
universe,
self.index.word_position_docids.remap_data_type::<Bytes>(),
)
}
pub fn get_db_word_prefix_position_docids(
&mut self,
universe: Option<&RoaringBitmap>,
word_prefix: Interned<String>,
position: u16,
) -> Result<Option<RoaringBitmap>> {
DatabaseCache::get_value(
self.txn,
(word_prefix, position),
&(self.word_interner.get(word_prefix).as_str(), position),
&mut self.db_cache.word_prefix_position_docids,
universe,
self.index.word_prefix_position_docids.remap_data_type::<Bytes>(),
)
}
pub fn get_db_word_positions(&mut self, word: Interned<String>) -> Result<Vec<u16>> {
let positions = match self.db_cache.word_positions.entry(word) {
Entry::Occupied(positions) => positions.get().clone(),
Entry::Vacant(entry) => {
let mut key = self.word_interner.get(word).as_bytes().to_owned();
key.push(0);
let mut positions = vec![];
let remap_key_type = self
.index
.word_position_docids
.remap_types::<Bytes, Bytes>()
.prefix_iter(self.txn, &key)?
.remap_key_type::<StrBEU16Codec>();
for result in remap_key_type {
let ((_, position), value) = result?;
// filling other caches to avoid searching for them again
self.db_cache
.word_position_docids
.insert((word, position), Some(Cow::Borrowed(value)));
positions.push(position);
}
entry.insert(positions.clone());
positions
}
};
Ok(positions)
}
pub fn get_db_word_prefix_positions(
&mut self,
word_prefix: Interned<String>,
) -> Result<Vec<u16>> {
let positions = match self.db_cache.word_prefix_positions.entry(word_prefix) {
Entry::Occupied(positions) => positions.get().clone(),
Entry::Vacant(entry) => {
let mut key = self.word_interner.get(word_prefix).as_bytes().to_owned();
key.push(0);
let mut positions = vec![];
let remap_key_type = self
.index
.word_prefix_position_docids
.remap_types::<Bytes, Bytes>()
.prefix_iter(self.txn, &key)?
.remap_key_type::<StrBEU16Codec>();
for result in remap_key_type {
let ((_, position), value) = result?;
// filling other caches to avoid searching for them again
self.db_cache
.word_prefix_position_docids
.insert((word_prefix, position), Some(Cow::Borrowed(value)));
positions.push(position);
}
entry.insert(positions.clone());
positions
}
};
Ok(positions)
}
}

@@ -0,0 +1,123 @@
use heed::types::{Bytes, Str, Unit};
use heed::{Database, RoPrefix, RoTxn};
use roaring::RoaringBitmap;
const FID_SIZE: usize = 2;
const DOCID_SIZE: usize = 4;
use crate::heed_codec::facet::{
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetCodec,
};
use crate::heed_codec::BytesRefCodec;
use crate::{Index, Result, SearchContext};
pub struct DistinctOutput {
pub remaining: RoaringBitmap,
pub excluded: RoaringBitmap,
}
/// Return a [`DistinctOutput`] containing:
/// - `remaining`: a set of docids built such that exactly one element from `candidates`
/// is kept for each distinct value inside the given field. If the field does not exist, it
/// is considered unique.
/// - `excluded`: the set of document ids that contain a value for the given field that occurs
/// in the given candidates.
pub fn apply_distinct_rule(
ctx: &mut SearchContext<'_>,
field_id: u16,
candidates: &RoaringBitmap,
) -> Result<DistinctOutput> {
let mut excluded = RoaringBitmap::new();
let mut remaining = RoaringBitmap::new();
for docid in candidates {
if excluded.contains(docid) {
continue;
}
distinct_single_docid(ctx.index, ctx.txn, field_id, docid, &mut excluded)?;
remaining.push(docid);
}
Ok(DistinctOutput { remaining, excluded })
}
/// Apply the distinct rule defined by [`apply_distinct_rule`] for a single document id.
pub fn distinct_single_docid(
index: &Index,
txn: &RoTxn<'_>,
field_id: u16,
docid: u32,
excluded: &mut RoaringBitmap,
) -> Result<()> {
for item in facet_string_values(docid, field_id, index, txn)? {
let ((_, _, facet_value), _) = item?;
if let Some(facet_docids) = facet_value_docids(
index.facet_id_string_docids.remap_types(),
txn,
field_id,
facet_value,
)? {
*excluded |= facet_docids;
}
}
for item in facet_number_values(docid, field_id, index, txn)? {
let ((_, _, facet_value), _) = item?;
if let Some(facet_docids) =
facet_value_docids(index.facet_id_f64_docids.remap_types(), txn, field_id, facet_value)?
{
*excluded |= facet_docids;
}
}
Ok(())
}
/// Return all the docids containing the given value in the given field
fn facet_value_docids(
database: Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
txn: &RoTxn<'_>,
field_id: u16,
facet_value: &[u8],
) -> heed::Result<Option<RoaringBitmap>> {
database
.get(txn, &FacetGroupKey { field_id, level: 0, left_bound: facet_value })
.map(|opt| opt.map(|v| v.bitmap))
}
/// Return an iterator over each number value in the given field of the given document.
fn facet_number_values<'a>(
docid: u32,
field_id: u16,
index: &Index,
txn: &'a RoTxn<'a>,
) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<BytesRefCodec>, Unit>> {
let key = facet_values_prefix_key(field_id, docid);
let iter = index
.field_id_docid_facet_f64s
.remap_key_type::<Bytes>()
.prefix_iter(txn, &key)?
.remap_key_type();
Ok(iter)
}
/// Return an iterator over each string value in the given field of the given document.
pub fn facet_string_values<'a>(
docid: u32,
field_id: u16,
index: &Index,
txn: &'a RoTxn<'a>,
) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<BytesRefCodec>, Str>> {
let key = facet_values_prefix_key(field_id, docid);
let iter = index
.field_id_docid_facet_strings
.remap_key_type::<Bytes>()
.prefix_iter(txn, &key)?
.remap_types();
Ok(iter)
}
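/// Build the key prefix `[field_id (2 bytes BE) | docid (4 bytes BE)]` used to iterate
/// over all the facet values of one document in one field.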
#[allow(clippy::drop_non_drop)]
fn facet_values_prefix_key(distinct: u16, id: u32) -> [u8; FID_SIZE + DOCID_SIZE] {
concat_arrays::concat_arrays!(distinct.to_be_bytes(), id.to_be_bytes())
}

@@ -0,0 +1,299 @@
use heed::types::Bytes;
use roaring::{MultiOps, RoaringBitmap};
use super::query_graph::QueryGraph;
use super::ranking_rules::{RankingRule, RankingRuleOutput};
use crate::score_details::{self, ScoreDetails};
use crate::search::new::query_graph::QueryNodeData;
use crate::search::new::query_term::ExactTerm;
use crate::{CboRoaringBitmapCodec, Result, SearchContext, SearchLogger};
/// A ranking rule that produces 3 disjoint buckets:
///
/// 1. Documents from the universe whose value is exactly the query.
/// 2. Documents from the universe not in (1) whose value starts with the query.
/// 3. Documents from the universe not in (1) or (2).
pub struct ExactAttribute {
state: State,
}
impl ExactAttribute {
pub fn new() -> Self {
Self { state: Default::default() }
}
}
impl<'ctx> RankingRule<'ctx, QueryGraph> for ExactAttribute {
fn id(&self) -> String {
"exact_attribute".to_owned()
}
#[tracing::instrument(level = "trace", skip_all, target = "search::exact_attribute")]
fn start_iteration(
&mut self,
ctx: &mut SearchContext<'ctx>,
_logger: &mut dyn SearchLogger<QueryGraph>,
universe: &roaring::RoaringBitmap,
query: &QueryGraph,
) -> Result<()> {
self.state = State::start_iteration(ctx, universe, query)?;
Ok(())
}
#[tracing::instrument(level = "trace", skip_all, target = "search::exact_attribute")]
fn next_bucket(
&mut self,
_ctx: &mut SearchContext<'ctx>,
_logger: &mut dyn SearchLogger<QueryGraph>,
universe: &roaring::RoaringBitmap,
) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
let state = std::mem::take(&mut self.state);
let (state, output) = State::next(state, universe);
self.state = state;
Ok(output)
}
#[tracing::instrument(level = "trace", skip_all, target = "search::exact_attribute")]
fn end_iteration(
&mut self,
_ctx: &mut SearchContext<'ctx>,
_logger: &mut dyn SearchLogger<QueryGraph>,
) {
self.state = Default::default();
}
}
/// Inner state of the ranking rule.
#[derive(Default)]
enum State {
/// State between two iterations
#[default]
Uninitialized,
/// The next call to `next` will output the documents in the universe that have an attribute that is the exact query
ExactAttribute(QueryGraph, Vec<FieldCandidates>),
/// The next call to `next` will output the documents in the universe that have an attribute that starts with the exact query,
/// but isn't the exact query.
AttributeStarts(QueryGraph, Vec<FieldCandidates>),
/// The next calls to `next` will output the input universe.
Empty(QueryGraph),
}
/// The candidates sorted by attributes
///
/// Each of the bitmap in a single `FieldCandidates` struct applies to the same field.
struct FieldCandidates {
/// The candidates that start with all the words of the query in the field
start_with_exact: RoaringBitmap,
/// The candidates that have the same number of words as the query in the field
exact_word_count: RoaringBitmap,
}
impl State {
fn start_iteration(
ctx: &mut SearchContext<'_>,
universe: &RoaringBitmap,
query_graph: &QueryGraph,
) -> Result<Self> {
struct ExactTermInfo {
exact_term: ExactTerm,
start_position: u16,
start_term_id: u8,
position_count: usize,
}
let mut exact_terms: Vec<ExactTermInfo> =
Vec::with_capacity(query_graph.nodes.len() as usize);
for (_, node) in query_graph.nodes.iter() {
match &node.data {
QueryNodeData::Term(term) => {
let exact_term = if let Some(exact_term) = term.term_subset.exact_term(ctx) {
exact_term
} else {
continue;
};
exact_terms.push(ExactTermInfo {
exact_term,
start_position: *term.positions.start(),
start_term_id: *term.term_ids.start(),
position_count: term.positions.len(),
});
}
QueryNodeData::Deleted | QueryNodeData::Start | QueryNodeData::End => continue,
}
}
exact_terms.sort_by_key(|x| x.start_term_id);
exact_terms.dedup_by_key(|x| x.start_term_id);
let count_all_positions = exact_terms.iter().fold(0, |acc, x| acc + x.position_count);
// bail if there is a "hole" (missing word) in the remaining query graph
if let Some(e) = exact_terms.first() {
if e.start_term_id != 0 {
return Ok(State::Empty(query_graph.clone()));
}
} else {
return Ok(State::Empty(query_graph.clone()));
}
let mut previous_id = 0;
for e in exact_terms.iter() {
if e.start_term_id < previous_id || e.start_term_id - previous_id > 1 {
return Ok(State::Empty(query_graph.clone()));
} else {
previous_id = e.start_term_id;
}
}
// Sample query: "sunflower are pretty", where
// "sunflower" is at position 0 in attribute A,
// "are" is at position 1 in attribute B,
// and "pretty" is at position 2 in attribute C.
// We want to eliminate such documents: first check that, for each term, some
// attribute contains this term at the correct position, using the
// word_position_docids database.
let mut candidates = universe.clone();
let words_positions: Vec<(Vec<_>, _)> = exact_terms
.iter()
.map(|e| (e.exact_term.interned_words(ctx).collect(), e.start_position))
.collect();
for (words, position) in &words_positions {
if candidates.is_empty() {
return Ok(State::Empty(query_graph.clone()));
}
'words: for (offset, word) in words.iter().enumerate() {
let offset = offset as u16;
let word = if let Some(word) = word {
word
} else {
continue 'words;
};
// Note: Since the position is stored bucketed in word_position_docids, for queries with a lot of
// longer phrases we'll be losing on precision here.
let bucketed_position = crate::bucketed_position(position + offset);
let word_position_docids = ctx
.get_db_word_position_docids(Some(universe), *word, bucketed_position)?
.unwrap_or_default();
candidates &= word_position_docids;
if candidates.is_empty() {
return Ok(State::Empty(query_graph.clone()));
}
}
}
let candidates = candidates;
if candidates.is_empty() {
return Ok(State::Empty(query_graph.clone()));
}
let searchable_fields_ids = ctx.index.searchable_fields_ids(ctx.txn)?;
let mut candidates_per_attribute = Vec::with_capacity(searchable_fields_ids.len());
// then check that there exists at least one attribute that has all of the terms
for fid in searchable_fields_ids {
let intersection = MultiOps::intersection(
words_positions
.iter()
.flat_map(|(words, ..)| words.iter())
// ignore stop words in phrases
.flatten()
.map(|word| -> Result<_> {
Ok(ctx
.get_db_word_fid_docids(Some(&candidates), *word, fid)?
.unwrap_or_default())
}),
)?;
if !intersection.is_empty() {
// Although not really worth it in terms of performance,
// it would be good to put this in the cache for the sake of consistency
let candidates_with_exact_word_count = if count_all_positions < u8::MAX as usize {
let bitmap_bytes = ctx
.index
.field_id_word_count_docids
.remap_data_type::<Bytes>()
.get(ctx.txn, &(fid, count_all_positions as u8))?;
match bitmap_bytes {
Some(bytes) => {
CboRoaringBitmapCodec::intersection_with_serialized(bytes, universe)?
}
None => RoaringBitmap::default(),
}
} else {
RoaringBitmap::default()
};
candidates_per_attribute.push(FieldCandidates {
start_with_exact: intersection,
exact_word_count: candidates_with_exact_word_count,
});
}
}
// Note: we can have "false positives" where the per-position check passes across
// different attributes while the single attribute that has all the terms contains
// them in the incorrect order.
Ok(State::ExactAttribute(query_graph.clone(), candidates_per_attribute))
}
fn next(
state: State,
universe: &RoaringBitmap,
) -> (State, Option<RankingRuleOutput<QueryGraph>>) {
let (state, output) = match state {
State::Uninitialized => (state, None),
State::ExactAttribute(query_graph, candidates_per_attribute) => {
// TODO: it could be much faster to do the intersections before the unions...
// or maybe candidates_per_attribute does not contain anything outside the universe
let mut candidates = MultiOps::union(candidates_per_attribute.iter().map(
|FieldCandidates { start_with_exact, exact_word_count }| {
start_with_exact & exact_word_count
},
));
candidates &= universe;
(
State::AttributeStarts(query_graph.clone(), candidates_per_attribute),
Some(RankingRuleOutput {
query: query_graph,
candidates,
score: ScoreDetails::ExactAttribute(
score_details::ExactAttribute::ExactMatch,
),
}),
)
}
State::AttributeStarts(query_graph, candidates_per_attribute) => {
// TODO: it could be much faster to do the intersections before the unions...
// or maybe candidates_per_attribute does not contain anything outside the universe
let mut candidates = MultiOps::union(candidates_per_attribute.into_iter().map(
|FieldCandidates { mut start_with_exact, exact_word_count }| {
start_with_exact -= exact_word_count;
start_with_exact
},
));
candidates &= universe;
(
State::Empty(query_graph.clone()),
Some(RankingRuleOutput {
query: query_graph,
candidates,
score: ScoreDetails::ExactAttribute(
score_details::ExactAttribute::MatchesStart,
),
}),
)
}
State::Empty(query_graph) => (
State::Empty(query_graph.clone()),
Some(RankingRuleOutput {
query: query_graph,
candidates: universe.clone(),
score: ScoreDetails::ExactAttribute(
score_details::ExactAttribute::NoExactMatch,
),
}),
),
};
(state, output)
}
}

View file

@ -0,0 +1,309 @@
use std::collections::VecDeque;
use std::iter::FromIterator;
use heed::types::{Bytes, Unit};
use heed::{RoPrefix, RoTxn};
use roaring::RoaringBitmap;
use rstar::RTree;
use super::facet_string_values;
use super::ranking_rules::{RankingRule, RankingRuleOutput, RankingRuleQueryTrait};
use crate::heed_codec::facet::{FieldDocIdFacetCodec, OrderedF64Codec};
use crate::score_details::{self, ScoreDetails};
use crate::{
distance_between_two_points, lat_lng_to_xyz, GeoPoint, Index, Result, SearchContext,
SearchLogger,
};
const FID_SIZE: usize = 2;
const DOCID_SIZE: usize = 4;
#[allow(clippy::drop_non_drop)]
fn facet_values_prefix_key(distinct: u16, id: u32) -> [u8; FID_SIZE + DOCID_SIZE] {
concat_arrays::concat_arrays!(distinct.to_be_bytes(), id.to_be_bytes())
}
/// Return an iterator over each number value in the given field of the given document.
fn facet_number_values<'a>(
docid: u32,
field_id: u16,
index: &Index,
txn: &'a RoTxn<'a>,
) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<OrderedF64Codec>, Unit>> {
let key = facet_values_prefix_key(field_id, docid);
let iter = index
.field_id_docid_facet_f64s
.remap_key_type::<Bytes>()
.prefix_iter(txn, &key)?
.remap_key_type();
Ok(iter)
}
/// Define the strategy used by the geo sort.
/// The parameter represents the cache size, and, in the case of the Dynamic strategy,
/// the point where we move from using the iterative strategy to the rtree.
#[derive(Debug, Clone, Copy)]
pub enum Strategy {
AlwaysIterative(usize),
AlwaysRtree(usize),
Dynamic(usize),
}
impl Default for Strategy {
fn default() -> Self {
Strategy::Dynamic(1000)
}
}
impl Strategy {
pub fn use_rtree(&self, candidates: usize) -> bool {
match self {
Strategy::AlwaysIterative(_) => false,
Strategy::AlwaysRtree(_) => true,
Strategy::Dynamic(i) => candidates >= *i,
}
}
pub fn cache_size(&self) -> usize {
match self {
Strategy::AlwaysIterative(i) | Strategy::AlwaysRtree(i) | Strategy::Dynamic(i) => *i,
}
}
}
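// Illustrative check (a sketch added for clarity, not part of the original
// sources): `Dynamic(1000)` switches from the iterative scan to the rtree once
// the candidate set reaches 1000 documents, while the cache size is the same
// for every variant.
#[cfg(test)]
#[test]
fn strategy_picks_rtree_on_large_candidate_sets() {
    let strategy = Strategy::Dynamic(1000);
    assert!(!strategy.use_rtree(999));
    assert!(strategy.use_rtree(1_000));
    assert_eq!(strategy.cache_size(), 1_000);
}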
pub struct GeoSort<Q: RankingRuleQueryTrait> {
query: Option<Q>,
strategy: Strategy,
ascending: bool,
point: [f64; 2],
field_ids: Option<[u16; 2]>,
rtree: Option<RTree<GeoPoint>>,
cached_sorted_docids: VecDeque<(u32, [f64; 2])>,
geo_candidates: RoaringBitmap,
}
impl<Q: RankingRuleQueryTrait> GeoSort<Q> {
pub fn new(
strategy: Strategy,
geo_faceted_docids: RoaringBitmap,
point: [f64; 2],
ascending: bool,
) -> Result<Self> {
Ok(Self {
query: None,
strategy,
ascending,
point,
geo_candidates: geo_faceted_docids,
field_ids: None,
rtree: None,
cached_sorted_docids: VecDeque::new(),
})
}
/// Refill the internal buffer of cached docids based on the strategy.
/// Drop the rtree if we don't need it anymore.
fn fill_buffer(
&mut self,
ctx: &mut SearchContext<'_>,
geo_candidates: &RoaringBitmap,
) -> Result<()> {
debug_assert!(self.field_ids.is_some(), "fill_buffer can't be called without the lat&lng");
debug_assert!(self.cached_sorted_docids.is_empty());
// lazily initialize the rtree if needed by the strategy, and cache it in `self.rtree`
let rtree = if self.strategy.use_rtree(geo_candidates.len() as usize) {
if let Some(rtree) = self.rtree.as_ref() {
// get rtree from cache
Some(rtree)
} else {
let rtree = ctx.index.geo_rtree(ctx.txn)?.expect("geo candidates but no rtree");
// insert the rtree into the cache and return it.
// Can't use `get_or_insert_with` because getting the rtree from the DB is a fallible operation.
Some(&*self.rtree.insert(rtree))
}
} else {
None
};
let cache_size = self.strategy.cache_size();
if let Some(rtree) = rtree {
if self.ascending {
let point = lat_lng_to_xyz(&self.point);
for point in rtree.nearest_neighbor_iter(&point) {
if geo_candidates.contains(point.data.0) {
self.cached_sorted_docids.push_back(point.data);
if self.cached_sorted_docids.len() >= cache_size {
break;
}
}
}
} else {
// in the case of the desc geo sort we look for the closest point to the antipode of the queried point,
// and we insert the points in reverse order; they get reversed back when emptying the cache later on
let point = lat_lng_to_xyz(&opposite_of(self.point));
for point in rtree.nearest_neighbor_iter(&point) {
if geo_candidates.contains(point.data.0) {
self.cached_sorted_docids.push_front(point.data);
if self.cached_sorted_docids.len() >= cache_size {
break;
}
}
}
}
} else {
// the iterative version
let [lat, lng] = self.field_ids.unwrap();
let mut documents = geo_candidates
.iter()
.map(|id| -> Result<_> { Ok((id, geo_value(id, lat, lng, ctx.index, ctx.txn)?)) })
.collect::<Result<Vec<(u32, [f64; 2])>>>()?;
// computing the distance between two points is expensive thus we cache the result
documents
.sort_by_cached_key(|(_, p)| distance_between_two_points(&self.point, p) as usize);
self.cached_sorted_docids.extend(documents);
};
Ok(())
}
}
/// Extracts the lat and long values from a single document.
///
/// If it is not able to find it in the facet number index, it will extract it
/// from the facet string index and parse it as an f64 (matching how the geo extraction behaves).
fn geo_value(
docid: u32,
field_lat: u16,
field_lng: u16,
index: &Index,
rtxn: &RoTxn<'_>,
) -> Result<[f64; 2]> {
let extract_geo = |geo_field: u16| -> Result<f64> {
match facet_number_values(docid, geo_field, index, rtxn)?.next() {
Some(Ok(((_, _, geo), ()))) => Ok(geo),
Some(Err(e)) => Err(e.into()),
None => match facet_string_values(docid, geo_field, index, rtxn)?.next() {
Some(Ok((_, geo))) => {
Ok(geo.parse::<f64>().expect("cannot parse geo field as f64"))
}
Some(Err(e)) => Err(e.into()),
None => panic!("A geo faceted document doesn't contain any lat or lng"),
},
}
};
let lat = extract_geo(field_lat)?;
let lng = extract_geo(field_lng)?;
Ok([lat, lng])
}
impl<'ctx, Q: RankingRuleQueryTrait> RankingRule<'ctx, Q> for GeoSort<Q> {
fn id(&self) -> String {
"geo_sort".to_owned()
}
#[tracing::instrument(level = "trace", skip_all, target = "search::geo_sort")]
fn start_iteration(
&mut self,
ctx: &mut SearchContext<'ctx>,
_logger: &mut dyn SearchLogger<Q>,
universe: &RoaringBitmap,
query: &Q,
) -> Result<()> {
assert!(self.query.is_none());
self.query = Some(query.clone());
let geo_candidates = &self.geo_candidates & universe;
if geo_candidates.is_empty() {
return Ok(());
}
let fid_map = ctx.index.fields_ids_map(ctx.txn)?;
let lat = fid_map.id("_geo.lat").expect("geo candidates but no fid for lat");
let lng = fid_map.id("_geo.lng").expect("geo candidates but no fid for lng");
self.field_ids = Some([lat, lng]);
self.fill_buffer(ctx, &geo_candidates)?;
Ok(())
}
#[tracing::instrument(level = "trace", skip_all, target = "search::geo_sort")]
#[allow(clippy::only_used_in_recursion)]
fn next_bucket(
&mut self,
ctx: &mut SearchContext<'ctx>,
logger: &mut dyn SearchLogger<Q>,
universe: &RoaringBitmap,
) -> Result<Option<RankingRuleOutput<Q>>> {
let query = self.query.as_ref().unwrap().clone();
let geo_candidates = &self.geo_candidates & universe;
if geo_candidates.is_empty() {
return Ok(Some(RankingRuleOutput {
query,
candidates: universe.clone(),
score: ScoreDetails::GeoSort(score_details::GeoSort {
target_point: self.point,
ascending: self.ascending,
value: None,
}),
}));
}
let ascending = self.ascending;
let next = |cache: &mut VecDeque<_>| {
if ascending {
cache.pop_front()
} else {
cache.pop_back()
}
};
while let Some((id, point)) = next(&mut self.cached_sorted_docids) {
if geo_candidates.contains(id) {
return Ok(Some(RankingRuleOutput {
query,
candidates: RoaringBitmap::from_iter([id]),
score: ScoreDetails::GeoSort(score_details::GeoSort {
target_point: self.point,
ascending: self.ascending,
value: Some(point),
}),
}));
}
}
// if we got out of this loop it means we've exhausted our cache.
// we need to refill it and run the function again.
self.fill_buffer(ctx, &geo_candidates)?;
self.next_bucket(ctx, logger, universe)
}
#[tracing::instrument(level = "trace", skip_all, target = "search::geo_sort")]
fn end_iteration(&mut self, _ctx: &mut SearchContext<'ctx>, _logger: &mut dyn SearchLogger<Q>) {
// we do not reset the rtree here, it could be used in a next iteration
self.query = None;
self.cached_sorted_docids.clear();
}
}
/// Compute the antipodal coordinate of `coord`
fn opposite_of(mut coord: [f64; 2]) -> [f64; 2] {
coord[0] *= -1.;
// in the case of x,0 we want to return x,180
if coord[1] > 0. {
coord[1] -= 180.;
} else {
coord[1] += 180.;
}
coord
}
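// A quick check of the antipode computation above (an illustrative sketch, not
// part of the original sources): the latitude flips sign and the longitude is
// shifted by 180 degrees towards the opposite hemisphere.
#[cfg(test)]
#[test]
fn opposite_of_flips_lat_and_shifts_lng() {
    assert_eq!(opposite_of([10.0, 70.0]), [-10.0, -110.0]);
    assert_eq!(opposite_of([0.0, 0.0]), [0.0, 180.0]);
    assert_eq!(opposite_of([-45.0, -90.0]), [45.0, 90.0]);
}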

View file

@ -0,0 +1,431 @@
/*! Implementation of a generic graph-based ranking rule.
A graph-based ranking rule is a ranking rule that works by representing
its possible operations and their relevancy cost as a directed acyclic multi-graph
built on top of the query graph. It then computes its buckets by finding the
cheapest paths from the start node to the end node and computing the document ids
that satisfy those paths.
For example, the proximity ranking rule builds a graph where the edges between two
nodes represent a condition that the term of the source node is in a certain proximity
to the term of the destination node. With the query "pretty house by" where the term
"pretty" has three possible proximities to the term "house" and "house" has two
proximities to "by", the graph will look like this:
```txt
START --0--> pretty --1/2/3--> house --1/2--> by --0--> END
```
The proximity ranking rule's first bucket will be determined by the union of all
the shortest paths from START to END, which in this case is:
```txt
START --0--> pretty --1--> house --1--> by --0--> END
```
The path's corresponding document ids are found by taking the intersection of the
document ids of each edge. That is, we find the documents where both `pretty` is
1-close to `house` AND `house` is 1-close to `by`.
For the second bucket, we get the union of the second-cheapest paths, which are:
```txt
START --0--> pretty --1--> house --2--> by --0--> END
START --0--> pretty --2--> house --1--> by --0--> END
```
That is, we find the documents where either:
- `pretty` is 1-close to `house` AND `house` is 2-close to `by`
- OR: `pretty` is 2-close to `house` AND `house` is 1-close to `by`
*/
use std::collections::BTreeSet;
use std::ops::ControlFlow;
use roaring::RoaringBitmap;
use super::interner::{Interned, MappedInterner};
use super::logger::SearchLogger;
use super::query_graph::QueryNode;
use super::ranking_rule_graph::{
ConditionDocIdsCache, DeadEndsCache, ExactnessGraph, FidGraph, PositionGraph, ProximityGraph,
RankingRuleGraph, RankingRuleGraphTrait, TypoGraph, WordsGraph,
};
use super::small_bitmap::SmallBitmap;
use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
use crate::score_details::Rank;
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::ranking_rule_graph::PathVisitor;
use crate::{Result, TermsMatchingStrategy};
pub type Words = GraphBasedRankingRule<WordsGraph>;
impl GraphBasedRankingRule<WordsGraph> {
pub fn new(terms_matching_strategy: TermsMatchingStrategy) -> Self {
Self::new_with_id("words".to_owned(), Some(terms_matching_strategy))
}
}
pub type Proximity = GraphBasedRankingRule<ProximityGraph>;
impl GraphBasedRankingRule<ProximityGraph> {
pub fn new(terms_matching_strategy: Option<TermsMatchingStrategy>) -> Self {
Self::new_with_id("proximity".to_owned(), terms_matching_strategy)
}
}
pub type Fid = GraphBasedRankingRule<FidGraph>;
impl GraphBasedRankingRule<FidGraph> {
pub fn new(terms_matching_strategy: Option<TermsMatchingStrategy>) -> Self {
Self::new_with_id("fid".to_owned(), terms_matching_strategy)
}
}
pub type Position = GraphBasedRankingRule<PositionGraph>;
impl GraphBasedRankingRule<PositionGraph> {
pub fn new(terms_matching_strategy: Option<TermsMatchingStrategy>) -> Self {
Self::new_with_id("position".to_owned(), terms_matching_strategy)
}
}
pub type Typo = GraphBasedRankingRule<TypoGraph>;
impl GraphBasedRankingRule<TypoGraph> {
pub fn new(terms_matching_strategy: Option<TermsMatchingStrategy>) -> Self {
Self::new_with_id("typo".to_owned(), terms_matching_strategy)
}
}
pub type Exactness = GraphBasedRankingRule<ExactnessGraph>;
impl GraphBasedRankingRule<ExactnessGraph> {
pub fn new() -> Self {
Self::new_with_id("exactness".to_owned(), None)
}
}
/// A generic graph-based ranking rule
pub struct GraphBasedRankingRule<G: RankingRuleGraphTrait> {
id: String,
terms_matching_strategy: Option<TermsMatchingStrategy>,
// When the ranking rule is not iterating over its buckets,
// its state is `None`.
state: Option<GraphBasedRankingRuleState<G>>,
}
impl<G: RankingRuleGraphTrait> GraphBasedRankingRule<G> {
/// Creates the ranking rule with the given identifier
pub fn new_with_id(id: String, terms_matching_strategy: Option<TermsMatchingStrategy>) -> Self {
Self { id, terms_matching_strategy, state: None }
}
}
/// The internal state of a graph-based ranking rule during iteration
pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
/// The current graph
graph: RankingRuleGraph<G>,
/// Cache to retrieve the docids associated with each edge
conditions_cache: ConditionDocIdsCache<G>,
/// Cache used to optimistically discard paths that resolve to no documents.
dead_ends_cache: DeadEndsCache<G::Condition>,
/// A structure giving the list of possible costs from each node to the end node
all_costs: MappedInterner<QueryNode, Vec<u64>>,
/// An index in the first element of `all_costs`, giving the cost of the next bucket
cur_cost: u64,
/// One above the highest possible cost for this rule
next_max_cost: u64,
}
impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBasedRankingRule<G> {
fn id(&self) -> String {
self.id.clone()
}
#[tracing::instrument(level = "trace", skip_all, target = "search::graph_based")]
fn start_iteration(
&mut self,
ctx: &mut SearchContext<'ctx>,
_logger: &mut dyn SearchLogger<QueryGraph>,
_universe: &RoaringBitmap,
query_graph: &QueryGraph,
) -> Result<()> {
// the `next_max_cost` is the successor integer to the maximum cost of the paths in the graph.
//
// When there is a matching strategy, it also factors the additional costs of:
// 1. The words that are matched in phrases
// 2. Skipping words (by adding them to the paths with a cost)
let mut next_max_cost = 1;
let removal_cost = if let Some(terms_matching_strategy) = self.terms_matching_strategy {
// add the cost of the phrase to the next_max_cost
next_max_cost += query_graph
.words_in_phrases_count(ctx)
// remove 1 from the words in phrases count, because when there is a phrase we can now have a document
// where only the phrase is matching, and none of the non-phrase words.
// With the `1` that `next_max_cost` is initialized with, this gets counted twice.
.saturating_sub(1) as u64;
match terms_matching_strategy {
TermsMatchingStrategy::Last => {
let removal_order =
query_graph.removal_order_for_terms_matching_strategy_last(ctx);
let mut forbidden_nodes =
SmallBitmap::for_interned_values_in(&query_graph.nodes);
let mut costs = query_graph.nodes.map(|_| None);
// FIXME: this works because only the words ranking rule uses TermsMatchingStrategy at the moment.
for ns in removal_order {
for n in ns.iter() {
*costs.get_mut(n) = Some((1, forbidden_nodes.clone()));
}
forbidden_nodes.union(&ns);
}
costs
}
TermsMatchingStrategy::Frequency => {
let removal_order =
query_graph.removal_order_for_terms_matching_strategy_frequency(ctx)?;
let mut forbidden_nodes =
SmallBitmap::for_interned_values_in(&query_graph.nodes);
let mut costs = query_graph.nodes.map(|_| None);
// FIXME: this works because only the words ranking rule uses TermsMatchingStrategy at the moment.
for ns in removal_order {
for n in ns.iter() {
*costs.get_mut(n) = Some((1, forbidden_nodes.clone()));
}
forbidden_nodes.union(&ns);
}
costs
}
TermsMatchingStrategy::All => query_graph.nodes.map(|_| None),
}
} else {
query_graph.nodes.map(|_| None)
};
let graph = RankingRuleGraph::build(ctx, query_graph.clone(), removal_cost)?;
let condition_docids_cache = ConditionDocIdsCache::default();
let dead_ends_cache = DeadEndsCache::new(&graph.conditions_interner);
// Then pre-compute the cost of all paths from each node to the end node
let all_costs = graph.find_all_costs_to_end();
next_max_cost +=
all_costs.get(graph.query_graph.root_node).iter().copied().max().unwrap_or(0);
let state = GraphBasedRankingRuleState {
graph,
conditions_cache: condition_docids_cache,
dead_ends_cache,
all_costs,
cur_cost: 0,
next_max_cost,
};
self.state = Some(state);
Ok(())
}
#[tracing::instrument(level = "trace", skip_all, target = "search::graph_based")]
fn next_bucket(
&mut self,
ctx: &mut SearchContext<'ctx>,
logger: &mut dyn SearchLogger<QueryGraph>,
universe: &RoaringBitmap,
) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
// Will crash if `next_bucket` is called before `start_iteration` or after `end_iteration`;
// this should never happen
let mut state = self.state.take().unwrap();
let all_costs = state.all_costs.get(state.graph.query_graph.root_node);
// Retrieve the cost of the paths to compute
let Some(&cost) = all_costs.iter().find(|c| **c >= state.cur_cost) else {
self.state = None;
return Ok(None);
};
state.cur_cost = cost + 1;
let mut bucket = RoaringBitmap::new();
let GraphBasedRankingRuleState {
graph,
conditions_cache: condition_docids_cache,
dead_ends_cache,
all_costs,
cur_cost: _,
next_max_cost,
} = &mut state;
let rank = *next_max_cost - cost;
let score = G::rank_to_score(Rank { rank: rank as u32, max_rank: *next_max_cost as u32 });
let mut universe = universe.clone();
let mut used_conditions = SmallBitmap::for_interned_values_in(&graph.conditions_interner);
let mut good_paths = vec![];
let mut considered_paths = vec![];
// For each path of the given cost, we will compute its associated
// document ids.
// In case the path does not resolve to any document id, we try to figure out why
// and update the `dead_ends_cache` accordingly.
// Updating the dead_ends_cache helps speed up the execution of `visit_paths_of_cost` and reduces
// the number of future candidate paths given by that same function.
let mut subpaths_docids: Vec<(Interned<G::Condition>, RoaringBitmap)> = vec![];
let mut nodes_with_removed_outgoing_conditions = BTreeSet::new();
let visitor = PathVisitor::new(cost, graph, all_costs, dead_ends_cache);
visitor.visit_paths(&mut |path, graph, dead_ends_cache| {
considered_paths.push(path.to_vec());
// If the universe is empty, stop exploring the graph, since no docids will ever be found anymore.
if universe.is_empty() {
return Ok(ControlFlow::Break(()));
}
// `visit_paths` performs a depth-first search, so the previously visited path
// is likely to share a prefix with the current one.
// We stored the previous path and the docids associated to each of its prefixes in `subpaths_docids`.
// We take advantage of this to avoid computing the docids associated with the common prefix between
// the old and current path.
let idx_of_first_different_condition = {
let mut idx = 0;
for (&last_c, cur_c) in path.iter().zip(subpaths_docids.iter().map(|x| x.0)) {
if last_c == cur_c {
idx += 1;
} else {
break;
}
}
subpaths_docids.truncate(idx);
idx
};
// Then for the remaining of the path, we continue computing docids.
for latest_condition in path[idx_of_first_different_condition..].iter().copied() {
let success = visit_path_condition(
ctx,
graph,
&universe,
dead_ends_cache,
condition_docids_cache,
&mut subpaths_docids,
&mut nodes_with_removed_outgoing_conditions,
latest_condition,
)?;
if !success {
return Ok(ControlFlow::Continue(()));
}
}
assert!(subpaths_docids.iter().map(|x| x.0).eq(path.iter().copied()));
let path_docids =
subpaths_docids.pop().map(|x| x.1).unwrap_or_else(|| universe.clone());
assert!(!path_docids.is_empty());
// Accumulate the path for logging purposes only
good_paths.push(path.to_vec());
for &condition in path {
used_conditions.insert(condition);
}
bucket |= &path_docids;
// Reduce the size of the universe so that we can more optimistically discard candidate paths
universe -= &path_docids;
for (_, docids) in subpaths_docids.iter_mut() {
*docids -= &path_docids;
}
if universe.is_empty() {
Ok(ControlFlow::Break(()))
} else {
Ok(ControlFlow::Continue(()))
}
})?;
logger.log_internal_state(graph);
logger.log_internal_state(&good_paths);
// We modify the next query graph so that it only contains the subgraph
// that was used to compute this bucket
let paths: Vec<Vec<(Option<LocatedQueryTermSubset>, LocatedQueryTermSubset)>> = good_paths
.into_iter()
.map(|path| {
path.into_iter()
.map(|condition| {
let (a, b) =
condition_docids_cache.get_subsets_used_by_condition(condition);
(a.clone(), b.clone())
})
.collect()
})
.collect();
let next_query_graph = QueryGraph::build_from_paths(paths);
#[allow(clippy::comparison_chain)]
if nodes_with_removed_outgoing_conditions.len() == 1 {
graph.update_all_costs_before_node(
*nodes_with_removed_outgoing_conditions.first().unwrap(),
all_costs,
);
} else if nodes_with_removed_outgoing_conditions.len() > 1 {
*all_costs = graph.find_all_costs_to_end();
}
self.state = Some(state);
Ok(Some(RankingRuleOutput { query: next_query_graph, candidates: bucket, score }))
}
#[tracing::instrument(level = "trace", skip_all, target = "search::graph_based")]
fn end_iteration(
&mut self,
_ctx: &mut SearchContext<'ctx>,
_logger: &mut dyn SearchLogger<QueryGraph>,
) {
self.state = None;
}
}
/// Returns false if the intersection between the condition
/// docids and the previous path docids is empty.
#[allow(clippy::too_many_arguments)]
fn visit_path_condition<G: RankingRuleGraphTrait>(
ctx: &mut SearchContext<'_>,
graph: &mut RankingRuleGraph<G>,
universe: &RoaringBitmap,
dead_ends_cache: &mut DeadEndsCache<G::Condition>,
condition_docids_cache: &mut ConditionDocIdsCache<G>,
subpath: &mut Vec<(Interned<G::Condition>, RoaringBitmap)>,
nodes_with_removed_outgoing_conditions: &mut BTreeSet<Interned<QueryNode>>,
latest_condition: Interned<G::Condition>,
) -> Result<bool> {
let condition_docids = &condition_docids_cache
.get_computed_condition(ctx, latest_condition, graph, universe)?
.docids;
if condition_docids.is_empty() {
// 1. Store in the cache that this edge is empty for this universe
dead_ends_cache.forbid_condition(latest_condition);
// 2. remove all the edges with this condition from the ranking rule graph
let source_nodes = graph.remove_edges_with_condition(latest_condition);
nodes_with_removed_outgoing_conditions.extend(source_nodes);
return Ok(false);
}
let latest_path_docids = if let Some((_, prev_docids)) = subpath.last() {
prev_docids & condition_docids
} else {
condition_docids.clone()
};
if !latest_path_docids.is_empty() {
subpath.push((latest_condition, latest_path_docids));
return Ok(true);
}
// If the (sub)path is empty, we try to figure out why and update the caches accordingly.
// First, we know that this path is empty, and thus any path
// that is a superset of it will also be empty.
dead_ends_cache.forbid_condition_after_prefix(subpath.iter().map(|x| x.0), latest_condition);
if subpath.len() <= 1 {
return Ok(false);
}
let mut subprefix = vec![];
// It's a dead end if the intersection between this edge and any
// previous prefix is disjoint with the universe.
// We already know that the intersection with the last one
// is empty.
for (past_condition, sp_docids) in subpath[..subpath.len() - 1].iter() {
subprefix.push(*past_condition);
if condition_docids.is_disjoint(sp_docids) {
dead_ends_cache
.forbid_condition_after_prefix(subprefix.iter().copied(), latest_condition);
}
}
Ok(false)
}

View file

@ -0,0 +1,259 @@
use std::fmt;
use std::hash::Hash;
use std::marker::PhantomData;
use fxhash::FxHashMap;
use super::small_bitmap::SmallBitmap;
/// An index within an interner ([`FixedSizeInterner`], [`DedupInterner`], or [`MappedInterner`]).
pub struct Interned<T> {
idx: u16,
_phantom: PhantomData<T>,
}
impl<T> Interned<T> {
/// Create an interned value manually from its raw index within the interner.
pub fn from_raw(idx: u16) -> Self {
Self { idx, _phantom: PhantomData }
}
/// Get the raw index from the interned value
pub fn into_raw(self) -> u16 {
self.idx
}
}
/// A [`DedupInterner`] is used to store a unique copy of a value of type `T`. This value
/// is then identified by a lightweight index of type [`Interned<T>`], which can
/// be copied, compared, and hashed efficiently. An immutable reference to the original value
/// can be retrieved using `self.get(interned)`. A set of values within the interner can be
/// efficiently managed using [`SmallBitmap<T>`](super::small_bitmap::SmallBitmap).
///
/// A dedup-interner can contain a maximum of `u16::MAX` values.
#[derive(Clone)]
pub struct DedupInterner<T> {
stable_store: Vec<T>,
lookup: FxHashMap<T, Interned<T>>,
}
impl<T> Default for DedupInterner<T> {
fn default() -> Self {
Self { stable_store: Default::default(), lookup: Default::default() }
}
}
impl<T> DedupInterner<T> {
/// Convert the dedup-interner into a fixed-size interner, such that new
/// elements cannot be added to it anymore.
pub fn freeze(self) -> FixedSizeInterner<T> {
FixedSizeInterner { stable_store: self.stable_store }
}
}
impl<T> DedupInterner<T>
where
T: Clone + Eq + Hash,
{
/// Insert the given value into the dedup-interner, and return
/// its index.
pub fn insert(&mut self, s: T) -> Interned<T> {
if let Some(interned) = self.lookup.get(&s) {
*interned
} else {
assert!(self.stable_store.len() < u16::MAX as usize);
self.stable_store.push(s.clone());
let interned = Interned::from_raw(self.stable_store.len() as u16 - 1);
self.lookup.insert(s, interned);
interned
}
}
/// Get a reference to the interned value.
pub fn get(&self, interned: Interned<T>) -> &T {
&self.stable_store[interned.idx as usize]
}
}
/// A fixed-length store for values of type `T`, where each value is identified
/// by an index of type [`Interned<T>`].
#[derive(Clone)]
pub struct FixedSizeInterner<T> {
stable_store: Vec<T>,
}
impl<T: Clone> FixedSizeInterner<T> {
/// Create a fixed-size interner of the given length containing
/// clones of the given value.
pub fn new(length: u16, value: T) -> Self {
Self { stable_store: vec![value; length as usize] }
}
}
impl<T> FixedSizeInterner<T> {
pub fn from_vec(store: Vec<T>) -> Self {
Self { stable_store: store }
}
pub fn all_interned_values(&self) -> SmallBitmap<T> {
let mut b = SmallBitmap::for_interned_values_in(self);
for i in self.indexes() {
b.insert(i);
}
b
}
pub fn get(&self, interned: Interned<T>) -> &T {
&self.stable_store[interned.idx as usize]
}
pub fn get_mut(&mut self, interned: Interned<T>) -> &mut T {
&mut self.stable_store[interned.idx as usize]
}
pub fn len(&self) -> u16 {
self.stable_store.len() as u16
}
pub fn map_move<U>(self, map_f: impl Fn(T) -> U) -> FixedSizeInterner<U> {
FixedSizeInterner { stable_store: self.stable_store.into_iter().map(map_f).collect() }
}
pub fn map<U>(&self, map_f: impl Fn(&T) -> U) -> MappedInterner<T, U> {
MappedInterner {
stable_store: self.stable_store.iter().map(map_f).collect(),
_phantom: PhantomData,
}
}
pub fn map_indexes<U>(&self, map_f: impl Fn(Interned<T>) -> U) -> MappedInterner<T, U> {
MappedInterner { stable_store: self.indexes().map(map_f).collect(), _phantom: PhantomData }
}
pub fn indexes(&self) -> impl Iterator<Item = Interned<T>> {
(0..self.stable_store.len()).map(|i| Interned::from_raw(i as u16))
}
pub fn iter(&self) -> impl Iterator<Item = (Interned<T>, &T)> {
self.stable_store.iter().enumerate().map(|(i, x)| (Interned::from_raw(i as u16), x))
}
pub fn iter_mut(&mut self) -> impl Iterator<Item = (Interned<T>, &mut T)> {
self.stable_store.iter_mut().enumerate().map(|(i, x)| (Interned::from_raw(i as u16), x))
}
}
/// A fixed-length store for values of type `T`, where each value is identified
/// by an index of type [`Interned<T>`].
#[derive(Clone)]
pub struct Interner<T> {
stable_store: Vec<T>,
}
impl<T> Default for Interner<T> {
fn default() -> Self {
Self { stable_store: vec![] }
}
}
impl<T> Interner<T> {
pub fn from_vec(v: Vec<T>) -> Self {
Self { stable_store: v }
}
pub fn get(&self, interned: Interned<T>) -> &T {
&self.stable_store[interned.idx as usize]
}
pub fn get_mut(&mut self, interned: Interned<T>) -> &mut T {
&mut self.stable_store[interned.idx as usize]
}
pub fn push(&mut self, value: T) -> Interned<T> {
assert!(self.stable_store.len() < u16::MAX as usize);
self.stable_store.push(value);
Interned::from_raw(self.stable_store.len() as u16 - 1)
}
pub fn len(&self) -> u16 {
self.stable_store.len() as u16
}
pub fn map<U>(&self, map_f: impl Fn(&T) -> U) -> MappedInterner<T, U> {
MappedInterner {
stable_store: self.stable_store.iter().map(map_f).collect(),
_phantom: PhantomData,
}
}
pub fn map_indexes<U>(&self, map_f: impl Fn(Interned<T>) -> U) -> MappedInterner<T, U> {
MappedInterner { stable_store: self.indexes().map(map_f).collect(), _phantom: PhantomData }
}
pub fn indexes(&self) -> impl Iterator<Item = Interned<T>> {
(0..self.stable_store.len()).map(|i| Interned::from_raw(i as u16))
}
pub fn iter(&self) -> impl Iterator<Item = (Interned<T>, &T)> {
self.stable_store.iter().enumerate().map(|(i, x)| (Interned::from_raw(i as u16), x))
}
pub fn iter_mut(&mut self) -> impl Iterator<Item = (Interned<T>, &mut T)> {
self.stable_store.iter_mut().enumerate().map(|(i, x)| (Interned::from_raw(i as u16), x))
}
pub fn freeze(self) -> FixedSizeInterner<T> {
FixedSizeInterner { stable_store: self.stable_store }
}
}
/// A store of values of type `T`, each linked to a value of type `From`
/// stored in another interner. To create a mapped interner, use the
/// `map` method on [`FixedSizeInterner`] or [`MappedInterner`].
///
/// Values in this interner are indexed with [`Interned<From>`].
#[derive(Clone)]
pub struct MappedInterner<From, T> {
stable_store: Vec<T>,
_phantom: PhantomData<From>,
}
impl<From, T> MappedInterner<From, T> {
pub fn get(&self, interned: Interned<From>) -> &T {
&self.stable_store[interned.idx as usize]
}
pub fn get_mut(&mut self, interned: Interned<From>) -> &mut T {
&mut self.stable_store[interned.idx as usize]
}
pub fn map<U>(&self, map_f: impl Fn(&T) -> U) -> MappedInterner<From, U> {
MappedInterner {
stable_store: self.stable_store.iter().map(map_f).collect(),
_phantom: PhantomData,
}
}
pub fn iter(&self) -> impl Iterator<Item = (Interned<From>, &T)> {
self.stable_store.iter().enumerate().map(|(i, x)| (Interned::from_raw(i as u16), x))
}
pub fn iter_mut(&mut self) -> impl Iterator<Item = (Interned<From>, &mut T)> {
self.stable_store.iter_mut().enumerate().map(|(i, x)| (Interned::from_raw(i as u16), x))
}
}
// Interned<T> boilerplate implementations
impl<T> Hash for Interned<T> {
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
self.idx.hash(state);
}
}
impl<T> Ord for Interned<T> {
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
self.idx.cmp(&other.idx)
}
}
impl<T> PartialOrd for Interned<T> {
fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
Some(self.cmp(other))
}
}
impl<T> Eq for Interned<T> {}
impl<T> PartialEq for Interned<T> {
fn eq(&self, other: &Self) -> bool {
self.idx == other.idx
}
}
impl<T> Clone for Interned<T> {
fn clone(&self) -> Self {
*self
}
}
impl<T> Copy for Interned<T> {}
impl<T> fmt::Display for Interned<T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Display::fmt(&self.idx, f)
}
}
impl<T> fmt::Debug for Interned<T> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Debug::fmt(&self.idx, f)
}
}
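// Illustrative tests (a sketch, not part of the original sources) showing the
// interner contract: inserting an equal value twice yields the same
// `Interned<T>`, and freezing keeps indexes and values stable.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn dedup_interner_returns_the_same_index_for_equal_values() {
        let mut interner = DedupInterner::<String>::default();
        let a = interner.insert("house".to_owned());
        let b = interner.insert("house".to_owned());
        let c = interner.insert("pretty".to_owned());
        // "house" was deduplicated, "pretty" got a fresh index.
        assert_eq!(a, b);
        assert_ne!(a, c);

        let frozen = interner.freeze();
        assert_eq!(frozen.get(a), "house");
        assert_eq!(frozen.len(), 2);
    }
}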

View file

@ -0,0 +1,17 @@
/// Maximum number of tokens we consider in a single search.
pub const MAX_TOKEN_COUNT: usize = 1_000;
/// Maximum number of prefixes that can be derived from a single word.
pub const MAX_PREFIX_COUNT: usize = 1_000;
/// Maximum number of words that can be derived from a single word with a distance of one to that word.
pub const MAX_ONE_TYPO_COUNT: usize = 150;
/// Maximum number of words that can be derived from a single word with a distance of two to that word.
pub const MAX_TWO_TYPOS_COUNT: usize = 50;
/// Maximum number of synonym phrases that can be derived from a single word.
pub const MAX_SYNONYM_PHRASE_COUNT: usize = 50;
/// Maximum number of words inside all the synonym phrases that can be derived from a single word.
///
/// This limit is meant to gracefully handle the case where a word would have very long phrases as synonyms.
pub const MAX_SYNONYM_WORD_COUNT: usize = 100;
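// Illustrative sketch (not part of the original sources): these constants are
// meant to cap derived candidate lists, e.g. keeping at most MAX_PREFIX_COUNT
// prefixes for a single word.
#[cfg(test)]
#[test]
fn limits_cap_candidate_lists() {
    let prefixes: Vec<u32> = (0..5_000).collect();
    let kept: Vec<u32> = prefixes.into_iter().take(MAX_PREFIX_COUNT).collect();
    assert_eq!(kept.len(), MAX_PREFIX_COUNT);
}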

View file

@ -0,0 +1,81 @@
// #[cfg(test)]
pub mod visual;
use std::any::Any;
use roaring::RoaringBitmap;
use super::ranking_rules::BoxRankingRule;
use super::{RankingRule, RankingRuleQueryTrait};
/// Trait for structures that log the execution of a search query.
pub trait SearchLogger<Q: RankingRuleQueryTrait> {
/// Logs the initial query
fn initial_query(&mut self, _query: &Q);
/// Logs the value of the initial set of all candidates
fn initial_universe(&mut self, _universe: &RoaringBitmap);
/// Logs the query that was used to compute the set of all candidates
fn query_for_initial_universe(&mut self, _query: &Q);
/// Logs the ranking rules used to perform the search query
fn ranking_rules(&mut self, _rr: &[BoxRankingRule<'_, Q>]);
/// Logs the start of a ranking rule's iteration.
fn start_iteration_ranking_rule(
&mut self,
_ranking_rule_idx: usize,
_ranking_rule: &dyn RankingRule<'_, Q>,
_query: &Q,
_universe: &RoaringBitmap,
) {
}
/// Logs the end of the computation of a ranking rule bucket
fn next_bucket_ranking_rule(
&mut self,
_ranking_rule_idx: usize,
_ranking_rule: &dyn RankingRule<'_, Q>,
_universe: &RoaringBitmap,
_candidates: &RoaringBitmap,
) {
}
/// Logs the skipping of a ranking rule bucket
fn skip_bucket_ranking_rule(
&mut self,
_ranking_rule_idx: usize,
_ranking_rule: &dyn RankingRule<'_, Q>,
_candidates: &RoaringBitmap,
) {
}
/// Logs the end of a ranking rule's iteration.
fn end_iteration_ranking_rule(
&mut self,
_ranking_rule_idx: usize,
_ranking_rule: &dyn RankingRule<'_, Q>,
_universe: &RoaringBitmap,
) {
}
/// Logs the addition of document ids to the final results
fn add_to_results(&mut self, _docids: &[u32]);
/// Logs an internal state in the search algorithms
fn log_internal_state(&mut self, _rr: &dyn Any);
}
/// A dummy [`SearchLogger`] which does nothing.
pub struct DefaultSearchLogger;
impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
fn initial_query(&mut self, _query: &Q) {}
fn initial_universe(&mut self, _universe: &RoaringBitmap) {}
fn query_for_initial_universe(&mut self, _query: &Q) {}
fn ranking_rules(&mut self, _rr: &[BoxRankingRule<'_, Q>]) {}
fn add_to_results(&mut self, _docids: &[u32]) {}
fn log_internal_state(&mut self, _rr: &dyn Any) {}
}
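// A minimal sketch (not part of the original sources) of a custom logger:
// it only counts how many documents reached the final results and relies on
// the trait's default no-op methods for everything else.
#[cfg(test)]
#[allow(dead_code)]
struct CountingLogger {
    returned: usize,
}

#[cfg(test)]
impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for CountingLogger {
    fn initial_query(&mut self, _query: &Q) {}
    fn initial_universe(&mut self, _universe: &RoaringBitmap) {}
    fn query_for_initial_universe(&mut self, _query: &Q) {}
    fn ranking_rules(&mut self, _rr: &[BoxRankingRule<'_, Q>]) {}
    fn add_to_results(&mut self, docids: &[u32]) {
        self.returned += docids.len();
    }
    fn log_internal_state(&mut self, _rr: &dyn Any) {}
}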

View file

@ -0,0 +1,554 @@
use std::any::Any;
use std::fs::File;
use std::io::{BufWriter, Write};
use std::path::{Path, PathBuf};
use std::time::Instant;
use roaring::RoaringBitmap;
use crate::search::new::interner::Interned;
use crate::search::new::query_graph::QueryNodeData;
use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::ranking_rule_graph::{
Edge, FidCondition, FidGraph, PositionCondition, PositionGraph, ProximityCondition,
ProximityGraph, RankingRuleGraph, RankingRuleGraphTrait, TypoCondition, TypoGraph,
WordsCondition, WordsGraph,
};
use crate::search::new::ranking_rules::BoxRankingRule;
use crate::search::new::{QueryGraph, QueryNode, RankingRule, SearchContext, SearchLogger};
use crate::Result;
pub enum SearchEvents {
RankingRuleStartIteration { ranking_rule_idx: usize, universe_len: u64 },
RankingRuleNextBucket { ranking_rule_idx: usize, universe_len: u64, bucket_len: u64 },
RankingRuleSkipBucket { ranking_rule_idx: usize, bucket_len: u64 },
RankingRuleEndIteration { ranking_rule_idx: usize },
ExtendResults { new: Vec<u32> },
ProximityGraph { graph: RankingRuleGraph<ProximityGraph> },
ProximityPaths { paths: Vec<Vec<Interned<ProximityCondition>>> },
TypoGraph { graph: RankingRuleGraph<TypoGraph> },
TypoPaths { paths: Vec<Vec<Interned<TypoCondition>>> },
WordsGraph { graph: RankingRuleGraph<WordsGraph> },
WordsPaths { paths: Vec<Vec<Interned<WordsCondition>>> },
FidGraph { graph: RankingRuleGraph<FidGraph> },
FidPaths { paths: Vec<Vec<Interned<FidCondition>>> },
PositionGraph { graph: RankingRuleGraph<PositionGraph> },
PositionPaths { paths: Vec<Vec<Interned<PositionCondition>>> },
}
enum Location {
Words,
Typo,
Proximity,
Fid,
Position,
Other,
}
#[derive(Default)]
pub struct VisualSearchLogger {
initial_query: Option<QueryGraph>,
initial_query_time: Option<Instant>,
query_for_universe: Option<QueryGraph>,
initial_universe: Option<RoaringBitmap>,
ranking_rules_ids: Option<Vec<String>>,
events: Vec<SearchEvents>,
location: Vec<Location>,
}
impl SearchLogger<QueryGraph> for VisualSearchLogger {
fn initial_query(&mut self, query: &QueryGraph) {
self.initial_query = Some(query.clone());
self.initial_query_time = Some(Instant::now());
}
fn query_for_initial_universe(&mut self, query: &QueryGraph) {
self.query_for_universe = Some(query.clone());
}
fn initial_universe(&mut self, universe: &RoaringBitmap) {
self.initial_universe = Some(universe.clone());
}
fn ranking_rules(&mut self, rr: &[BoxRankingRule<'_, QueryGraph>]) {
self.ranking_rules_ids = Some(rr.iter().map(|rr| rr.id()).collect());
}
fn start_iteration_ranking_rule(
&mut self,
ranking_rule_idx: usize,
ranking_rule: &dyn RankingRule<'_, QueryGraph>,
_query: &QueryGraph,
universe: &RoaringBitmap,
) {
self.events.push(SearchEvents::RankingRuleStartIteration {
ranking_rule_idx,
universe_len: universe.len(),
});
self.location.push(match ranking_rule.id().as_str() {
"words" => Location::Words,
"typo" => Location::Typo,
"proximity" => Location::Proximity,
"fid" => Location::Fid,
"position" => Location::Position,
_ => Location::Other,
});
}
fn next_bucket_ranking_rule(
&mut self,
ranking_rule_idx: usize,
_ranking_rule: &dyn RankingRule<'_, QueryGraph>,
universe: &RoaringBitmap,
bucket: &RoaringBitmap,
) {
self.events.push(SearchEvents::RankingRuleNextBucket {
ranking_rule_idx,
universe_len: universe.len(),
bucket_len: bucket.len(),
});
}
fn skip_bucket_ranking_rule(
&mut self,
ranking_rule_idx: usize,
_ranking_rule: &dyn RankingRule<'_, QueryGraph>,
bucket: &RoaringBitmap,
) {
self.events.push(SearchEvents::RankingRuleSkipBucket {
ranking_rule_idx,
bucket_len: bucket.len(),
})
}
fn end_iteration_ranking_rule(
&mut self,
ranking_rule_idx: usize,
_ranking_rule: &dyn RankingRule<'_, QueryGraph>,
_universe: &RoaringBitmap,
) {
self.events.push(SearchEvents::RankingRuleEndIteration { ranking_rule_idx });
self.location.pop();
}
fn add_to_results(&mut self, docids: &[u32]) {
self.events.push(SearchEvents::ExtendResults { new: docids.to_vec() });
}
/// Logs the internal state of the ranking rule
fn log_internal_state(&mut self, state: &dyn Any) {
let Some(location) = self.location.last() else { return };
match location {
Location::Words => {
if let Some(graph) = state.downcast_ref::<RankingRuleGraph<WordsGraph>>() {
self.events.push(SearchEvents::WordsGraph { graph: graph.clone() });
}
if let Some(paths) = state.downcast_ref::<Vec<Vec<Interned<WordsCondition>>>>() {
self.events.push(SearchEvents::WordsPaths { paths: paths.clone() });
}
}
Location::Typo => {
if let Some(graph) = state.downcast_ref::<RankingRuleGraph<TypoGraph>>() {
self.events.push(SearchEvents::TypoGraph { graph: graph.clone() });
}
if let Some(paths) = state.downcast_ref::<Vec<Vec<Interned<TypoCondition>>>>() {
self.events.push(SearchEvents::TypoPaths { paths: paths.clone() });
}
}
Location::Proximity => {
if let Some(graph) = state.downcast_ref::<RankingRuleGraph<ProximityGraph>>() {
self.events.push(SearchEvents::ProximityGraph { graph: graph.clone() });
}
if let Some(paths) = state.downcast_ref::<Vec<Vec<Interned<ProximityCondition>>>>()
{
self.events.push(SearchEvents::ProximityPaths { paths: paths.clone() });
}
}
Location::Fid => {
if let Some(graph) = state.downcast_ref::<RankingRuleGraph<FidGraph>>() {
self.events.push(SearchEvents::FidGraph { graph: graph.clone() });
}
if let Some(paths) = state.downcast_ref::<Vec<Vec<Interned<FidCondition>>>>() {
self.events.push(SearchEvents::FidPaths { paths: paths.clone() });
}
}
Location::Position => {
if let Some(graph) = state.downcast_ref::<RankingRuleGraph<PositionGraph>>() {
self.events.push(SearchEvents::PositionGraph { graph: graph.clone() });
}
if let Some(paths) = state.downcast_ref::<Vec<Vec<Interned<PositionCondition>>>>() {
self.events.push(SearchEvents::PositionPaths { paths: paths.clone() });
}
}
Location::Other => {}
}
}
}
impl VisualSearchLogger {
pub fn finish<'ctx>(self, ctx: &'ctx mut SearchContext<'ctx>, folder: &Path) -> Result<()> {
let mut f = DetailedLoggerFinish::new(ctx, folder)?;
f.finish(self)?;
Ok(())
}
}
struct DetailedLoggerFinish<'ctx> {
ctx: &'ctx mut SearchContext<'ctx>,
/// The folder where all the files should be printed
folder_path: PathBuf,
/// The main file visualising the search request
index_file: BufWriter<File>,
/// A vector of counters where each counter at index i represents the number of times
/// that the ranking rule at idx i-1 was called since its last call to `start_iteration`.
/// This is used to uniquely identify a point in the sequence diagram.
rr_action_counter: Vec<usize>,
/// The file storing information about the internal state of the latest active ranking rule
file_for_internal_state: Option<BufWriter<File>>,
}
impl<'ctx> DetailedLoggerFinish<'ctx> {
fn cur_file(&mut self) -> &mut BufWriter<File> {
if let Some(file) = self.file_for_internal_state.as_mut() {
file
} else {
&mut self.index_file
}
}
fn pop_rr_action(&mut self) {
self.file_for_internal_state = None;
self.rr_action_counter.pop();
}
fn push_new_rr_action(&mut self) {
self.file_for_internal_state = None;
self.rr_action_counter.push(0);
}
fn increment_cur_rr_action(&mut self) {
self.file_for_internal_state = None;
if let Some(c) = self.rr_action_counter.last_mut() {
*c += 1;
}
}
fn id_of_timestamp(&self) -> String {
let mut s = String::new();
for t in self.rr_action_counter.iter() {
s.push_str(&format!("{t}_"));
}
s
}
fn id_of_extend_results(&self) -> String {
let mut s = String::new();
s.push_str("results.\"");
s.push_str(&self.id_of_timestamp());
s.push('"');
s
}
fn id_of_last_rr_action(&self) -> String {
let mut s = String::new();
let rr_id = if self.rr_action_counter.is_empty() {
"start.\"".to_owned()
} else {
format!("{}.\"", self.rr_action_counter.len() - 1)
};
s.push_str(&rr_id);
s.push_str(&self.id_of_timestamp());
s.push('"');
s
}
fn make_new_file_for_internal_state_if_needed(&mut self) -> Result<()> {
if self.file_for_internal_state.is_some() {
return Ok(());
}
let timestamp = self.id_of_timestamp();
let id = self.id_of_last_rr_action();
let new_file_path = self.folder_path.join(format!("{timestamp}.d2"));
self.file_for_internal_state = Some(BufWriter::new(File::create(new_file_path)?));
writeln!(
&mut self.index_file,
"{id} {{
link: \"{timestamp}.d2.svg\"
}}"
)?;
Ok(())
}
fn new(ctx: &'ctx mut SearchContext<'ctx>, folder_path: &Path) -> Result<Self> {
let index_path = folder_path.join("index.d2");
let index_file = BufWriter::new(File::create(index_path)?);
Ok(Self {
ctx,
folder_path: folder_path.to_owned(),
index_file,
rr_action_counter: vec![],
file_for_internal_state: None,
})
}
fn finish(&mut self, logger: VisualSearchLogger) -> Result<()> {
writeln!(&mut self.index_file, "direction: right")?;
if let Some(qg) = logger.initial_query {
writeln!(&mut self.index_file, "Initial Query Graph: {{")?;
self.write_query_graph(&qg)?;
writeln!(&mut self.index_file, "}}")?;
}
if let Some(qg) = logger.query_for_universe {
writeln!(&mut self.index_file, "Query Graph Used To Compute Universe: {{")?;
self.write_query_graph(&qg)?;
writeln!(&mut self.index_file, "}}")?;
}
let Some(ranking_rules_ids) = logger.ranking_rules_ids else { return Ok(()) };
writeln!(&mut self.index_file, "Control Flow Between Ranking Rules: {{")?;
writeln!(&mut self.index_file, "shape: sequence_diagram")?;
writeln!(&mut self.index_file, "start")?;
for (idx, rr_id) in ranking_rules_ids.iter().enumerate() {
writeln!(&mut self.index_file, "{idx}: {rr_id}")?;
}
writeln!(&mut self.index_file, "results")?;
for event in logger.events {
self.write_event(event)?;
}
writeln!(&mut self.index_file, "}}")?;
Ok(())
}
fn write_event(&mut self, e: SearchEvents) -> Result<()> {
match e {
SearchEvents::RankingRuleStartIteration { ranking_rule_idx, universe_len } => {
assert!(ranking_rule_idx == self.rr_action_counter.len());
self.write_start_iteration(universe_len)?;
}
SearchEvents::RankingRuleNextBucket { ranking_rule_idx, universe_len, bucket_len } => {
assert!(ranking_rule_idx == self.rr_action_counter.len() - 1);
self.write_next_bucket(bucket_len, universe_len)?;
}
SearchEvents::RankingRuleSkipBucket { ranking_rule_idx, bucket_len } => {
assert!(ranking_rule_idx == self.rr_action_counter.len() - 1);
self.write_skip_bucket(bucket_len)?;
}
SearchEvents::RankingRuleEndIteration { ranking_rule_idx } => {
assert!(ranking_rule_idx == self.rr_action_counter.len() - 1);
self.write_end_iteration()?;
}
SearchEvents::ExtendResults { new } => {
self.write_extend_results(new)?;
}
SearchEvents::ProximityGraph { graph } => self.write_rr_graph(&graph)?,
SearchEvents::ProximityPaths { paths } => {
self.write_rr_graph_paths::<ProximityGraph>(paths)?;
}
SearchEvents::TypoGraph { graph } => self.write_rr_graph(&graph)?,
SearchEvents::TypoPaths { paths } => {
self.write_rr_graph_paths::<TypoGraph>(paths)?;
}
SearchEvents::WordsGraph { graph } => self.write_rr_graph(&graph)?,
SearchEvents::WordsPaths { paths } => {
self.write_rr_graph_paths::<WordsGraph>(paths)?;
}
SearchEvents::FidGraph { graph } => self.write_rr_graph(&graph)?,
SearchEvents::FidPaths { paths } => {
self.write_rr_graph_paths::<FidGraph>(paths)?;
}
SearchEvents::PositionGraph { graph } => self.write_rr_graph(&graph)?,
SearchEvents::PositionPaths { paths } => {
self.write_rr_graph_paths::<PositionGraph>(paths)?;
}
}
Ok(())
}
fn write_query_graph(&mut self, qg: &QueryGraph) -> Result<()> {
writeln!(self.cur_file(), "direction: right")?;
for (node_id, node) in qg.nodes.iter() {
if matches!(node.data, QueryNodeData::Deleted) {
continue;
}
self.write_query_node(node_id, node)?;
for edge in node.successors.iter() {
writeln!(self.cur_file(), "{node_id} -> {edge};\n").unwrap();
}
}
Ok(())
}
fn write_start_iteration(&mut self, _universe_len: u64) -> Result<()> {
let parent_action_id = self.id_of_last_rr_action();
self.push_new_rr_action();
let self_action_id = self.id_of_last_rr_action();
writeln!(&mut self.index_file, "{parent_action_id} -> {self_action_id} : start iteration")?;
writeln!(
&mut self.index_file,
"{self_action_id} {{
style {{
fill: \"#D8A7B1\"
}}
}}"
)?;
Ok(())
}
fn write_next_bucket(&mut self, bucket_len: u64, universe_len: u64) -> Result<()> {
let cur_action_id = self.id_of_last_rr_action();
self.increment_cur_rr_action();
let next_action_id = self.id_of_last_rr_action();
writeln!(
&mut self.index_file,
"{cur_action_id} -> {next_action_id} : next bucket {bucket_len}/{universe_len}"
)?;
Ok(())
}
fn write_skip_bucket(&mut self, bucket_len: u64) -> Result<()> {
let cur_action_id = self.id_of_last_rr_action();
self.increment_cur_rr_action();
let next_action_id = self.id_of_last_rr_action();
writeln!(
&mut self.index_file,
"{cur_action_id} -> {next_action_id} : skip bucket ({bucket_len})"
)?;
Ok(())
}
fn write_end_iteration(&mut self) -> Result<()> {
let cur_action_id = self.id_of_last_rr_action();
self.pop_rr_action();
let parent_action_id = self.id_of_last_rr_action();
writeln!(&mut self.index_file, "{cur_action_id} -> {parent_action_id} : end iteration",)?;
Ok(())
}
fn write_extend_results(&mut self, new: Vec<u32>) -> Result<()> {
if new.is_empty() {
return Ok(());
}
let cur_action_id = self.id_of_last_rr_action();
let results_id = self.id_of_extend_results();
let docids = new.iter().collect::<Vec<_>>();
let len = new.len();
writeln!(
&mut self.index_file,
"{cur_action_id} -> {results_id} : \"add {len}\"
{results_id} {{
tooltip: \"{docids:?}\"
style {{
fill: \"#B6E2D3\"
}}
}}
"
)?;
Ok(())
}
fn write_query_node(&mut self, node_idx: Interned<QueryNode>, node: &QueryNode) -> Result<()> {
let Self {
ctx, index_file, file_for_internal_state: active_ranking_rule_state_file, ..
} = self;
let file = if let Some(file) = active_ranking_rule_state_file.as_mut() {
file
} else {
index_file
};
match &node.data {
QueryNodeData::Term(LocatedQueryTermSubset {
term_subset,
positions: _,
term_ids: _,
}) => {
writeln!(
file,
"{node_idx} : \"{}\" {{
shape: class
max_nbr_typo: {}",
term_subset.description(ctx),
term_subset.max_typo_cost(ctx)
)?;
for w in term_subset.all_single_words_except_prefix_db(ctx)? {
let w = ctx.word_interner.get(w.interned());
writeln!(file, "{w}: word")?;
}
for p in term_subset.all_phrases(ctx)? {
writeln!(file, "{}: phrase", p.description(ctx))?;
}
if let Some(w) = term_subset.use_prefix_db(ctx) {
let w = ctx.word_interner.get(w.interned());
writeln!(file, "{w}: prefix db")?;
}
writeln!(file, "}}")?;
}
QueryNodeData::Deleted => panic!(),
QueryNodeData::Start => {
writeln!(file, "{node_idx} : START")?;
}
QueryNodeData::End => {
writeln!(file, "{node_idx} : END")?;
}
}
Ok(())
}
fn write_rr_graph<R: RankingRuleGraphTrait>(
&mut self,
graph: &RankingRuleGraph<R>,
) -> Result<()> {
self.make_new_file_for_internal_state_if_needed()?;
writeln!(self.cur_file(), "direction: right")?;
writeln!(self.cur_file(), "Graph {{")?;
for (node_idx, node) in graph.query_graph.nodes.iter() {
if matches!(&node.data, QueryNodeData::Deleted) {
continue;
}
self.write_query_node(node_idx, node)?;
}
for (_edge_id, edge) in graph.edges_store.iter() {
let Some(edge) = edge else { continue };
let Edge { source_node, dest_node, condition: details, cost, nodes_to_skip: _ } = edge;
match &details {
None => {
writeln!(
self.cur_file(),
"{source_node} -> {dest_node} : \"always cost {cost}\"",
)?;
}
Some(condition) => {
writeln!(
self.cur_file(),
"{source_node} -> {dest_node} : \"{condition} cost {cost}\"",
cost = edge.cost,
)?;
}
}
}
writeln!(self.cur_file(), "}}")?;
Ok(())
}
fn write_rr_graph_paths<R: RankingRuleGraphTrait>(
&mut self,
paths: Vec<Vec<Interned<R::Condition>>>,
) -> Result<()> {
self.make_new_file_for_internal_state_if_needed()?;
let file = if let Some(file) = self.file_for_internal_state.as_mut() {
file
} else {
&mut self.index_file
};
writeln!(file, "Path {{")?;
for (path_idx, condition_indexes) in paths.iter().enumerate() {
writeln!(file, "{path_idx} {{")?;
for condition in condition_indexes.iter() {
writeln!(file, "{condition}")?;
}
for couple_edges in condition_indexes.windows(2) {
let [src_edge_idx, dest_edge_idx] = couple_edges else { panic!() };
writeln!(file, "{src_edge_idx} -> {dest_edge_idx}")?;
}
writeln!(file, "}}")?;
}
writeln!(file, "}}")?;
Ok(())
}
}

View file

@ -0,0 +1,139 @@
use super::matching_words::WordId;
use super::{Match, MatchPosition};
struct MatchIntervalWithScore {
interval: [usize; 2],
score: [i16; 3],
}
// count score for phrases
fn tally_phrase_scores(fwp: &usize, lwp: &usize, order_score: &mut i16, distance_score: &mut i16) {
let words_in_phrase_minus_one = (lwp - fwp) as i16;
// will always be ordered, so +1 for each space between words
*order_score += words_in_phrase_minus_one;
// distance will always be 1, so -1 for each space between words
*distance_score -= words_in_phrase_minus_one;
}
/// Compute the score of a match interval:
/// 1) count unique matches
/// 2) calculate distance between matches
/// 3) count ordered matches
fn get_interval_score(matches: &[Match]) -> [i16; 3] {
let mut ids: Vec<WordId> = Vec::with_capacity(matches.len());
let mut order_score = 0;
let mut distance_score = 0;
let mut iter = matches.iter().peekable();
while let Some(m) = iter.next() {
if let Some(next_match) = iter.peek() {
// if matches are ordered
if next_match.ids.iter().min() > m.ids.iter().min() {
order_score += 1;
}
let m_last_word_pos = match m.position {
MatchPosition::Word { word_position, .. } => word_position,
MatchPosition::Phrase { word_positions: [fwp, lwp], .. } => {
tally_phrase_scores(&fwp, &lwp, &mut order_score, &mut distance_score);
lwp
}
};
let next_match_first_word_pos = next_match.get_first_word_pos();
// compute distance between matches
distance_score -= (next_match_first_word_pos - m_last_word_pos).min(7) as i16;
} else if let MatchPosition::Phrase { word_positions: [fwp, lwp], .. } = m.position {
// in case last match is a phrase, count score for its words
tally_phrase_scores(&fwp, &lwp, &mut order_score, &mut distance_score);
}
ids.extend(m.ids.iter());
}
ids.sort_unstable();
ids.dedup();
let uniq_score = ids.len() as i16;
// rank by unique match count, then by distance between matches, then by ordered match count.
[uniq_score, distance_score, order_score]
}
/// Returns the first and last match where the score computed by `get_interval_score` is the best.
pub fn find_best_match_interval(matches: &[Match], crop_size: usize) -> [&Match; 2] {
if matches.is_empty() {
panic!("`matches` should not be empty at this point");
}
// positions of the first and the last match of the best matches interval in `matches`.
let mut best_interval: Option<MatchIntervalWithScore> = None;
let mut save_best_interval = |interval_first, interval_last| {
let interval_score = get_interval_score(&matches[interval_first..=interval_last]);
let is_interval_score_better = best_interval
.as_ref()
.map_or(true, |MatchIntervalWithScore { score, .. }| interval_score > *score);
if is_interval_score_better {
best_interval = Some(MatchIntervalWithScore {
interval: [interval_first, interval_last],
score: interval_score,
});
}
};
// we compute the matches interval if we have at least 2 matches.
// current interval positions.
let mut interval_first = 0;
let mut interval_first_match_first_word_pos = matches[interval_first].get_first_word_pos();
for (index, next_match) in matches.iter().enumerate() {
// if the next match would make the interval grow to more than crop_size,
// we compare the current interval with the best one,
// then we increase `interval_first` until the next match can be added.
let next_match_last_word_pos = next_match.get_last_word_pos();
// if the next match would mean that we pass the crop size window,
// we take the last valid match that didn't pass this boundary, which is `index` - 1,
// and calculate a score for it, checking whether it's better than our best so far
if next_match_last_word_pos - interval_first_match_first_word_pos >= crop_size {
// if index is 0 there is no last viable match
if index != 0 {
let interval_last = index - 1;
// keep interval if it's the best
save_best_interval(interval_first, interval_last);
}
// advance start of the interval while interval is longer than crop_size.
loop {
interval_first += 1;
if interval_first == matches.len() {
interval_first -= 1;
break;
}
interval_first_match_first_word_pos = matches[interval_first].get_first_word_pos();
if interval_first_match_first_word_pos > next_match_last_word_pos
|| next_match_last_word_pos - interval_first_match_first_word_pos < crop_size
{
break;
}
}
}
}
// compute the last interval score and compare it to the best one.
let interval_last = matches.len() - 1;
// if the interval is a single match, we need to make sure it's
// not a phrase longer than the crop window
if interval_first != interval_last || matches[interval_first].get_word_count() < crop_size {
save_best_interval(interval_first, interval_last);
}
// if none of the matches fit the criteria above, default to the first one
best_interval.map_or(
[&matches[0], &matches[0]],
|MatchIntervalWithScore { interval: [first, last], .. }| [&matches[first], &matches[last]],
)
}
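// Worked example (an illustrative sketch, not part of the original sources;
// it assumes `WordId` is a plain integer type): two single-word matches at
// word positions 0 and 2, with increasing word ids, score 2 unique matches,
// a distance penalty of -2, and 1 ordered pair.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn interval_score_counts_uniques_distance_and_order() {
        let matches = vec![
            Match {
                char_count: 6,
                ids: vec![0],
                position: MatchPosition::Word { word_position: 0, token_position: 0 },
            },
            Match {
                char_count: 5,
                ids: vec![1],
                position: MatchPosition::Word { word_position: 2, token_position: 2 },
            },
        ];
        // [unique matches, distance score, order score]
        assert_eq!(get_interval_score(&matches), [2, -2, 1]);
    }
}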

View file

@ -0,0 +1,62 @@
use super::matching_words::WordId;
#[derive(Clone, Debug)]
pub enum MatchPosition {
Word {
// position of the word in the whole text.
word_position: usize,
// position of the token in the whole text.
token_position: usize,
},
Phrase {
// position of the first and last word in the phrase in the whole text.
word_positions: [usize; 2],
// position of the first and last token in the phrase in the whole text.
token_positions: [usize; 2],
},
}
#[derive(Clone, Debug)]
pub struct Match {
pub char_count: usize,
// ids of the query words that match.
pub ids: Vec<WordId>,
pub position: MatchPosition,
}
impl Match {
pub(super) fn get_first_word_pos(&self) -> usize {
match self.position {
MatchPosition::Word { word_position, .. } => word_position,
MatchPosition::Phrase { word_positions: [fwp, _], .. } => fwp,
}
}
pub(super) fn get_last_word_pos(&self) -> usize {
match self.position {
MatchPosition::Word { word_position, .. } => word_position,
MatchPosition::Phrase { word_positions: [_, lwp], .. } => lwp,
}
}
pub(super) fn get_first_token_pos(&self) -> usize {
match self.position {
MatchPosition::Word { token_position, .. } => token_position,
MatchPosition::Phrase { token_positions: [ftp, _], .. } => ftp,
}
}
pub(super) fn get_last_token_pos(&self) -> usize {
match self.position {
MatchPosition::Word { token_position, .. } => token_position,
MatchPosition::Phrase { token_positions: [_, ltp], .. } => ltp,
}
}
pub(super) fn get_word_count(&self) -> usize {
match self.position {
MatchPosition::Word { .. } => 1,
MatchPosition::Phrase { word_positions: [fwp, lwp], .. } => lwp - fwp + 1,
}
}
}
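// A minimal sketch (same-file test, hypothetical positions) of the accessors:
// a single word spans one position, a phrase spans `lwp - fwp + 1` words.
#[cfg(test)]
mod match_position_sketch {
use super::{Match, MatchPosition};
#[test]
fn word_count_of_word_and_phrase() {
let word = Match {
char_count: 5,
ids: vec![0],
position: MatchPosition::Word { word_position: 3, token_position: 6 },
};
let phrase = Match {
char_count: 9,
ids: vec![0, 1],
position: MatchPosition::Phrase { word_positions: [3, 5], token_positions: [6, 10] },
};
assert_eq!(word.get_word_count(), 1);
assert_eq!(phrase.get_word_count(), 3);
assert_eq!(phrase.get_first_word_pos(), 3);
assert_eq!(phrase.get_last_word_pos(), 5);
}
}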

View file

@ -0,0 +1,331 @@
use std::cmp::Reverse;
use std::fmt;
use std::ops::RangeInclusive;
use charabia::Token;
use super::super::interner::Interned;
use super::super::query_term::LocatedQueryTerm;
use super::super::{DedupInterner, Phrase};
use crate::SearchContext;
pub struct LocatedMatchingPhrase {
pub value: Interned<Phrase>,
pub positions: RangeInclusive<WordId>,
}
pub struct LocatedMatchingWords {
pub value: Vec<Interned<String>>,
pub positions: RangeInclusive<WordId>,
pub is_prefix: bool,
pub original_char_count: usize,
}
/// Structure created from a query tree,
/// referencing the words that match the given query tree.
#[derive(Default)]
pub struct MatchingWords {
word_interner: DedupInterner<String>,
phrase_interner: DedupInterner<Phrase>,
phrases: Vec<LocatedMatchingPhrase>,
words: Vec<LocatedMatchingWords>,
}
impl MatchingWords {
pub fn new(ctx: SearchContext<'_>, located_terms: Vec<LocatedQueryTerm>) -> Self {
let mut phrases = Vec::new();
let mut words = Vec::new();
// Extract and centralize the different phrases and words to match, stored in a QueryTerm,
// and wrap them in dedicated structures.
for located_term in located_terms {
let term = ctx.term_interner.get(located_term.value);
let (matching_words, matching_phrases) = term.all_computed_derivations();
for matching_phrase in matching_phrases {
phrases.push(LocatedMatchingPhrase {
value: matching_phrase,
positions: located_term.positions.clone(),
});
}
words.push(LocatedMatchingWords {
value: matching_words,
positions: located_term.positions.clone(),
is_prefix: term.is_prefix(),
original_char_count: term.original_word(&ctx).chars().count(),
});
}
// Sort words to put prefixes at the bottom, prioritizing the exact matches.
words.sort_unstable_by_key(|lmw| (lmw.is_prefix, Reverse(lmw.positions.len())));
Self {
phrases,
words,
word_interner: ctx.word_interner,
phrase_interner: ctx.phrase_interner,
}
}
/// Returns an iterator over terms that match or partially match the given token.
pub fn match_token<'a, 'b>(&'a self, token: &'b Token<'b>) -> MatchesIter<'a, 'b> {
MatchesIter { matching_words: self, phrases: Box::new(self.phrases.iter()), token }
}
/// Try to match the token with one of the located_words.
fn match_unique_words<'a>(&'a self, token: &Token<'_>) -> Option<MatchType<'a>> {
for located_words in &self.words {
for word in &located_words.value {
let word = self.word_interner.get(*word);
// if the word is a prefix we match using starts_with.
if located_words.is_prefix && token.lemma().starts_with(word) {
let Some((char_index, c)) =
word.char_indices().take(located_words.original_char_count).last()
else {
continue;
};
let prefix_length = char_index + c.len_utf8();
let (char_count, byte_len) = token.original_lengths(prefix_length);
let ids = &located_words.positions;
return Some(MatchType::Full { ids, char_count, byte_len });
// else we exact match the token.
} else if token.lemma() == word {
let ids = &located_words.positions;
return Some(MatchType::Full {
char_count: token.char_end - token.char_start,
byte_len: token.byte_end - token.byte_start,
ids,
});
}
}
}
None
}
}
/// Iterator over terms that match the given token.
/// This allows matches to be evaluated lazily.
pub struct MatchesIter<'a, 'b> {
matching_words: &'a MatchingWords,
phrases: Box<dyn Iterator<Item = &'a LocatedMatchingPhrase> + 'a>,
token: &'b Token<'b>,
}
impl<'a> Iterator for MatchesIter<'a, '_> {
type Item = MatchType<'a>;
fn next(&mut self) -> Option<Self::Item> {
match self.phrases.next() {
// Try to match all the phrases first.
Some(located_phrase) => {
let phrase = self.matching_words.phrase_interner.get(located_phrase.value);
// create a PartialMatch struct so that it computes the first match,
// instead of duplicating the code.
let ids = &located_phrase.positions;
// collect the references of words from the interner.
let words = phrase
.words
.iter()
.map(|word| {
word.map(|word| self.matching_words.word_interner.get(word).as_str())
})
.collect();
let partial = PartialMatch { matching_words: words, ids };
partial.match_token(self.token).or_else(|| self.next())
}
// If no phrase matches, try to match unique words.
None => self.matching_words.match_unique_words(self.token),
}
}
}
/// Id of a matching term corresponding to a word written by the end user.
pub type WordId = u16;
/// A given token can partially match a query word for several reasons:
/// - split words
/// - multi-word synonyms
/// In these cases we need to match several consecutive tokens to consider that the match is full.
#[derive(Debug, PartialEq)]
pub enum MatchType<'a> {
Full { char_count: usize, byte_len: usize, ids: &'a RangeInclusive<WordId> },
Partial(PartialMatch<'a>),
}
/// Structure helper to match several tokens in a row in order to complete a partial match.
#[derive(Debug, PartialEq)]
pub struct PartialMatch<'a> {
matching_words: Vec<Option<&'a str>>,
ids: &'a RangeInclusive<WordId>,
}
impl<'a> PartialMatch<'a> {
/// Returns:
/// - None if the given token breaks the partial match
/// - Partial if the given token matches the partial match but doesn't complete it
/// - Full if the given token completes the partial match
pub fn match_token(self, token: &Token<'_>) -> Option<MatchType<'a>> {
let Self { mut matching_words, ids, .. } = self;
let is_matching = match matching_words.first()? {
Some(word) => &token.lemma() == word,
// a None value in the phrase corresponds to a stop word,
// the value is considered a match if the current token is categorized as a stop word.
None => token.is_stopword(),
};
// if there are remaining words to match in the phrase and the current token is matching,
// return a new Partial match allowing the highlighter to continue.
if is_matching && matching_words.len() > 1 {
matching_words.remove(0);
Some(MatchType::Partial(Self { matching_words, ids }))
// if there is no remaining word to match in the phrase and the current token is matching,
// return a Full match.
} else if is_matching {
Some(MatchType::Full {
char_count: token.char_end - token.char_start,
byte_len: token.byte_end - token.byte_start,
ids,
})
// if the current token doesn't match, return None to break the match sequence.
} else {
None
}
}
}
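// A minimal sketch (same-file test, hypothetical phrase) of walking a
// PartialMatch across consecutive tokens until it becomes Full or breaks:
#[cfg(test)]
mod partial_match_sketch {
use std::borrow::Cow;
use charabia::{Token, TokenKind};
use super::{MatchType, PartialMatch};
fn token(lemma: &'static str) -> Token<'static> {
Token {
kind: TokenKind::Word,
lemma: Cow::Borrowed(lemma),
char_end: lemma.chars().count(),
byte_end: lemma.len(),
..Default::default()
}
}
#[test]
fn completes_over_two_tokens() {
let ids = 0..=0;
let partial = PartialMatch { matching_words: vec![Some("t"), Some("he")], ids: &ids };
// the first token matches but doesn't complete the phrase.
let MatchType::Partial(partial) = partial.match_token(&token("t")).unwrap() else {
panic!("expected a partial match")
};
// the second token completes it into a full match.
assert!(matches!(partial.match_token(&token("he")), Some(MatchType::Full { .. })));
}
}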
impl fmt::Debug for MatchingWords {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
let MatchingWords { word_interner, phrase_interner, phrases, words } = self;
let phrases: Vec<_> = phrases
.iter()
.map(|p| {
(
phrase_interner
.get(p.value)
.words
.iter()
.map(|w| w.map_or("STOP_WORD", |w| word_interner.get(w)))
.collect::<Vec<_>>()
.join(" "),
p.positions.clone(),
)
})
.collect();
let words: Vec<_> = words
.iter()
.flat_map(|w| {
w.value
.iter()
.map(|s| (word_interner.get(*s), w.positions.clone(), w.is_prefix))
.collect::<Vec<_>>()
})
.collect();
f.debug_struct("MatchingWords").field("phrases", &phrases).field("words", &words).finish()
}
}
#[cfg(test)]
pub(crate) mod tests {
use std::borrow::Cow;
use charabia::{TokenKind, TokenizerBuilder};
use super::super::super::located_query_terms_from_tokens;
use super::*;
use crate::index::tests::TempIndex;
use crate::search::new::query_term::ExtractedTokens;
pub(crate) fn temp_index_with_documents() -> TempIndex {
let temp_index = TempIndex::new();
temp_index
.add_documents(documents!([
{ "id": 1, "name": "split this world westfali westfalia the Ŵôřlḑôle" },
{ "id": 2, "name": "Westfália" },
{ "id": 3, "name": "Ŵôřlḑôle" },
]))
.unwrap();
temp_index
}
#[test]
fn matching_words() {
let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap();
let mut ctx = SearchContext::new(&temp_index, &rtxn).unwrap();
let mut builder = TokenizerBuilder::default();
let tokenizer = builder.build();
let tokens = tokenizer.tokenize("split this world");
let ExtractedTokens { query_terms, .. } =
located_query_terms_from_tokens(&mut ctx, tokens, None).unwrap();
let matching_words = MatchingWords::new(ctx, query_terms);
assert_eq!(
matching_words
.match_token(&Token {
kind: TokenKind::Word,
lemma: Cow::Borrowed("split"),
char_end: "split".chars().count(),
byte_end: "split".len(),
..Default::default()
})
.next(),
Some(MatchType::Full { char_count: 5, byte_len: 5, ids: &(0..=0) })
);
assert_eq!(
matching_words
.match_token(&Token {
kind: TokenKind::Word,
lemma: Cow::Borrowed("nyc"),
char_end: "nyc".chars().count(),
byte_end: "nyc".len(),
..Default::default()
})
.next(),
None
);
assert_eq!(
matching_words
.match_token(&Token {
kind: TokenKind::Word,
lemma: Cow::Borrowed("world"),
char_end: "world".chars().count(),
byte_end: "world".len(),
..Default::default()
})
.next(),
Some(MatchType::Full { char_count: 5, byte_len: 5, ids: &(2..=2) })
);
assert_eq!(
matching_words
.match_token(&Token {
kind: TokenKind::Word,
lemma: Cow::Borrowed("worlded"),
char_end: "worlded".chars().count(),
byte_end: "worlded".len(),
..Default::default()
})
.next(),
Some(MatchType::Full { char_count: 5, byte_len: 5, ids: &(2..=2) })
);
assert_eq!(
matching_words
.match_token(&Token {
kind: TokenKind::Word,
lemma: Cow::Borrowed("thisnew"),
char_end: "thisnew".chars().count(),
byte_end: "thisnew".len(),
..Default::default()
})
.next(),
None
);
}
}

View file

@ -0,0 +1,929 @@
mod best_match_interval;
mod r#match;
mod matching_words;
mod simple_token_kind;
use charabia::{Language, SeparatorKind, Token, Tokenizer};
use either::Either;
pub use matching_words::MatchingWords;
use matching_words::{MatchType, PartialMatch};
use r#match::{Match, MatchPosition};
use serde::Serialize;
use simple_token_kind::SimpleTokenKind;
use std::{
borrow::Cow,
cmp::{max, min},
};
const DEFAULT_CROP_MARKER: &str = "…";
const DEFAULT_HIGHLIGHT_PREFIX: &str = "<em>";
const DEFAULT_HIGHLIGHT_SUFFIX: &str = "</em>";
/// Structure used to build a Matcher that allows customizing the formatting tags.
pub struct MatcherBuilder<'m> {
matching_words: MatchingWords,
tokenizer: Tokenizer<'m>,
crop_marker: Option<String>,
highlight_prefix: Option<String>,
highlight_suffix: Option<String>,
}
impl<'m> MatcherBuilder<'m> {
pub fn new(matching_words: MatchingWords, tokenizer: Tokenizer<'m>) -> Self {
Self {
matching_words,
tokenizer,
crop_marker: None,
highlight_prefix: None,
highlight_suffix: None,
}
}
pub fn crop_marker(&mut self, marker: String) -> &Self {
self.crop_marker = Some(marker);
self
}
pub fn highlight_prefix(&mut self, prefix: String) -> &Self {
self.highlight_prefix = Some(prefix);
self
}
pub fn highlight_suffix(&mut self, suffix: String) -> &Self {
self.highlight_suffix = Some(suffix);
self
}
pub fn build<'t, 'lang>(
&self,
text: &'t str,
locales: Option<&'lang [Language]>,
) -> Matcher<'t, 'm, '_, 'lang> {
let crop_marker = match &self.crop_marker {
Some(marker) => marker.as_str(),
None => DEFAULT_CROP_MARKER,
};
let highlight_prefix = match &self.highlight_prefix {
Some(marker) => marker.as_str(),
None => DEFAULT_HIGHLIGHT_PREFIX,
};
let highlight_suffix = match &self.highlight_suffix {
Some(marker) => marker.as_str(),
None => DEFAULT_HIGHLIGHT_SUFFIX,
};
Matcher {
text,
matching_words: &self.matching_words,
tokenizer: &self.tokenizer,
crop_marker,
highlight_prefix,
highlight_suffix,
matches: None,
locales,
}
}
}
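// A minimal usage sketch (illustrative only; the tag strings and text are made up):
//
// let mut builder = MatcherBuilder::new(matching_words, tokenizer);
// builder.highlight_prefix("<mark>".to_string());
// builder.highlight_suffix("</mark>".to_string());
// let mut matcher = builder.build("text to highlight and crop", None);
// let formatted = matcher.format(FormatOptions { highlight: true, crop: Some(10) });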
#[derive(Copy, Clone, Default, Debug)]
pub struct FormatOptions {
pub highlight: bool,
pub crop: Option<usize>,
}
impl FormatOptions {
pub fn merge(self, other: Self) -> Self {
Self { highlight: self.highlight || other.highlight, crop: self.crop.or(other.crop) }
}
pub fn should_format(&self) -> bool {
self.highlight || self.crop.is_some()
}
}
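// A small sketch (same-file test, hypothetical values) of `merge`:
// `highlight` is OR-ed and the first defined crop size wins.
#[cfg(test)]
mod format_options_sketch {
use super::FormatOptions;
#[test]
fn merge_prefers_first_crop() {
let a = FormatOptions { highlight: false, crop: Some(10) };
let b = FormatOptions { highlight: true, crop: Some(20) };
let merged = a.merge(b);
assert!(merged.highlight);
assert_eq!(merged.crop, Some(10));
}
}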
#[derive(Serialize, Debug, Clone, PartialEq, Eq)]
pub struct MatchBounds {
pub start: usize,
pub length: usize,
}
/// Structure used to analyze a string, compute words that match,
/// and format the source string, returning a highlighted and cropped sub-string.
pub struct Matcher<'t, 'tokenizer, 'b, 'lang> {
text: &'t str,
matching_words: &'b MatchingWords,
tokenizer: &'b Tokenizer<'tokenizer>,
locales: Option<&'lang [Language]>,
crop_marker: &'b str,
highlight_prefix: &'b str,
highlight_suffix: &'b str,
matches: Option<(Vec<Token<'t>>, Vec<Match>)>,
}
impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
/// Iterates over tokens and saves any of them that match the query.
fn compute_matches(&mut self) -> &mut Self {
/// Some words are counted as matches only if they are close together and in the right order;
/// compute_partial_match peeks into the next words to validate whether the match is complete.
fn compute_partial_match<'a>(
mut partial: PartialMatch<'a>,
first_token_position: usize,
first_word_position: usize,
first_word_char_start: &usize,
words_positions: &mut impl Iterator<Item = (usize, usize, &'a Token<'a>)>,
matches: &mut Vec<Match>,
) -> bool {
for (token_position, word_position, word) in words_positions {
partial = match partial.match_token(word) {
// token matches the partial match, but the match is not full,
// we temporarily save the current token then we try to match the next one.
Some(MatchType::Partial(partial)) => partial,
// the partial match is now full, we keep this match and advance positions
Some(MatchType::Full { ids, .. }) => {
// save the token that closes the partial match as a match.
matches.push(Match {
char_count: word.char_end - *first_word_char_start,
ids: ids.clone().collect(),
position: MatchPosition::Phrase {
word_positions: [first_word_position, word_position],
token_positions: [first_token_position, token_position],
},
});
// the match is complete, we return true.
return true;
}
// no match, break out to try the next match type.
None => break,
};
}
// the match is not complete, we return false.
false
}
let tokens: Vec<_> =
self.tokenizer.tokenize_with_allow_list(self.text, self.locales).collect();
let mut matches = Vec::new();
let mut words_positions = tokens
.iter()
.scan((0, 0), |(token_position, word_position), token| {
let current_token_position = *token_position;
let current_word_position = *word_position;
*token_position += 1;
if !token.is_separator() {
*word_position += 1;
}
Some((current_token_position, current_word_position, token))
})
.filter(|(_, _, token)| !token.is_separator());
while let Some((token_position, word_position, word)) = words_positions.next() {
for match_type in self.matching_words.match_token(word) {
match match_type {
// we match, we save the current token as a match,
// then we continue the rest of the tokens.
MatchType::Full { ids, char_count, .. } => {
let ids: Vec<_> = ids.clone().collect();
matches.push(Match {
char_count,
ids,
position: MatchPosition::Word { word_position, token_position },
});
break;
}
// we match partially, iterate over next tokens to check if we can complete the match.
MatchType::Partial(partial) => {
// if the match is completed, we break the matching loop over the current token,
// then we continue with the rest of the tokens.
let mut wp = words_positions.clone();
if compute_partial_match(
partial,
token_position,
word_position,
&word.char_start,
&mut wp,
&mut matches,
) {
words_positions = wp;
break;
}
}
}
}
}
self.matches = Some((tokens, matches));
self
}
/// Returns boundaries of the words that match the query.
pub fn matches(&mut self) -> Vec<MatchBounds> {
match &self.matches {
None => self.compute_matches().matches(),
Some((tokens, matches)) => matches
.iter()
.map(|m| MatchBounds {
start: tokens[m.get_first_token_pos()].byte_start,
// TODO: Why is this in chars, while start is in bytes?
length: m.char_count,
})
.collect(),
}
}
/// Returns the bounds in byte index of the crop window.
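/// A worked example (hypothetical text): with `crop_size = 5` and a single
/// match on "world" in "one two three world four five six", the window grows
/// alternately on each side of the match ("three world four", then
/// "two three world four five") until 5 words are inside or the text ends.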
fn crop_bounds(&self, tokens: &[Token<'_>], matches: &[Match], crop_size: usize) -> [usize; 2] {
let (
mut remaining_words,
is_iterating_forward,
before_tokens_starting_index,
after_tokens_starting_index,
) = if !matches.is_empty() {
let [matches_first, matches_last] =
best_match_interval::find_best_match_interval(matches, crop_size);
let matches_size =
matches_last.get_last_word_pos() - matches_first.get_first_word_pos() + 1;
let is_crop_size_gte_match_size = crop_size >= matches_size;
let is_iterating_forward = matches_size == 0 || is_crop_size_gte_match_size;
let remaining_words = if is_crop_size_gte_match_size {
crop_size - matches_size
} else {
// in case the matches size is greater than the crop size, which implies there's only one match,
// we count words backwards, because we have to remove words, as they're extra words outside of
// the crop window
matches_size - crop_size
};
let after_tokens_starting_index = if matches_size == 0 {
0
} else {
let last_match_last_token_position_plus_one = matches_last.get_last_token_pos() + 1;
if last_match_last_token_position_plus_one < tokens.len() {
last_match_last_token_position_plus_one
} else {
// we have matched the end of possible tokens, there's nothing to advance
tokens.len() - 1
}
};
(
remaining_words,
is_iterating_forward,
if is_iterating_forward { matches_first.get_first_token_pos() } else { 0 },
after_tokens_starting_index,
)
} else {
(crop_size, true, 0, 0)
};
// create the initial state of the crop window: 2 iterators starting from the matches positions,
// a reverse iterator starting from the first match token position and going towards the beginning of the text,
let mut before_tokens = tokens[..before_tokens_starting_index].iter().rev().peekable();
// an iterator ...
let mut after_tokens = if is_iterating_forward {
// ... starting from the last match token position and going towards the end of the text.
Either::Left(tokens[after_tokens_starting_index..].iter().peekable())
} else {
// ... starting from the last match token position and going towards the start of the text.
Either::Right(tokens[..=after_tokens_starting_index].iter().rev().peekable())
};
// grows the crop window peeking in both directions
// until the window contains the good number of words:
while remaining_words > 0 {
let before_token_kind = before_tokens.peek().map(SimpleTokenKind::new);
let after_token_kind =
after_tokens.as_mut().either(|v| v.peek(), |v| v.peek()).map(SimpleTokenKind::new);
match (before_token_kind, after_token_kind) {
// we can expand both sides.
(Some(before_token_kind), Some(after_token_kind)) => {
match (before_token_kind, after_token_kind) {
// if they are both separators and are the same kind then advance both,
// or expand on the soft separator side.
(
SimpleTokenKind::Separator(before_token_separator_kind),
SimpleTokenKind::Separator(after_token_separator_kind),
) => {
if before_token_separator_kind == after_token_separator_kind {
before_tokens.next();
// this avoids having a trailing separator before the crop marker.
if remaining_words > 1 {
after_tokens.next();
}
} else if matches!(before_token_separator_kind, SeparatorKind::Hard) {
after_tokens.next();
} else {
before_tokens.next();
}
}
// if one of the tokens is a word, we expand on the side of the word.
// left is a word, advance left.
(SimpleTokenKind::NotSeparator, SimpleTokenKind::Separator(_)) => {
before_tokens.next();
remaining_words -= 1;
}
// right is a word, advance right.
(SimpleTokenKind::Separator(_), SimpleTokenKind::NotSeparator) => {
after_tokens.next();
remaining_words -= 1;
}
// both are words, advance left then right if remaining_words > 0.
(SimpleTokenKind::NotSeparator, SimpleTokenKind::NotSeparator) => {
before_tokens.next();
remaining_words -= 1;
if remaining_words > 0 {
after_tokens.next();
remaining_words -= 1;
}
}
}
}
// the end of the text is reached, advance left.
(Some(before_token_kind), None) => {
before_tokens.next();
if matches!(before_token_kind, SimpleTokenKind::NotSeparator) {
remaining_words -= 1;
}
}
// the start of the text is reached, advance right.
(None, Some(after_token_kind)) => {
after_tokens.next();
if matches!(after_token_kind, SimpleTokenKind::NotSeparator) {
remaining_words -= 1;
}
}
// no more token to add.
(None, None) => break,
}
}
// finally, keep the byte index of each bound of the crop window.
let crop_byte_start = before_tokens.next().map_or(0, |t| t.byte_end);
let crop_byte_end = after_tokens.next().map_or(self.text.len(), |t| t.byte_start);
[crop_byte_start, crop_byte_end]
}
/// Returns the formatted version of the original text.
pub fn format(&mut self, format_options: FormatOptions) -> Cow<'t, str> {
if !format_options.highlight && format_options.crop.is_none() {
// compute matches is not needed if no highlight nor crop is requested.
Cow::Borrowed(self.text)
} else {
match &self.matches {
Some((tokens, matches)) => {
// If the text has to be cropped, crop around the best interval.
let [crop_byte_start, crop_byte_end] = match format_options.crop {
Some(crop_size) if crop_size > 0 => {
self.crop_bounds(tokens, matches, crop_size)
}
_ => [0, self.text.len()],
};
let mut formatted = Vec::new();
// push crop marker if it's not the start of the text.
if crop_byte_start > 0 && !self.crop_marker.is_empty() {
formatted.push(self.crop_marker);
}
let mut byte_index = crop_byte_start;
if format_options.highlight {
// insert highlight markers around matches.
for m in matches {
let [m_byte_start, m_byte_end] = match m.position {
MatchPosition::Word { token_position, .. } => {
let token = &tokens[token_position];
[&token.byte_start, &token.byte_end]
}
MatchPosition::Phrase { token_positions: [ftp, ltp], .. } => {
[&tokens[ftp].byte_start, &tokens[ltp].byte_end]
}
};
// skip matches out of the crop window
if *m_byte_end < crop_byte_start || *m_byte_start > crop_byte_end {
continue;
}
// adjust start and end to the crop window size
let [m_byte_start, m_byte_end] = [
max(m_byte_start, &crop_byte_start),
min(m_byte_end, &crop_byte_end),
];
// push text that is positioned before our matches
if byte_index < *m_byte_start {
formatted.push(&self.text[byte_index..*m_byte_start]);
}
formatted.push(self.highlight_prefix);
// TODO: This is additional work done, charabia::token::Token byte_len
// should already get us the original byte length, however, that doesn't work as
// it's supposed to, investigate why
let highlight_byte_index = self.text[*m_byte_start..]
.char_indices()
.nth(m.char_count)
.map_or(*m_byte_end, |(i, _)| min(i + *m_byte_start, *m_byte_end));
formatted.push(&self.text[*m_byte_start..highlight_byte_index]);
formatted.push(self.highlight_suffix);
// if it's a prefix highlight, we put the end of the word after the highlight marker.
if highlight_byte_index < *m_byte_end {
formatted.push(&self.text[highlight_byte_index..*m_byte_end]);
}
byte_index = *m_byte_end;
}
}
// push the rest of the text between last match and the end of crop.
if byte_index < crop_byte_end {
formatted.push(&self.text[byte_index..crop_byte_end]);
}
// push crop marker if it's not the end of the text.
if crop_byte_end < self.text.len() && !self.crop_marker.is_empty() {
formatted.push(self.crop_marker);
}
if formatted.len() == 1 {
// avoid concatenating if there is already 1 slice.
Cow::Borrowed(&self.text[crop_byte_start..crop_byte_end])
} else {
Cow::Owned(formatted.concat())
}
}
None => self.compute_matches().format(format_options),
}
}
}
}
#[cfg(test)]
mod tests {
use charabia::TokenizerBuilder;
use matching_words::tests::temp_index_with_documents;
use super::*;
use crate::index::tests::TempIndex;
use crate::{execute_search, filtered_universe, SearchContext, TimeBudget};
impl<'a> MatcherBuilder<'a> {
fn new_test(rtxn: &'a heed::RoTxn<'a>, index: &'a TempIndex, query: &str) -> Self {
let mut ctx = SearchContext::new(index, rtxn).unwrap();
let universe = filtered_universe(ctx.index, ctx.txn, &None).unwrap();
let crate::search::PartialSearchResult { located_query_terms, .. } = execute_search(
&mut ctx,
Some(query),
crate::TermsMatchingStrategy::default(),
crate::score_details::ScoringStrategy::Skip,
false,
universe,
&None,
&None,
crate::search::new::GeoSortStrategy::default(),
0,
100,
Some(10),
&mut crate::DefaultSearchLogger,
&mut crate::DefaultSearchLogger,
TimeBudget::max(),
None,
None,
)
.unwrap();
// consume context and located_query_terms to build MatchingWords.
let matching_words = match located_query_terms {
Some(located_query_terms) => MatchingWords::new(ctx, located_query_terms),
None => MatchingWords::default(),
};
MatcherBuilder::new(matching_words, TokenizerBuilder::default().into_tokenizer())
}
}
#[test]
fn format_identity() {
let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap();
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
let format_options = FormatOptions { highlight: false, crop: None };
// Text without any match.
let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
let mut matcher = builder.build(text, None);
// no crop and no highlight should return complete text.
assert_eq!(&matcher.format(format_options), &text);
// Text containing all matches.
let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.";
let mut matcher = builder.build(text, None);
// no crop and no highlight should return complete text.
assert_eq!(&matcher.format(format_options), &text);
// Text containing some matches.
let text = "Natalie risk her future to build a world with the boy she loves.";
let mut matcher = builder.build(text, None);
// no crop and no highlight should return complete text.
assert_eq!(&matcher.format(format_options), &text);
}
#[test]
fn format_highlight() {
let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap();
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
let format_options = FormatOptions { highlight: true, crop: None };
// empty text.
let text = "";
let mut matcher = builder.build(text, None);
assert_eq!(&matcher.format(format_options), "");
// text containing only separators.
let text = ":-)";
let mut matcher = builder.build(text, None);
assert_eq!(&matcher.format(format_options), ":-)");
// Text without any match.
let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
let mut matcher = builder.build(text, None);
// no crop should return the complete text, because there are no matches.
assert_eq!(&matcher.format(format_options), &text);
// Text containing all matches.
let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.";
let mut matcher = builder.build(text, None);
// no crop should return complete text with highlighted matches.
insta::assert_snapshot!(
matcher.format(format_options),
@"Natalie risk her future to build a <em>world</em> with <em>the</em> boy she loves. Emily Henry: <em>The</em> Love That <em>Split</em> <em>The</em> <em>World</em>."
);
// Text containing some matches.
let text = "Natalie risk her future to build a world with the boy she loves.";
let mut matcher = builder.build(text, None);
// no crop should return complete text with highlighted matches.
insta::assert_snapshot!(
matcher.format(format_options),
@"Natalie risk her future to build a <em>world</em> with <em>the</em> boy she loves."
);
}
#[test]
fn highlight_unicode() {
let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap();
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "world");
let format_options = FormatOptions { highlight: true, crop: None };
// Text containing prefix match.
let text = "Ŵôřlḑôle";
let mut matcher = builder.build(text, None);
// no crop should return complete text with highlighted matches.
insta::assert_snapshot!(
matcher.format(format_options),
@"<em>Ŵôřlḑ</em>ôle"
);
// Text containing unicode match.
let text = "Ŵôřlḑ";
let mut matcher = builder.build(text, None);
// no crop should return complete text with highlighted matches.
insta::assert_snapshot!(
matcher.format(format_options),
@"<em>Ŵôřlḑ</em>"
);
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "westfali");
let format_options = FormatOptions { highlight: true, crop: None };
// Text containing unicode match.
let text = "Westfália";
let mut matcher = builder.build(text, None);
// no crop should return complete text with highlighted matches.
insta::assert_snapshot!(
matcher.format(format_options),
@"<em>Westfáli</em>a"
);
}
#[test]
fn format_crop() {
let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap();
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
let format_options = FormatOptions { highlight: false, crop: Some(10) };
// empty text.
let text = "";
let mut matcher = builder.build(text, None);
insta::assert_snapshot!(
matcher.format(format_options),
@""
);
// text containing only separators.
let text = ":-)";
let mut matcher = builder.build(text, None);
insta::assert_snapshot!(
matcher.format(format_options),
@":-)"
);
// Text without any match.
let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
let mut matcher = builder.build(text, None);
// no highlight should return the first 10 words with a marker at the end.
insta::assert_snapshot!(
matcher.format(format_options),
@"A quick brown fox can not jump 32 feet, right…"
);
// Text without any match starting by a separator.
let text = "(A quick brown fox can not jump 32 feet, right? Brr, it is cold!)";
let mut matcher = builder.build(text, None);
// no highlight should return the first 10 words with a marker at the end.
insta::assert_snapshot!(
matcher.format(format_options),
@"(A quick brown fox can not jump 32 feet, right…"
);
// Test phrase propagation
let text = "Natalie risk her future. Split The World is a book written by Emily Henry. I never read it.";
let mut matcher = builder.build(text, None);
// should crop the phrase instead of cropping around the match.
insta::assert_snapshot!(
matcher.format(format_options),
@"…Split The World is a book written by Emily Henry…"
);
// Text containing some matches.
let text = "Natalie risk her future to build a world with the boy she loves.";
let mut matcher = builder.build(text, None);
// no highlight should return the last 10 words with a marker at the start.
insta::assert_snapshot!(
matcher.format(format_options),
@"…future to build a world with the boy she loves…"
);
// Text containing all matches.
let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.";
let mut matcher = builder.build(text, None);
// no highlight should return the last 10 words with a marker at the start.
insta::assert_snapshot!(
matcher.format(format_options),
@"…she loves. Emily Henry: The Love That Split The World."
);
// Text containing a match unordered and a match ordered.
let text = "The world split void void void void void void void void void split the world void void";
let mut matcher = builder.build(text, None);
// crop should return the last 10 words with a marker at the start.
insta::assert_snapshot!(
matcher.format(format_options),
@"…void void void void void split the world void void"
);
// Text containing matches with different density.
let text = "split void the void void world void void void void void void void void void void split the world void void";
let mut matcher = builder.build(text, None);
// crop should return the last 10 words with a marker at the start.
insta::assert_snapshot!(
matcher.format(format_options),
@"…void void void void void split the world void void"
);
// Text containing matches with same word.
let text = "split split split split split split void void void void void void void void void void split the world void void";
let mut matcher = builder.build(text, None);
// crop should return the last 10 words with a marker at the start.
insta::assert_snapshot!(
matcher.format(format_options),
@"…void void void void void split the world void void"
);
}
#[test]
fn format_highlight_crop() {
let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap();
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
let format_options = FormatOptions { highlight: true, crop: Some(10) };
// empty text.
let text = "";
let mut matcher = builder.build(text, None);
insta::assert_snapshot!(
matcher.format(format_options),
@""
);
// text containing only separators.
let text = ":-)";
let mut matcher = builder.build(text, None);
insta::assert_snapshot!(
matcher.format(format_options),
@":-)"
);
// Text without any match.
let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
let mut matcher = builder.build(text, None);
// both should return the first 10 words with a marker at the end.
insta::assert_snapshot!(
matcher.format(format_options),
@"A quick brown fox can not jump 32 feet, right…"
);
// Text containing some matches.
let text = "Natalie risk her future to build a world with the boy she loves.";
let mut matcher = builder.build(text, None);
// both should return the last 10 words with a marker at the start and highlighted matches.
insta::assert_snapshot!(
matcher.format(format_options),
@"…future to build a <em>world</em> with <em>the</em> boy she loves…"
);
// Text containing all matches.
let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.";
let mut matcher = builder.build(text, None);
// both should return the last 10 words with a marker at the start and highlighted matches.
insta::assert_snapshot!(
matcher.format(format_options),
@"…she loves. Emily Henry: <em>The</em> Love That <em>Split</em> <em>The</em> <em>World</em>."
);
// Text containing a match unordered and a match ordered.
let text = "The world split void void void void void void void void void split the world void void";
let mut matcher = builder.build(text, None);
// crop should return the last 10 words with a marker at the start.
insta::assert_snapshot!(
matcher.format(format_options),
@"…void void void void void <em>split</em> <em>the</em> <em>world</em> void void"
);
}
#[test]
fn format_highlight_crop_phrase_query() {
//! testing: https://github.com/meilisearch/meilisearch/issues/3975
let temp_index = TempIndex::new();
let text = "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!";
temp_index
.add_documents(documents!([
{ "id": 1, "text": text }
]))
.unwrap();
let rtxn = temp_index.read_txn().unwrap();
let format_options = FormatOptions { highlight: true, crop: Some(10) };
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "\"the world\"");
let mut matcher = builder.build(text, None);
// should return 10 words with a marker at the start as well as at the end, and the highlighted matches.
insta::assert_snapshot!(
matcher.format(format_options),
@"…the power to split <em>the world</em> between those who embraced…"
);
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "those \"and those\"");
let mut matcher = builder.build(text, None);
// should highlight "those" and the phrase "and those".
insta::assert_snapshot!(
matcher.format(format_options),
@"…world between <em>those</em> who embraced progress <em>and those</em> who resisted…"
);
let builder = MatcherBuilder::new_test(
&rtxn,
&temp_index,
"\"The groundbreaking invention had the power to split the world\"",
);
let mut matcher = builder.build(text, None);
insta::assert_snapshot!(
matcher.format(format_options),
@"<em>The groundbreaking invention had the power to split the world</em>…"
);
let builder = MatcherBuilder::new_test(
&rtxn,
&temp_index,
"\"The groundbreaking invention had the power to split the world between those\"",
);
let mut matcher = builder.build(text, None);
insta::assert_snapshot!(
matcher.format(format_options),
@"<em>The groundbreaking invention had the power to split the world</em>…"
);
let builder = MatcherBuilder::new_test(
&rtxn,
&temp_index,
"\"The groundbreaking invention\" \"embraced progress and those who resisted change!\"",
);
let mut matcher = builder.build(text, None);
insta::assert_snapshot!(
matcher.format(format_options),
// TODO: Should include exclamation mark without crop markers
@"…between those who <em>embraced progress and those who resisted change</em>…"
);
let builder = MatcherBuilder::new_test(
&rtxn,
&temp_index,
"\"groundbreaking invention\" \"split the world between\"",
);
let mut matcher = builder.build(text, None);
insta::assert_snapshot!(
matcher.format(format_options),
@"…<em>groundbreaking invention</em> had the power to <em>split the world between</em>…"
);
let builder = MatcherBuilder::new_test(
&rtxn,
&temp_index,
"\"groundbreaking invention\" \"had the power to split the world between those\"",
);
let mut matcher = builder.build(text, None);
insta::assert_snapshot!(
matcher.format(format_options),
@"…<em>invention</em> <em>had the power to split the world between those</em>…"
);
}
#[test]
fn smaller_crop_size() {
//! testing: https://github.com/meilisearch/specifications/pull/120#discussion_r836536295
let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap();
let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "split the world");
let text = "void void split the world void void.";
// set a smaller crop size
let format_options = FormatOptions { highlight: false, crop: Some(2) };
let mut matcher = builder.build(text, None);
// because crop size < query size, partially format matches.
insta::assert_snapshot!(
matcher.format(format_options),
@"…split the…"
);
// set a smaller crop size
let format_options = FormatOptions { highlight: false, crop: Some(1) };
let mut matcher = builder.build(text, None);
// because crop size < query size, partially format matches.
insta::assert_snapshot!(
matcher.format(format_options),
@"…split…"
);
// set crop size to 0
let format_options = FormatOptions { highlight: false, crop: Some(0) };
let mut matcher = builder.build(text, None);
// because crop size is 0, crop is ignored.
insta::assert_snapshot!(
matcher.format(format_options),
@"void void split the world void void."
);
}
#[test]
fn partial_matches() {
let temp_index = temp_index_with_documents();
let rtxn = temp_index.read_txn().unwrap();
let mut builder =
MatcherBuilder::new_test(&rtxn, &temp_index, "the \"t he\" door \"do or\"");
builder.highlight_prefix("_".to_string());
builder.highlight_suffix("_".to_string());
let format_options = FormatOptions { highlight: true, crop: None };
let text = "the do or die can't be he do and or isn't he";
let mut matcher = builder.build(text, None);
insta::assert_snapshot!(
matcher.format(format_options),
@"_the_ _do or_ die can't be he do and or isn'_t he_"
);
}
}

View file

@ -0,0 +1,15 @@
use charabia::{SeparatorKind, Token, TokenKind};
pub enum SimpleTokenKind {
Separator(SeparatorKind),
NotSeparator,
}
impl SimpleTokenKind {
pub fn new(token: &&Token<'_>) -> Self {
match token.kind {
TokenKind::Separator(separator_kind) => Self::Separator(separator_kind),
_ => Self::NotSeparator,
}
}
}

View file

@ -0,0 +1,883 @@
mod bucket_sort;
mod db_cache;
mod distinct;
mod geo_sort;
mod graph_based_ranking_rule;
mod interner;
mod limits;
mod logger;
pub mod matches;
mod query_graph;
mod query_term;
mod ranking_rule_graph;
mod ranking_rules;
mod resolve_query_graph;
mod small_bitmap;
mod exact_attribute;
mod sort;
mod vector_sort;
#[cfg(test)]
mod tests;
use std::collections::HashSet;
use bucket_sort::{bucket_sort, BucketSortOutput};
use charabia::{Language, TokenizerBuilder};
use db_cache::DatabaseCache;
use exact_attribute::ExactAttribute;
use graph_based_ranking_rule::{Exactness, Fid, Position, Proximity, Typo};
use heed::RoTxn;
use interner::{DedupInterner, Interner};
pub use logger::visual::VisualSearchLogger;
pub use logger::{DefaultSearchLogger, SearchLogger};
use query_graph::{QueryGraph, QueryNode};
use query_term::{
located_query_terms_from_tokens, ExtractedTokens, LocatedQueryTerm, Phrase, QueryTerm,
};
use ranking_rules::{
BoxRankingRule, PlaceholderQuery, RankingRule, RankingRuleOutput, RankingRuleQueryTrait,
};
use resolve_query_graph::{compute_query_graph_docids, PhraseDocIdsCache};
use roaring::RoaringBitmap;
use sort::Sort;
use self::distinct::facet_string_values;
use self::geo_sort::GeoSort;
pub use self::geo_sort::Strategy as GeoSortStrategy;
use self::graph_based_ranking_rule::Words;
use self::interner::Interned;
use self::vector_sort::VectorSort;
use crate::localized_attributes_rules::LocalizedFieldIds;
use crate::score_details::{ScoreDetails, ScoringStrategy};
use crate::search::new::distinct::apply_distinct_rule;
use crate::vector::Embedder;
use crate::{
AscDesc, DocumentId, FieldId, Filter, Index, Member, Result, TermsMatchingStrategy, TimeBudget,
UserError, Weight,
};
/// A structure used throughout the execution of a search query.
pub struct SearchContext<'ctx> {
pub index: &'ctx Index,
pub txn: &'ctx RoTxn<'ctx>,
pub db_cache: DatabaseCache<'ctx>,
pub word_interner: DedupInterner<String>,
pub phrase_interner: DedupInterner<Phrase>,
pub term_interner: Interner<QueryTerm>,
pub phrase_docids: PhraseDocIdsCache,
pub restricted_fids: Option<RestrictedFids>,
}
impl<'ctx> SearchContext<'ctx> {
pub fn new(index: &'ctx Index, txn: &'ctx RoTxn<'ctx>) -> Result<Self> {
let searchable_fids = index.searchable_fields_and_weights(txn)?;
let exact_attributes_ids = index.exact_attributes_ids(txn)?;
let mut exact = Vec::new();
let mut tolerant = Vec::new();
for (_name, fid, weight) in searchable_fids {
if exact_attributes_ids.contains(&fid) {
exact.push((fid, weight));
} else {
tolerant.push((fid, weight));
}
}
Ok(Self {
index,
txn,
db_cache: <_>::default(),
word_interner: <_>::default(),
phrase_interner: <_>::default(),
term_interner: <_>::default(),
phrase_docids: <_>::default(),
restricted_fids: None,
})
}
pub fn attributes_to_search_on(
&mut self,
attributes_to_search_on: &'ctx [String],
) -> Result<()> {
let user_defined_searchable = self.index.user_defined_searchable_fields(self.txn)?;
let searchable_fields_weights = self.index.searchable_fields_and_weights(self.txn)?;
let exact_attributes_ids = self.index.exact_attributes_ids(self.txn)?;
let mut wildcard = false;
let mut restricted_fids = RestrictedFids::default();
for field_name in attributes_to_search_on {
if field_name == "*" {
wildcard = true;
// we cannot exit early as we want to return an error in case of unknown fields
continue;
}
let searchable_weight =
searchable_fields_weights.iter().find(|(name, _, _)| name == field_name);
let (fid, weight) = match searchable_weight {
// The field id exists and the field is searchable
Some((_name, fid, weight)) => (*fid, *weight),
// The field is not searchable but the user didn't define any searchable attributes
None if user_defined_searchable.is_none() => continue,
// The field is not searchable => User error
None => {
let (valid_fields, hidden_fields) = self.index.remove_hidden_fields(
self.txn,
searchable_fields_weights.iter().map(|(name, _, _)| name),
)?;
let field = field_name.to_string();
return Err(UserError::InvalidSearchableAttribute {
field,
valid_fields,
hidden_fields,
}
.into());
}
};
if exact_attributes_ids.contains(&fid) {
restricted_fids.exact.push((fid, weight));
} else {
restricted_fids.tolerant.push((fid, weight));
};
}
if wildcard {
self.restricted_fids = None;
} else {
self.restricted_fids = Some(restricted_fids);
}
Ok(())
}
}
#[derive(Clone, Copy, PartialEq, PartialOrd, Ord, Eq)]
pub enum Word {
Original(Interned<String>),
Derived(Interned<String>),
}
impl Word {
pub fn interned(&self) -> Interned<String> {
match self {
Word::Original(word) => *word,
Word::Derived(word) => *word,
}
}
}
#[derive(Debug, Clone, Default)]
pub struct RestrictedFids {
pub tolerant: Vec<(FieldId, Weight)>,
pub exact: Vec<(FieldId, Weight)>,
}
impl RestrictedFids {
pub fn contains(&self, fid: &FieldId) -> bool {
self.tolerant.iter().any(|(id, _)| id == fid) || self.exact.iter().any(|(id, _)| id == fid)
}
}
/// Apply the [`TermsMatchingStrategy`] to the query graph and resolve it.
fn resolve_maximally_reduced_query_graph(
ctx: &mut SearchContext<'_>,
universe: &RoaringBitmap,
query_graph: &QueryGraph,
matching_strategy: TermsMatchingStrategy,
logger: &mut dyn SearchLogger<QueryGraph>,
) -> Result<RoaringBitmap> {
let mut graph = query_graph.clone();
let nodes_to_remove = match matching_strategy {
TermsMatchingStrategy::Last => query_graph
.removal_order_for_terms_matching_strategy_last(ctx)
.iter()
.flat_map(|x| x.iter())
.collect(),
TermsMatchingStrategy::Frequency => query_graph
.removal_order_for_terms_matching_strategy_frequency(ctx)?
.iter()
.flat_map(|x| x.iter())
.collect(),
TermsMatchingStrategy::All => vec![],
};
graph.remove_nodes_keep_edges(&nodes_to_remove);
logger.query_for_initial_universe(&graph);
let docids = compute_query_graph_docids(ctx, &graph, universe)?;
Ok(docids)
}
#[tracing::instrument(level = "trace", skip_all, target = "search::universe")]
fn resolve_universe(
ctx: &mut SearchContext<'_>,
initial_universe: &RoaringBitmap,
query_graph: &QueryGraph,
matching_strategy: TermsMatchingStrategy,
logger: &mut dyn SearchLogger<QueryGraph>,
) -> Result<RoaringBitmap> {
resolve_maximally_reduced_query_graph(
ctx,
initial_universe,
query_graph,
matching_strategy,
logger,
)
}
#[tracing::instrument(level = "trace", skip_all, target = "search::query")]
fn resolve_negative_words(
ctx: &mut SearchContext<'_>,
universe: Option<&RoaringBitmap>,
negative_words: &[Word],
) -> Result<RoaringBitmap> {
let mut negative_bitmap = RoaringBitmap::new();
for &word in negative_words {
if let Some(bitmap) = ctx.word_docids(universe, word)? {
negative_bitmap |= bitmap;
}
}
Ok(negative_bitmap)
}
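// For example (hypothetical query): `table -chair` yields a single negative
// word, "chair", and every document containing "chair" ends up in the
// returned bitmap, later subtracted from the universe by the caller.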
#[tracing::instrument(level = "trace", skip_all, target = "search::query")]
fn resolve_negative_phrases(
ctx: &mut SearchContext<'_>,
negative_phrases: &[LocatedQueryTerm],
) -> Result<RoaringBitmap> {
let mut negative_bitmap = RoaringBitmap::new();
for term in negative_phrases {
let query_term = ctx.term_interner.get(term.value);
if let Some(phrase) = query_term.original_phrase() {
negative_bitmap |= ctx.get_phrase_docids(phrase)?;
}
}
Ok(negative_bitmap)
}
/// Return the list of initialised ranking rules to be used for a placeholder search.
fn get_ranking_rules_for_placeholder_search<'ctx>(
ctx: &SearchContext<'ctx>,
sort_criteria: &Option<Vec<AscDesc>>,
geo_strategy: geo_sort::Strategy,
) -> Result<Vec<BoxRankingRule<'ctx, PlaceholderQuery>>> {
let mut sort = false;
let mut sorted_fields = HashSet::new();
let mut geo_sorted = false;
let mut ranking_rules: Vec<BoxRankingRule<'ctx, PlaceholderQuery>> = vec![];
let settings_ranking_rules = ctx.index.criteria(ctx.txn)?;
for rr in settings_ranking_rules {
match rr {
// These rules need a query to have an effect; ignore them in placeholder search
crate::Criterion::Words
| crate::Criterion::Typo
| crate::Criterion::Attribute
| crate::Criterion::Proximity
| crate::Criterion::Exactness => continue,
crate::Criterion::Sort => {
if sort {
continue;
}
resolve_sort_criteria(
sort_criteria,
ctx,
&mut ranking_rules,
&mut sorted_fields,
&mut geo_sorted,
geo_strategy,
)?;
sort = true;
}
crate::Criterion::Asc(field_name) => {
if sorted_fields.contains(&field_name) {
continue;
}
sorted_fields.insert(field_name.clone());
ranking_rules.push(Box::new(Sort::new(ctx.index, ctx.txn, field_name, true)?));
}
crate::Criterion::Desc(field_name) => {
if sorted_fields.contains(&field_name) {
continue;
}
sorted_fields.insert(field_name.clone());
ranking_rules.push(Box::new(Sort::new(ctx.index, ctx.txn, field_name, false)?));
}
}
}
Ok(ranking_rules)
}
#[allow(clippy::too_many_arguments)]
fn get_ranking_rules_for_vector<'ctx>(
ctx: &SearchContext<'ctx>,
sort_criteria: &Option<Vec<AscDesc>>,
geo_strategy: geo_sort::Strategy,
limit_plus_offset: usize,
target: &[f32],
embedder_name: &str,
embedder: &Embedder,
quantized: bool,
) -> Result<Vec<BoxRankingRule<'ctx, PlaceholderQuery>>> {
// vector search
let mut sort = false;
let mut sorted_fields = HashSet::new();
let mut geo_sorted = false;
let mut vector = false;
let mut ranking_rules: Vec<BoxRankingRule<'ctx, PlaceholderQuery>> = vec![];
let settings_ranking_rules = ctx.index.criteria(ctx.txn)?;
for rr in settings_ranking_rules {
match rr {
crate::Criterion::Words
| crate::Criterion::Typo
| crate::Criterion::Proximity
| crate::Criterion::Attribute
| crate::Criterion::Exactness => {
if !vector {
let vector_candidates = ctx.index.documents_ids(ctx.txn)?;
let vector_sort = VectorSort::new(
ctx,
target.to_vec(),
vector_candidates,
limit_plus_offset,
embedder_name,
embedder,
quantized,
)?;
ranking_rules.push(Box::new(vector_sort));
vector = true;
}
}
crate::Criterion::Sort => {
if sort {
continue;
}
resolve_sort_criteria(
sort_criteria,
ctx,
&mut ranking_rules,
&mut sorted_fields,
&mut geo_sorted,
geo_strategy,
)?;
sort = true;
}
crate::Criterion::Asc(field_name) => {
if sorted_fields.contains(&field_name) {
continue;
}
sorted_fields.insert(field_name.clone());
ranking_rules.push(Box::new(Sort::new(ctx.index, ctx.txn, field_name, true)?));
}
crate::Criterion::Desc(field_name) => {
if sorted_fields.contains(&field_name) {
continue;
}
sorted_fields.insert(field_name.clone());
ranking_rules.push(Box::new(Sort::new(ctx.index, ctx.txn, field_name, false)?));
}
}
}
Ok(ranking_rules)
}
/// Return the list of initialised ranking rules to be used for a query graph search.
fn get_ranking_rules_for_query_graph_search<'ctx>(
ctx: &SearchContext<'ctx>,
sort_criteria: &Option<Vec<AscDesc>>,
geo_strategy: geo_sort::Strategy,
terms_matching_strategy: TermsMatchingStrategy,
) -> Result<Vec<BoxRankingRule<'ctx, QueryGraph>>> {
// query graph search
let mut words = false;
let mut typo = false;
let mut proximity = false;
let mut sort = false;
let mut attribute = false;
let mut exactness = false;
let mut sorted_fields = HashSet::new();
let mut geo_sorted = false;
// Don't add the `words` ranking rule if the term matching strategy is `All`
if matches!(terms_matching_strategy, TermsMatchingStrategy::All) {
words = true;
}
let mut ranking_rules: Vec<BoxRankingRule<'ctx, QueryGraph>> = vec![];
let settings_ranking_rules = ctx.index.criteria(ctx.txn)?;
for rr in settings_ranking_rules {
// Add Words before any of: typo, proximity, attribute, exactness
match rr {
crate::Criterion::Typo
| crate::Criterion::Attribute
| crate::Criterion::Proximity
| crate::Criterion::Exactness => {
if !words {
ranking_rules.push(Box::new(Words::new(terms_matching_strategy)));
words = true;
}
}
_ => {}
}
match rr {
crate::Criterion::Words => {
if words {
continue;
}
ranking_rules.push(Box::new(Words::new(terms_matching_strategy)));
words = true;
}
crate::Criterion::Typo => {
if typo {
continue;
}
typo = true;
ranking_rules.push(Box::new(Typo::new(None)));
}
crate::Criterion::Proximity => {
if proximity {
continue;
}
proximity = true;
ranking_rules.push(Box::new(Proximity::new(None)));
}
crate::Criterion::Attribute => {
if attribute {
continue;
}
attribute = true;
ranking_rules.push(Box::new(Fid::new(None)));
ranking_rules.push(Box::new(Position::new(None)));
}
crate::Criterion::Sort => {
if sort {
continue;
}
resolve_sort_criteria(
sort_criteria,
ctx,
&mut ranking_rules,
&mut sorted_fields,
&mut geo_sorted,
geo_strategy,
)?;
sort = true;
}
crate::Criterion::Exactness => {
if exactness {
continue;
}
ranking_rules.push(Box::new(ExactAttribute::new()));
ranking_rules.push(Box::new(Exactness::new()));
exactness = true;
}
crate::Criterion::Asc(field_name) => {
if sorted_fields.contains(&field_name) {
continue;
}
sorted_fields.insert(field_name.clone());
ranking_rules.push(Box::new(Sort::new(ctx.index, ctx.txn, field_name, true)?));
}
crate::Criterion::Desc(field_name) => {
if sorted_fields.contains(&field_name) {
continue;
}
sorted_fields.insert(field_name.clone());
ranking_rules.push(Box::new(Sort::new(ctx.index, ctx.txn, field_name, false)?));
}
}
}
Ok(ranking_rules)
}
fn resolve_sort_criteria<'ctx, Query: RankingRuleQueryTrait>(
sort_criteria: &Option<Vec<AscDesc>>,
ctx: &SearchContext<'ctx>,
ranking_rules: &mut Vec<BoxRankingRule<'ctx, Query>>,
sorted_fields: &mut HashSet<String>,
geo_sorted: &mut bool,
geo_strategy: geo_sort::Strategy,
) -> Result<()> {
let sort_criteria = sort_criteria.clone().unwrap_or_default();
ranking_rules.reserve(sort_criteria.len());
for criterion in sort_criteria {
match criterion {
AscDesc::Asc(Member::Field(field_name)) => {
if sorted_fields.contains(&field_name) {
continue;
}
sorted_fields.insert(field_name.clone());
ranking_rules.push(Box::new(Sort::new(ctx.index, ctx.txn, field_name, true)?));
}
AscDesc::Desc(Member::Field(field_name)) => {
if sorted_fields.contains(&field_name) {
continue;
}
sorted_fields.insert(field_name.clone());
ranking_rules.push(Box::new(Sort::new(ctx.index, ctx.txn, field_name, false)?));
}
AscDesc::Asc(Member::Geo(point)) => {
if *geo_sorted {
continue;
}
let geo_faceted_docids = ctx.index.geo_faceted_documents_ids(ctx.txn)?;
ranking_rules.push(Box::new(GeoSort::new(
geo_strategy,
geo_faceted_docids,
point,
true,
)?));
}
AscDesc::Desc(Member::Geo(point)) => {
if *geo_sorted {
continue;
}
let geo_faceted_docids = ctx.index.geo_faceted_documents_ids(ctx.txn)?;
ranking_rules.push(Box::new(GeoSort::new(
geo_strategy,
geo_faceted_docids,
point,
false,
)?));
}
};
}
Ok(())
}
#[tracing::instrument(level = "trace", skip_all, target = "search::universe")]
pub fn filtered_universe(
index: &Index,
txn: &RoTxn<'_>,
filters: &Option<Filter<'_>>,
) -> Result<RoaringBitmap> {
Ok(if let Some(filters) = filters {
filters.evaluate(txn, index)?
} else {
index.documents_ids(txn)?
})
}
#[allow(clippy::too_many_arguments)]
pub fn execute_vector_search(
ctx: &mut SearchContext<'_>,
vector: &[f32],
scoring_strategy: ScoringStrategy,
universe: RoaringBitmap,
sort_criteria: &Option<Vec<AscDesc>>,
distinct: &Option<String>,
geo_strategy: geo_sort::Strategy,
from: usize,
length: usize,
embedder_name: &str,
embedder: &Embedder,
quantized: bool,
time_budget: TimeBudget,
ranking_score_threshold: Option<f64>,
) -> Result<PartialSearchResult> {
check_sort_criteria(ctx, sort_criteria.as_ref())?;
// FIXME: input universe = universe & documents_with_vectors
// for now if we're computing embeddings for ALL documents, we can assume that this is just universe
let ranking_rules = get_ranking_rules_for_vector(
ctx,
sort_criteria,
geo_strategy,
from + length,
vector,
embedder_name,
embedder,
quantized,
)?;
let mut placeholder_search_logger = logger::DefaultSearchLogger;
let placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery> =
&mut placeholder_search_logger;
let BucketSortOutput { docids, scores, all_candidates, degraded } = bucket_sort(
ctx,
ranking_rules,
&PlaceholderQuery,
distinct.as_deref(),
&universe,
from,
length,
scoring_strategy,
placeholder_search_logger,
time_budget,
ranking_score_threshold,
)?;
Ok(PartialSearchResult {
candidates: all_candidates,
document_scores: scores,
documents_ids: docids,
located_query_terms: None,
degraded,
used_negative_operator: false,
})
}
#[allow(clippy::too_many_arguments)]
#[tracing::instrument(level = "trace", skip_all, target = "search::main")]
pub fn execute_search(
ctx: &mut SearchContext<'_>,
query: Option<&str>,
terms_matching_strategy: TermsMatchingStrategy,
scoring_strategy: ScoringStrategy,
exhaustive_number_hits: bool,
mut universe: RoaringBitmap,
sort_criteria: &Option<Vec<AscDesc>>,
distinct: &Option<String>,
geo_strategy: geo_sort::Strategy,
from: usize,
length: usize,
words_limit: Option<usize>,
placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery>,
query_graph_logger: &mut dyn SearchLogger<QueryGraph>,
time_budget: TimeBudget,
ranking_score_threshold: Option<f64>,
locales: Option<&Vec<Language>>,
) -> Result<PartialSearchResult> {
check_sort_criteria(ctx, sort_criteria.as_ref())?;
let mut used_negative_operator = false;
let mut located_query_terms = None;
let query_terms = if let Some(query) = query {
let span = tracing::trace_span!(target: "search::tokens", "tokenizer_builder");
let entered = span.enter();
// We make sure that the analyzer is aware of the stop words;
// this ensures that the query builder is able to properly remove them.
let mut tokbuilder = TokenizerBuilder::new();
let stop_words = ctx.index.stop_words(ctx.txn)?;
if let Some(ref stop_words) = stop_words {
tokbuilder.stop_words(stop_words);
}
let separators = ctx.index.allowed_separators(ctx.txn)?;
let separators: Option<Vec<_>> =
separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
if let Some(ref separators) = separators {
tokbuilder.separators(separators);
}
let dictionary = ctx.index.dictionary(ctx.txn)?;
let dictionary: Option<Vec<_>> =
dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
if let Some(ref dictionary) = dictionary {
tokbuilder.words_dict(dictionary);
}
let db_locales;
match locales {
Some(locales) => {
if !locales.is_empty() {
tokbuilder.allow_list(locales);
}
}
None => {
// If no locales are specified, we use the locales specified in the localized attributes rules
let localized_attributes_rules = ctx.index.localized_attributes_rules(ctx.txn)?;
let fields_ids_map = ctx.index.fields_ids_map(ctx.txn)?;
let searchable_fields = ctx.index.searchable_fields_ids(ctx.txn)?;
let localized_fields = match &ctx.restricted_fids {
// if AttributeToSearchOn is set, use the restricted list of ids
Some(restricted_fids) => {
let iter = restricted_fids
.exact
.iter()
.chain(restricted_fids.tolerant.iter())
.map(|(fid, _)| *fid);
LocalizedFieldIds::new(&localized_attributes_rules, &fields_ids_map, iter)
}
// Otherwise use the full list of ids coming from the index searchable fields
None => LocalizedFieldIds::new(
&localized_attributes_rules,
&fields_ids_map,
searchable_fields.into_iter(),
),
};
db_locales = localized_fields.all_locales();
if !db_locales.is_empty() {
tokbuilder.allow_list(&db_locales);
}
}
};
let tokenizer = tokbuilder.build();
drop(entered);
let span = tracing::trace_span!(target: "search::tokens", "tokenize");
let entered = span.enter();
let tokens = tokenizer.tokenize(query);
drop(entered);
let ExtractedTokens { query_terms, negative_words, negative_phrases } =
located_query_terms_from_tokens(ctx, tokens, words_limit)?;
used_negative_operator = !negative_words.is_empty() || !negative_phrases.is_empty();
let ignored_documents = resolve_negative_words(ctx, Some(&universe), &negative_words)?;
let ignored_phrases = resolve_negative_phrases(ctx, &negative_phrases)?;
universe -= ignored_documents;
universe -= ignored_phrases;
if query_terms.is_empty() {
// Do a placeholder search instead
None
} else {
Some(query_terms)
}
} else {
None
};
let bucket_sort_output = if let Some(query_terms) = query_terms {
let (graph, new_located_query_terms) = QueryGraph::from_query(ctx, &query_terms)?;
located_query_terms = Some(new_located_query_terms);
let ranking_rules = get_ranking_rules_for_query_graph_search(
ctx,
sort_criteria,
geo_strategy,
terms_matching_strategy,
)?;
universe &=
resolve_universe(ctx, &universe, &graph, terms_matching_strategy, query_graph_logger)?;
bucket_sort(
ctx,
ranking_rules,
&graph,
distinct.as_deref(),
&universe,
from,
length,
scoring_strategy,
query_graph_logger,
time_budget,
ranking_score_threshold,
)?
} else {
let ranking_rules =
get_ranking_rules_for_placeholder_search(ctx, sort_criteria, geo_strategy)?;
bucket_sort(
ctx,
ranking_rules,
&PlaceholderQuery,
distinct.as_deref(),
&universe,
from,
length,
scoring_strategy,
placeholder_search_logger,
time_budget,
ranking_score_threshold,
)?
};
let BucketSortOutput { docids, scores, mut all_candidates, degraded } = bucket_sort_output;
let fields_ids_map = ctx.index.fields_ids_map(ctx.txn)?;
// The candidates are the universe unless the exhaustive number of hits
// is requested and a distinct attribute is set.
if exhaustive_number_hits {
let distinct_field = match distinct.as_deref() {
Some(distinct) => Some(distinct),
None => ctx.index.distinct_field(ctx.txn)?,
};
if let Some(f) = distinct_field {
if let Some(distinct_fid) = fields_ids_map.id(f) {
all_candidates = apply_distinct_rule(ctx, distinct_fid, &all_candidates)?.remaining;
}
}
}
Ok(PartialSearchResult {
candidates: all_candidates,
document_scores: scores,
documents_ids: docids,
located_query_terms,
degraded,
used_negative_operator,
})
}
fn check_sort_criteria(
ctx: &SearchContext<'_>,
sort_criteria: Option<&Vec<AscDesc>>,
) -> Result<()> {
let sort_criteria = if let Some(sort_criteria) = sort_criteria {
sort_criteria
} else {
return Ok(());
};
if sort_criteria.is_empty() {
return Ok(());
}
// We check that the sort ranking rule exists and return an
// error if the sort criteria are used while that rule is missing.
let sort_ranking_rule_missing = !ctx.index.criteria(ctx.txn)?.contains(&crate::Criterion::Sort);
if sort_ranking_rule_missing {
return Err(UserError::SortRankingRuleMissing.into());
}
// We check that we are allowed to use the sort criteria, i.e.
// that they are declared in the sortable fields.
let sortable_fields = ctx.index.sortable_fields(ctx.txn)?;
for asc_desc in sort_criteria {
match asc_desc.member() {
Member::Field(ref field) if !crate::is_faceted(field, &sortable_fields) => {
let (valid_fields, hidden_fields) =
ctx.index.remove_hidden_fields(ctx.txn, sortable_fields)?;
return Err(UserError::InvalidSortableAttribute {
field: field.to_string(),
valid_fields,
hidden_fields,
}
.into());
}
Member::Geo(_) if !sortable_fields.contains("_geo") => {
let (valid_fields, hidden_fields) =
ctx.index.remove_hidden_fields(ctx.txn, sortable_fields)?;
return Err(UserError::InvalidSortableAttribute {
field: "_geo".to_string(),
valid_fields,
hidden_fields,
}
.into());
}
_ => (),
}
}
Ok(())
}
pub struct PartialSearchResult {
pub located_query_terms: Option<Vec<LocatedQueryTerm>>,
pub candidates: RoaringBitmap,
pub documents_ids: Vec<DocumentId>,
pub document_scores: Vec<Vec<ScoreDetails>>,
pub degraded: bool,
pub used_negative_operator: bool,
}


@ -0,0 +1,536 @@
use std::cmp::{Ordering, Reverse};
use std::collections::BTreeMap;
use std::hash::{Hash, Hasher};
use fxhash::{FxHashMap, FxHasher};
use roaring::RoaringBitmap;
use super::interner::{FixedSizeInterner, Interned};
use super::query_term::{
self, number_of_typos_allowed, LocatedQueryTerm, LocatedQueryTermSubset, QueryTermSubset,
};
use super::small_bitmap::SmallBitmap;
use super::SearchContext;
use crate::search::new::interner::Interner;
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
use crate::Result;
/// A node of the [`QueryGraph`].
///
/// There are four types of nodes:
/// 1. `Start`: unique, represents the start of the query
/// 2. `End`: unique, represents the end of the query
/// 3. `Deleted` : represents a node that was deleted.
/// All deleted nodes are unreachable from the start node.
/// 4. `Term` is a regular node representing a word or combination of words
/// from the user query.
#[derive(Clone)]
pub struct QueryNode {
pub data: QueryNodeData,
pub predecessors: SmallBitmap<QueryNode>,
pub successors: SmallBitmap<QueryNode>,
}
#[derive(Clone, PartialEq, Eq, Hash)]
pub enum QueryNodeData {
Term(LocatedQueryTermSubset),
Deleted,
Start,
End,
}
/**
A graph representing all the ways to interpret the user's search query.
## Example 1
For the search query `sunflower`, we need to register the following things:
- we need to look for the exact word `sunflower`
- but also any word which is 1 or 2 typos apart from `sunflower`
- and every word that contains the prefix `sunflower`
- and also the couple of adjacent words `sun flower`
- as well as all the user-defined synonyms of `sunflower`
All these derivations of a word will be stored in [`QueryTerm`].
## Example 2
For the search query `summer house by`.
We also look for all word derivations of each term. And we also need to consider
the potential n-grams `summerhouse`, `summerhouseby`, and `houseby`.
Furthermore, we need to know which words these ngrams replace. This is done by creating the
following graph, where each node also contains a list of derivations:
```txt
            ┌─▶ houseby ──────────────┐
            │                         ▼
START ─▶ summer ─▶ house ─▶ by ────▶ END
  │  │                      ▲         ▲
  │  └──▶ summerhouse ──────┘         │
  └─────▶ summerhouseby ──────────────┘
```
Note also that each node has a range of positions associated with it,
such that `summer` is known to be a word at the positions `0..=0` and `houseby`
is registered with the positions `1..=2`. When two nodes are connected by an edge,
it means that they are potentially next to each other in the user's search query
(depending on the [`TermsMatchingStrategy`](crate::search::TermsMatchingStrategy)
and the transformations that were done on the query graph).
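
## Illustrative sketch

A minimal, hypothetical traversal of the graph (a sketch under assumed
bindings, not a doctest from this crate):

```ignore
let (graph, located_terms) = QueryGraph::from_query(ctx, &terms)?;
for (_id, node) in graph.nodes.iter() {
    // every node is either Start, End, Deleted, or a Term subset
}
```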
*/
#[derive(Clone)]
pub struct QueryGraph {
/// The index of the start node within `self.nodes`
pub root_node: Interned<QueryNode>,
/// The index of the end node within `self.nodes`
pub end_node: Interned<QueryNode>,
/// The list of all query nodes
pub nodes: FixedSizeInterner<QueryNode>,
}
impl QueryGraph {
/// Build the query graph from the parsed user search query and return an updated list
/// of the located query terms, which also contains the ngrams.
pub fn from_query(
ctx: &mut SearchContext<'_>,
// The terms here must be consecutive
terms: &[LocatedQueryTerm],
) -> Result<(QueryGraph, Vec<LocatedQueryTerm>)> {
let mut new_located_query_terms = terms.to_vec();
let nbr_typos = number_of_typos_allowed(ctx)?;
let mut nodes_data: Vec<QueryNodeData> = vec![QueryNodeData::Start, QueryNodeData::End];
let root_node = 0;
let end_node = 1;
// We could consider generalizing to 4, 5, 6, 7, etc. ngrams
let (mut prev2, mut prev1, mut prev0): (Vec<u16>, Vec<u16>, Vec<u16>) =
(vec![], vec![], vec![root_node]);
let original_terms_len = terms.len();
for term_idx in 0..original_terms_len {
let mut new_nodes = vec![];
let new_node_idx = add_node(
&mut nodes_data,
QueryNodeData::Term(LocatedQueryTermSubset {
term_subset: QueryTermSubset::full(terms[term_idx].value),
positions: terms[term_idx].positions.clone(),
term_ids: term_idx as u8..=term_idx as u8,
}),
);
new_nodes.push(new_node_idx);
if !prev1.is_empty() {
if let Some(ngram) =
query_term::make_ngram(ctx, &terms[term_idx - 1..=term_idx], &nbr_typos)?
{
new_located_query_terms.push(ngram.clone());
let ngram_idx = add_node(
&mut nodes_data,
QueryNodeData::Term(LocatedQueryTermSubset {
term_subset: QueryTermSubset::full(ngram.value),
positions: ngram.positions,
term_ids: term_idx as u8 - 1..=term_idx as u8,
}),
);
new_nodes.push(ngram_idx);
}
}
if !prev2.is_empty() {
if let Some(ngram) =
query_term::make_ngram(ctx, &terms[term_idx - 2..=term_idx], &nbr_typos)?
{
new_located_query_terms.push(ngram.clone());
let ngram_idx = add_node(
&mut nodes_data,
QueryNodeData::Term(LocatedQueryTermSubset {
term_subset: QueryTermSubset::full(ngram.value),
positions: ngram.positions,
term_ids: term_idx as u8 - 2..=term_idx as u8,
}),
);
new_nodes.push(ngram_idx);
}
}
(prev0, prev1, prev2) = (new_nodes, prev0, prev1);
}
let root_node = Interned::from_raw(root_node);
let end_node = Interned::from_raw(end_node);
let mut nodes = FixedSizeInterner::new(
nodes_data.len() as u16,
QueryNode {
data: QueryNodeData::Deleted,
predecessors: SmallBitmap::new(nodes_data.len() as u16),
successors: SmallBitmap::new(nodes_data.len() as u16),
},
);
for (node_idx, node_data) in nodes_data.into_iter().enumerate() {
let node = nodes.get_mut(Interned::from_raw(node_idx as u16));
node.data = node_data;
}
let mut graph = QueryGraph { root_node, end_node, nodes };
graph.build_initial_edges();
Ok((graph, new_located_query_terms))
}
/// Remove the given nodes, connecting all their predecessors to all their successors.
pub fn remove_nodes_keep_edges(&mut self, nodes: &[Interned<QueryNode>]) {
for &node_id in nodes {
let node = self.nodes.get(node_id);
let old_node_pred = node.predecessors.clone();
let old_node_succ = node.successors.clone();
for pred in old_node_pred.iter() {
let pred_successors = &mut self.nodes.get_mut(pred).successors;
pred_successors.remove(node_id);
pred_successors.union(&old_node_succ);
}
for succ in old_node_succ.iter() {
let succ_predecessors = &mut self.nodes.get_mut(succ).predecessors;
succ_predecessors.remove(node_id);
succ_predecessors.union(&old_node_pred);
}
let node = self.nodes.get_mut(node_id);
node.data = QueryNodeData::Deleted;
node.predecessors.clear();
node.successors.clear();
}
}
/// Remove the given nodes and all their edges from the query graph.
pub fn remove_nodes(&mut self, nodes: &[Interned<QueryNode>]) {
for &node_id in nodes {
let node = &self.nodes.get(node_id);
let old_node_pred = node.predecessors.clone();
let old_node_succ = node.successors.clone();
for pred in old_node_pred.iter() {
self.nodes.get_mut(pred).successors.remove(node_id);
}
for succ in old_node_succ.iter() {
self.nodes.get_mut(succ).predecessors.remove(node_id);
}
let node = self.nodes.get_mut(node_id);
node.data = QueryNodeData::Deleted;
node.predecessors.clear();
node.successors.clear();
}
}
/// Simplify the query graph by removing all nodes that are disconnected from
/// the start or end nodes.
pub fn simplify(&mut self) {
loop {
let mut nodes_to_remove = vec![];
for (node_idx, node) in self.nodes.iter() {
if (!matches!(node.data, QueryNodeData::End | QueryNodeData::Deleted)
&& node.successors.is_empty())
|| (!matches!(node.data, QueryNodeData::Start | QueryNodeData::Deleted)
&& node.predecessors.is_empty())
{
nodes_to_remove.push(node_idx);
}
}
if nodes_to_remove.is_empty() {
break;
} else {
self.remove_nodes(&nodes_to_remove);
}
}
}
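/// Connect the nodes of the graph: the successors of a node are the nodes
/// whose first term id is the smallest term id strictly greater than the
/// node's last term id.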
fn build_initial_edges(&mut self) {
for (_, node) in self.nodes.iter_mut() {
node.successors.clear();
node.predecessors.clear();
}
for node_id in self.nodes.indexes() {
let node = self.nodes.get(node_id);
let end_prev_term_id = match &node.data {
QueryNodeData::Term(term) => *term.term_ids.end() as i16,
QueryNodeData::Start => -1,
QueryNodeData::Deleted => continue,
QueryNodeData::End => continue,
};
let successors = {
let mut successors = SmallBitmap::for_interned_values_in(&self.nodes);
let mut min = i16::MAX;
for (node_id, node) in self.nodes.iter() {
let start_next_term_id = match &node.data {
QueryNodeData::Term(term) => *term.term_ids.start() as i16,
QueryNodeData::End => i16::MAX,
QueryNodeData::Start => continue,
QueryNodeData::Deleted => continue,
};
if start_next_term_id <= end_prev_term_id {
continue;
}
match start_next_term_id.cmp(&min) {
Ordering::Less => {
min = start_next_term_id;
successors.clear();
successors.insert(node_id);
}
Ordering::Equal => {
successors.insert(node_id);
}
Ordering::Greater => continue,
}
}
successors
};
let node = self.nodes.get_mut(node_id);
node.successors = successors.clone();
for successor in successors.iter() {
let successor = self.nodes.get_mut(successor);
successor.predecessors.insert(node_id);
}
}
}
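/// Compute the buckets of nodes to remove, in order, for the `Frequency`
/// terms matching strategy: the most frequent terms (and the terms matching
/// no document at all) are removed first, so that the rarest, most
/// discriminating terms are kept the longest.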
pub fn removal_order_for_terms_matching_strategy_frequency(
&self,
ctx: &mut SearchContext<'_>,
) -> Result<Vec<SmallBitmap<QueryNode>>> {
// lookup frequency for each term
let mut term_with_frequency: Vec<(u8, u64)> = {
let mut term_docids: BTreeMap<u8, RoaringBitmap> = Default::default();
for (_, node) in self.nodes.iter() {
match &node.data {
QueryNodeData::Term(t) => {
let docids = compute_query_term_subset_docids(ctx, None, &t.term_subset)?;
for id in t.term_ids.clone() {
term_docids
.entry(id)
.and_modify(|curr| *curr |= &docids)
.or_insert_with(|| docids.clone());
}
}
QueryNodeData::Deleted | QueryNodeData::Start | QueryNodeData::End => continue,
}
}
term_docids
.into_iter()
.map(|(idx, docids)| match docids.len() {
0 => (idx, u64::MAX),
frequency => (idx, frequency),
})
.collect()
};
term_with_frequency.sort_by_key(|(_, frequency)| Reverse(*frequency));
let mut term_weight = BTreeMap::new();
let mut weight: u16 = 1;
let mut peekable = term_with_frequency.into_iter().peekable();
while let Some((idx, frequency)) = peekable.next() {
term_weight.insert(idx, weight);
if peekable.peek().map_or(false, |(_, f)| frequency != *f) {
weight += 1;
}
}
let cost_of_term_idx = move |term_idx: u8| *term_weight.get(&term_idx).unwrap();
Ok(self.removal_order_for_terms_matching_strategy(ctx, cost_of_term_idx))
}
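/// Compute the buckets of nodes to remove, in order, for the `Last` terms
/// matching strategy: the terms at the end of the query are removed first.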
pub fn removal_order_for_terms_matching_strategy_last(
&self,
ctx: &SearchContext<'_>,
) -> Vec<SmallBitmap<QueryNode>> {
let (first_term_idx, last_term_idx) = {
let mut first_term_idx = u8::MAX;
let mut last_term_idx = 0u8;
for (_, node) in self.nodes.iter() {
match &node.data {
QueryNodeData::Term(t) => {
if *t.term_ids.end() > last_term_idx {
last_term_idx = *t.term_ids.end();
}
if *t.term_ids.start() < first_term_idx {
first_term_idx = *t.term_ids.start();
}
}
QueryNodeData::Deleted | QueryNodeData::Start | QueryNodeData::End => continue,
}
}
(first_term_idx, last_term_idx)
};
if first_term_idx >= last_term_idx {
return vec![];
}
let cost_of_term_idx = |term_idx: u8| {
let rank = 1 + last_term_idx - term_idx;
rank as u16
};
self.removal_order_for_terms_matching_strategy(ctx, cost_of_term_idx)
}
pub fn removal_order_for_terms_matching_strategy(
&self,
ctx: &SearchContext<'_>,
order: impl Fn(u8) -> u16,
) -> Vec<SmallBitmap<QueryNode>> {
let mut nodes_to_remove = BTreeMap::<u16, SmallBitmap<QueryNode>>::new();
let mut at_least_one_mandatory_term = false;
for (node_id, node) in self.nodes.iter() {
let QueryNodeData::Term(t) = &node.data else { continue };
if t.term_subset.original_phrase(ctx).is_some() || t.term_subset.is_mandatory() {
at_least_one_mandatory_term = true;
continue;
}
let mut cost = 0;
for id in t.term_ids.clone() {
cost = std::cmp::max(cost, order(id));
}
nodes_to_remove
.entry(cost)
.or_insert_with(|| SmallBitmap::for_interned_values_in(&self.nodes))
.insert(node_id);
}
let mut res: Vec<_> = nodes_to_remove.into_values().collect();
if !at_least_one_mandatory_term {
res.pop();
}
res
}
/// Number of words in the phrases in this query graph
pub(crate) fn words_in_phrases_count(&self, ctx: &SearchContext<'_>) -> usize {
let mut word_count = 0;
for (_, node) in self.nodes.iter() {
match &node.data {
QueryNodeData::Term(term) => {
let Some(phrase) = term.term_subset.original_phrase(ctx) else { continue };
let phrase = ctx.phrase_interner.get(phrase);
word_count += phrase.words.iter().copied().filter(|a| a.is_some()).count()
}
_ => continue,
}
}
word_count
}
}
fn add_node(nodes_data: &mut Vec<QueryNodeData>, node_data: QueryNodeData) -> u16 {
let new_node_idx = nodes_data.len() as u16;
nodes_data.push(node_data);
new_node_idx
}
impl QueryGraph {
/*
Build a query graph from a list of paths
The paths are composed of source and dest terms.
For example, consider the following paths:
```txt
PATH 1 : a -> b1 -> c1 -> d -> e1
PATH 2 : a -> b2 -> c2 -> d -> e2
```
Then the resulting graph will be:
```txt
    ┌─▶ b1 ─▶ c1 ─▶ d ─▶ e1
a ──┤
    └─▶ b2 ─▶ c2 ─▶ d ─▶ e2
```
*/
pub fn build_from_paths(
paths: Vec<Vec<(Option<LocatedQueryTermSubset>, LocatedQueryTermSubset)>>,
) -> Self {
let mut node_data = Interner::default();
let root_node = node_data.push(QueryNodeData::Start);
let end_node = node_data.push(QueryNodeData::End);
let mut paths_with_single_terms = vec![];
for path in paths {
let mut processed_path = vec![];
let mut prev_dest_term: Option<LocatedQueryTermSubset> = None;
for (start_term, dest_term) in path {
if let Some(prev_dest_term) = prev_dest_term.take() {
if let Some(mut start_term) = start_term {
if start_term.term_ids == prev_dest_term.term_ids {
start_term.term_subset.intersect(&prev_dest_term.term_subset);
processed_path.push(start_term);
} else {
processed_path.push(prev_dest_term);
processed_path.push(start_term);
}
} else {
processed_path.push(prev_dest_term);
}
} else if let Some(start_term) = start_term {
processed_path.push(start_term);
}
prev_dest_term = Some(dest_term);
}
if let Some(prev_dest_term) = prev_dest_term {
processed_path.push(prev_dest_term);
}
paths_with_single_terms.push(processed_path);
}
let mut paths_with_single_terms_and_suffix_hash = vec![];
for path in paths_with_single_terms {
let mut hasher = FxHasher::default();
let mut path_with_hash = vec![];
for term in path.into_iter().rev() {
term.hash(&mut hasher);
path_with_hash.push((term, hasher.finish()));
}
path_with_hash.reverse();
paths_with_single_terms_and_suffix_hash.push(path_with_hash);
}
let mut node_data_id_for_term_and_suffix_hash =
FxHashMap::<(LocatedQueryTermSubset, u64), Interned<QueryNodeData>>::default();
let mut paths_with_ids = vec![];
for path in paths_with_single_terms_and_suffix_hash {
let mut path_with_ids = vec![];
for (term, suffix_hash) in path {
let node_data_id = node_data_id_for_term_and_suffix_hash
.entry((term.clone(), suffix_hash))
.or_insert_with(|| node_data.push(QueryNodeData::Term(term)));
path_with_ids.push(Interned::from_raw(node_data_id.into_raw()));
}
paths_with_ids.push(path_with_ids);
}
let nodes_data = node_data.freeze();
let nodes_data_len = nodes_data.len();
let mut nodes = nodes_data.map_move(|n| QueryNode {
data: n,
predecessors: SmallBitmap::new(nodes_data_len),
successors: SmallBitmap::new(nodes_data_len),
});
let root_node = Interned::<QueryNode>::from_raw(root_node.into_raw());
let end_node = Interned::<QueryNode>::from_raw(end_node.into_raw());
for path in paths_with_ids {
let mut prev_node_id = root_node;
for node_id in path {
let prev_node = nodes.get_mut(prev_node_id);
prev_node.successors.insert(node_id);
let node = nodes.get_mut(node_id);
node.predecessors.insert(prev_node_id);
prev_node_id = node_id;
}
let prev_node = nodes.get_mut(prev_node_id);
prev_node.successors.insert(end_node);
let node = nodes.get_mut(end_node);
node.predecessors.insert(prev_node_id);
}
QueryGraph { root_node, end_node, nodes }
}
}


@ -0,0 +1,428 @@
use std::borrow::Cow;
use std::collections::BTreeSet;
use std::ops::ControlFlow;
use fst::automaton::Str;
use fst::{Automaton, IntoStreamer, Streamer};
use heed::types::DecodeIgnore;
use super::{OneTypoTerm, Phrase, QueryTerm, ZeroTypoTerm};
use crate::search::fst_utils::{Complement, Intersection, StartsWith, Union};
use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::{Lazy, TwoTypoTerm};
use crate::search::new::{limits, SearchContext};
use crate::search::{build_dfa, get_first};
use crate::{Result, MAX_WORD_LENGTH};
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NumberOfTypos {
Zero,
One,
Two,
}
pub enum ZeroOrOneTypo {
Zero,
One,
}
impl Interned<QueryTerm> {
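/// Lazily compute the missing derivation sets of this term: the one-typo
/// subterm when at most one typo is allowed, or both the one- and two-typo
/// subterms otherwise.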
pub fn compute_fully_if_needed(self, ctx: &mut SearchContext<'_>) -> Result<()> {
let s = ctx.term_interner.get_mut(self);
if s.max_levenshtein_distance <= 1 && s.one_typo.is_uninit() {
assert!(s.two_typo.is_uninit());
// Initialize one_typo subterm even if max_nbr_typo is 0 because of split words
self.initialize_one_typo_subterm(ctx)?;
let s = ctx.term_interner.get_mut(self);
assert!(s.one_typo.is_init());
s.two_typo = Lazy::Init(TwoTypoTerm::default());
} else if s.max_levenshtein_distance > 1 && s.two_typo.is_uninit() {
assert!(s.two_typo.is_uninit());
self.initialize_one_and_two_typo_subterm(ctx)?;
let s = ctx.term_interner.get_mut(self);
assert!(s.one_typo.is_init() && s.two_typo.is_init());
}
Ok(())
}
}
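/// Visit the words of the given FST that strictly extend `word_interned`,
/// i.e. its zero-typo prefix derivations, until the callback breaks.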
fn find_zero_typo_prefix_derivations(
word_interned: Interned<String>,
fst: fst::Set<Cow<'_, [u8]>>,
word_interner: &mut DedupInterner<String>,
mut visit: impl FnMut(Interned<String>) -> Result<ControlFlow<()>>,
) -> Result<()> {
let word = word_interner.get(word_interned).to_owned();
let word = word.as_str();
let prefix = Str::new(word).starts_with();
let mut stream = fst.search(prefix).into_stream();
while let Some(derived_word) = stream.next() {
let derived_word = std::str::from_utf8(derived_word)?.to_owned();
let derived_word_interned = word_interner.insert(derived_word);
if derived_word_interned != word_interned {
let cf = visit(derived_word_interned)?;
if cf.is_break() {
break;
}
}
}
Ok(())
}
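/// Visit the words of the index that are within one typo of the given word
/// (restricted to words sharing its first character), reporting for each
/// derived word whether it is an exact match or one typo away.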
fn find_zero_one_typo_derivations(
ctx: &mut SearchContext<'_>,
word_interned: Interned<String>,
is_prefix: bool,
mut visit: impl FnMut(Interned<String>, ZeroOrOneTypo) -> Result<ControlFlow<()>>,
) -> Result<()> {
let fst = ctx.get_words_fst()?;
let word = ctx.word_interner.get(word_interned).to_owned();
let word = word.as_str();
let dfa = build_dfa(word, 1, is_prefix);
let starts = StartsWith(Str::new(get_first(word)));
let mut stream = fst.search_with_state(Intersection(starts, &dfa)).into_stream();
while let Some((derived_word, state)) = stream.next() {
let derived_word = std::str::from_utf8(derived_word)?;
let derived_word = ctx.word_interner.insert(derived_word.to_owned());
let d = dfa.distance(state.1);
match d.to_u8() {
0 => {
if derived_word != word_interned {
let cf = visit(derived_word, ZeroOrOneTypo::Zero)?;
if cf.is_break() {
break;
}
}
}
1 => {
let cf = visit(derived_word, ZeroOrOneTypo::One)?;
if cf.is_break() {
break;
}
}
_ => {
unreachable!("One typo dfa produced multiple typos")
}
}
}
Ok(())
}
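/// Visit the words of the given FST that are within two typos of the given
/// word; a typo on the first letter is always counted as two typos.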
fn find_zero_one_two_typo_derivations(
word_interned: Interned<String>,
is_prefix: bool,
fst: fst::Set<Cow<'_, [u8]>>,
word_interner: &mut DedupInterner<String>,
mut visit: impl FnMut(Interned<String>, NumberOfTypos) -> Result<ControlFlow<()>>,
) -> Result<()> {
let word = word_interner.get(word_interned).to_owned();
let word = word.as_str();
let starts = StartsWith(Str::new(get_first(word)));
let first = Intersection(build_dfa(word, 1, is_prefix), Complement(&starts));
let second_dfa = build_dfa(word, 2, is_prefix);
let second = Intersection(&second_dfa, &starts);
let automaton = Union(first, &second);
let mut stream = fst.search_with_state(automaton).into_stream();
while let Some((derived_word, state)) = stream.next() {
let derived_word = std::str::from_utf8(derived_word)?;
let derived_word_interned = word_interner.insert(derived_word.to_owned());
// If the typo is on the first letter, we know the number of typos
// is two
if get_first(derived_word) != get_first(word) {
let cf = visit(derived_word_interned, NumberOfTypos::Two)?;
if cf.is_break() {
break;
}
} else {
// Otherwise, we know that the second DFA matched, so we compute the
// correct distance from it
let d = second_dfa.distance((state.1).0);
match d.to_u8() {
0 => {
if derived_word_interned != word_interned {
let cf = visit(derived_word_interned, NumberOfTypos::Zero)?;
if cf.is_break() {
break;
}
}
}
1 => {
let cf = visit(derived_word_interned, NumberOfTypos::One)?;
if cf.is_break() {
break;
}
}
2 => {
let cf = visit(derived_word_interned, NumberOfTypos::Two)?;
if cf.is_break() {
break;
}
}
_ => unreachable!("2 typos DFA produced a distance greater than 2"),
}
}
}
Ok(())
}
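/// Build a `QueryTerm` whose zero-typo derivations (exact word, prefix
/// derivations, synonyms, prefix database entry) are computed eagerly,
/// while the one- and two-typo derivations are left uninitialized until
/// they are actually needed.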
pub fn partially_initialized_term_from_word(
ctx: &mut SearchContext<'_>,
word: &str,
max_typo: u8,
is_prefix: bool,
is_ngram: bool,
) -> Result<QueryTerm> {
let word_interned = ctx.word_interner.insert(word.to_owned());
if word.len() > MAX_WORD_LENGTH {
return Ok({
QueryTerm {
original: ctx.word_interner.insert(word.to_owned()),
ngram_words: None,
is_prefix: false,
max_levenshtein_distance: 0,
zero_typo: <_>::default(),
one_typo: Lazy::Init(<_>::default()),
two_typo: Lazy::Init(<_>::default()),
}
});
}
let fst = ctx.index.words_fst(ctx.txn)?;
let use_prefix_db = is_prefix
&& (ctx
.index
.word_prefix_docids
.remap_data_type::<DecodeIgnore>()
.get(ctx.txn, word)?
.is_some()
|| (!is_ngram
&& ctx
.index
.exact_word_prefix_docids
.remap_data_type::<DecodeIgnore>()
.get(ctx.txn, word)?
.is_some()));
let use_prefix_db = if use_prefix_db { Some(word_interned) } else { None };
let mut zero_typo = None;
let mut prefix_of = BTreeSet::new();
if fst.contains(word) {
zero_typo = Some(word_interned);
}
if is_prefix && use_prefix_db.is_none() {
find_zero_typo_prefix_derivations(
word_interned,
fst,
&mut ctx.word_interner,
|derived_word| {
if prefix_of.len() < limits::MAX_PREFIX_COUNT {
prefix_of.insert(derived_word);
Ok(ControlFlow::Continue(()))
} else {
Ok(ControlFlow::Break(()))
}
},
)?;
}
let synonyms = ctx.index.synonyms(ctx.txn)?;
let mut synonym_word_count = 0;
let synonyms = synonyms
.get(&vec![word.to_owned()])
.cloned()
.unwrap_or_default()
.into_iter()
.take(limits::MAX_SYNONYM_PHRASE_COUNT)
.filter_map(|words| {
if synonym_word_count + words.len() > limits::MAX_SYNONYM_WORD_COUNT {
return None;
}
synonym_word_count += words.len();
let words = words.into_iter().map(|w| Some(ctx.word_interner.insert(w))).collect();
Some(ctx.phrase_interner.insert(Phrase { words }))
})
.collect();
let zero_typo =
ZeroTypoTerm { phrase: None, exact: zero_typo, prefix_of, synonyms, use_prefix_db };
Ok(QueryTerm {
original: word_interned,
ngram_words: None,
max_levenshtein_distance: max_typo,
is_prefix,
zero_typo,
one_typo: Lazy::Uninit,
two_typo: Lazy::Uninit,
})
}
fn find_split_words(ctx: &mut SearchContext<'_>, word: &str) -> Result<Option<Interned<Phrase>>> {
if let Some((l, r)) = split_best_frequency(ctx, word)? {
Ok(Some(ctx.phrase_interner.insert(Phrase { words: vec![Some(l), Some(r)] })))
} else {
Ok(None)
}
}
impl Interned<QueryTerm> {
fn initialize_one_typo_subterm(self, ctx: &mut SearchContext<'_>) -> Result<()> {
let self_mut = ctx.term_interner.get_mut(self);
let allows_split_words = self_mut.allows_split_words();
let QueryTerm {
original,
is_prefix,
one_typo,
max_levenshtein_distance: max_nbr_typos,
..
} = self_mut;
let original = *original;
let is_prefix = *is_prefix;
if one_typo.is_init() {
return Ok(());
}
let mut one_typo_words = BTreeSet::new();
if *max_nbr_typos > 0 {
find_zero_one_typo_derivations(ctx, original, is_prefix, |derived_word, nbr_typos| {
match nbr_typos {
ZeroOrOneTypo::Zero => {}
ZeroOrOneTypo::One => {
if one_typo_words.len() < limits::MAX_ONE_TYPO_COUNT {
one_typo_words.insert(derived_word);
} else {
return Ok(ControlFlow::Break(()));
}
}
}
Ok(ControlFlow::Continue(()))
})?;
}
let split_words = if allows_split_words {
let original_str = ctx.word_interner.get(original).to_owned();
find_split_words(ctx, original_str.as_str())?
} else {
None
};
let self_mut = ctx.term_interner.get_mut(self);
// Only add the split words to the derivations if:
// 1. the term is neither an ngram nor a phrase; OR
// 2. the term is an ngram, but the split words are different from the ngram's component words
let split_words = if let Some((ngram_words, split_words)) =
self_mut.ngram_words.as_ref().zip(split_words.as_ref())
{
let Phrase { words } = ctx.phrase_interner.get(*split_words);
if ngram_words.iter().ne(words.iter().flatten()) {
Some(*split_words)
} else {
None
}
} else {
split_words
};
let one_typo = OneTypoTerm { split_words, one_typo: one_typo_words };
self_mut.one_typo = Lazy::Init(one_typo);
Ok(())
}
fn initialize_one_and_two_typo_subterm(self, ctx: &mut SearchContext<'_>) -> Result<()> {
let self_mut = ctx.term_interner.get_mut(self);
let QueryTerm {
original,
is_prefix,
two_typo,
max_levenshtein_distance: max_nbr_typos,
..
} = self_mut;
let original_str = ctx.word_interner.get(*original).to_owned();
if two_typo.is_init() {
return Ok(());
}
let mut one_typo_words = BTreeSet::new();
let mut two_typo_words = BTreeSet::new();
if *max_nbr_typos > 0 {
find_zero_one_two_typo_derivations(
*original,
*is_prefix,
ctx.index.words_fst(ctx.txn)?,
&mut ctx.word_interner,
|derived_word, nbr_typos| {
if one_typo_words.len() >= limits::MAX_ONE_TYPO_COUNT
&& two_typo_words.len() >= limits::MAX_TWO_TYPOS_COUNT
{
// No chance we will add either one- or two-typo derivations anymore, stop iterating.
return Ok(ControlFlow::Break(()));
}
match nbr_typos {
NumberOfTypos::Zero => {}
NumberOfTypos::One => {
if one_typo_words.len() < limits::MAX_ONE_TYPO_COUNT {
one_typo_words.insert(derived_word);
}
}
NumberOfTypos::Two => {
if two_typo_words.len() < limits::MAX_TWO_TYPOS_COUNT {
two_typo_words.insert(derived_word);
}
}
}
Ok(ControlFlow::Continue(()))
},
)?;
}
let split_words = find_split_words(ctx, original_str.as_str())?;
let self_mut = ctx.term_interner.get_mut(self);
let one_typo = OneTypoTerm { one_typo: one_typo_words, split_words };
let two_typo = TwoTypoTerm { two_typos: two_typo_words };
self_mut.one_typo = Lazy::Init(one_typo);
self_mut.two_typo = Lazy::Init(two_typo);
Ok(())
}
}
/// Split the original word into the two words that appear the
/// most next to each other in the index.
///
/// Return `None` if the original word cannot be split.
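///
/// For example (illustrative values): for `"sunflower"`, every split point
/// is tried (`s|unflower`, `su|nflower`, ..., `sunflowe|r`) and the pair
/// with the highest word-pair proximity frequency in the index, perhaps
/// `("sun", "flower")`, is returned.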
fn split_best_frequency(
ctx: &mut SearchContext<'_>,
original: &str,
) -> Result<Option<(Interned<String>, Interned<String>)>> {
let chars = original.char_indices().skip(1);
let mut best = None;
for (i, _) in chars {
let (left, right) = original.split_at(i);
let left = ctx.word_interner.insert(left.to_owned());
let right = ctx.word_interner.insert(right.to_owned());
if let Some(frequency) = ctx.get_db_word_pair_proximity_docids_len(None, left, right, 1)? {
if best.map_or(true, |(old, _, _)| frequency > old) {
best = Some((frequency, left, right));
}
}
}
Ok(best.map(|(_, left, right)| (left, right)))
}


@ -0,0 +1,510 @@
mod compute_derivations;
mod ntypo_subset;
mod parse_query;
mod phrase;
use std::collections::BTreeSet;
use std::iter::FromIterator;
use std::ops::RangeInclusive;
use either::Either;
pub use ntypo_subset::NTypoTermSubset;
pub use parse_query::{
located_query_terms_from_tokens, make_ngram, number_of_typos_allowed, ExtractedTokens,
};
pub use phrase::Phrase;
use super::interner::{DedupInterner, Interned};
use super::{limits, SearchContext, Word};
use crate::Result;
/// A set of word derivations attached to a location in the search query.
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct LocatedQueryTermSubset {
pub term_subset: QueryTermSubset,
pub positions: RangeInclusive<u16>,
pub term_ids: RangeInclusive<u8>,
}
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct QueryTermSubset {
original: Interned<QueryTerm>,
zero_typo_subset: NTypoTermSubset,
one_typo_subset: NTypoTermSubset,
two_typo_subset: NTypoTermSubset,
/// `true` if the term cannot be deleted through the term matching strategy
///
/// Note that there are other reasons for which a term cannot be deleted, such as
/// being a phrase. In that case, this field could be set to `false`, but it
/// still wouldn't be deletable by the term matching strategy.
mandatory: bool,
}
#[derive(Clone, PartialEq, Eq, Hash)]
pub struct QueryTerm {
original: Interned<String>,
ngram_words: Option<Vec<Interned<String>>>,
max_levenshtein_distance: u8,
is_prefix: bool,
zero_typo: ZeroTypoTerm,
// May not be computed yet
one_typo: Lazy<OneTypoTerm>,
// May not be computed yet
two_typo: Lazy<TwoTypoTerm>,
}
// SubTerms will be in a dedup interner
#[derive(Default, Clone, PartialEq, Eq, Hash)]
struct ZeroTypoTerm {
/// The original phrase, if any
phrase: Option<Interned<Phrase>>,
/// A single word equivalent to the original term, with zero typos
exact: Option<Interned<String>>,
/// All the words that contain the original word as prefix
prefix_of: BTreeSet<Interned<String>>,
/// All the synonyms of the original word or phrase
synonyms: BTreeSet<Interned<Phrase>>,
/// A prefix in the prefix databases matching the original word
use_prefix_db: Option<Interned<String>>,
}
#[derive(Default, Clone, PartialEq, Eq, Hash)]
struct OneTypoTerm {
/// The original word split into multiple consecutive words
split_words: Option<Interned<Phrase>>,
/// Words that are 1 typo away from the original word
one_typo: BTreeSet<Interned<String>>,
}
#[derive(Default, Clone, PartialEq, Eq, Hash)]
struct TwoTypoTerm {
/// Words that are 2 typos away from the original word
two_typos: BTreeSet<Interned<String>>,
}
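/// A value that may not have been computed yet: `Uninit` until it is
/// computed, `Init` afterwards.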
#[derive(Clone, PartialEq, Eq, Hash)]
pub enum Lazy<T> {
Uninit,
Init(T),
}
impl<T> Lazy<T> {
pub fn is_init(&self) -> bool {
match self {
Lazy::Uninit => false,
Lazy::Init(_) => true,
}
}
pub fn is_uninit(&self) -> bool {
match self {
Lazy::Uninit => true,
Lazy::Init(_) => false,
}
}
}
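/// A term that must be matched exactly: either a full phrase or a single word.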
#[derive(Clone, Copy)]
pub enum ExactTerm {
Phrase(Interned<Phrase>),
Word(Interned<String>),
}
impl ExactTerm {
pub fn interned_words<'ctx>(
&self,
ctx: &'ctx SearchContext<'ctx>,
) -> impl Iterator<Item = Option<Interned<String>>> + 'ctx {
match *self {
ExactTerm::Phrase(phrase) => {
let phrase = ctx.phrase_interner.get(phrase);
Either::Left(phrase.words.iter().copied())
}
ExactTerm::Word(word) => Either::Right(std::iter::once(Some(word))),
}
}
}
impl QueryTermSubset {
pub fn is_mandatory(&self) -> bool {
self.mandatory
}
pub fn make_mandatory(&mut self) {
self.mandatory = true;
}
pub fn exact_term(&self, ctx: &SearchContext<'_>) -> Option<ExactTerm> {
let full_query_term = ctx.term_interner.get(self.original);
if full_query_term.ngram_words.is_some() {
return None;
}
if let Some(phrase) = full_query_term.zero_typo.phrase {
self.zero_typo_subset.contains_phrase(phrase).then_some(ExactTerm::Phrase(phrase))
} else if let Some(word) = full_query_term.zero_typo.exact {
self.zero_typo_subset.contains_word(word).then_some(ExactTerm::Word(word))
} else {
None
}
}
pub fn empty(for_term: Interned<QueryTerm>) -> Self {
Self {
original: for_term,
zero_typo_subset: NTypoTermSubset::Nothing,
one_typo_subset: NTypoTermSubset::Nothing,
two_typo_subset: NTypoTermSubset::Nothing,
mandatory: false,
}
}
pub fn full(for_term: Interned<QueryTerm>) -> Self {
Self {
original: for_term,
zero_typo_subset: NTypoTermSubset::All,
one_typo_subset: NTypoTermSubset::All,
two_typo_subset: NTypoTermSubset::All,
mandatory: false,
}
}
pub fn union(&mut self, other: &Self) {
assert!(self.original == other.original);
self.zero_typo_subset.union(&other.zero_typo_subset);
self.one_typo_subset.union(&other.one_typo_subset);
self.two_typo_subset.union(&other.two_typo_subset);
}
pub fn intersect(&mut self, other: &Self) {
assert!(self.original == other.original);
self.zero_typo_subset.intersect(&other.zero_typo_subset);
self.one_typo_subset.intersect(&other.one_typo_subset);
self.two_typo_subset.intersect(&other.two_typo_subset);
}
pub fn use_prefix_db(&self, ctx: &SearchContext<'_>) -> Option<Word> {
let original = ctx.term_interner.get(self.original);
let use_prefix_db = original.zero_typo.use_prefix_db?;
let word = match &self.zero_typo_subset {
NTypoTermSubset::All => Some(use_prefix_db),
NTypoTermSubset::Subset { words, phrases: _ } => {
if words.contains(&use_prefix_db) {
Some(use_prefix_db)
} else {
None
}
}
NTypoTermSubset::Nothing => None,
};
word.map(|word| {
if original.ngram_words.is_some() {
Word::Derived(word)
} else {
Word::Original(word)
}
})
}
pub fn all_single_words_except_prefix_db(
&self,
ctx: &mut SearchContext<'_>,
) -> Result<BTreeSet<Word>> {
let mut result = BTreeSet::default();
if !self.one_typo_subset.is_empty() || !self.two_typo_subset.is_empty() {
self.original.compute_fully_if_needed(ctx)?;
}
let original = ctx.term_interner.get_mut(self.original);
match &self.zero_typo_subset {
NTypoTermSubset::All => {
let ZeroTypoTerm {
phrase: _,
exact: zero_typo,
prefix_of,
synonyms: _,
use_prefix_db: _,
} = &original.zero_typo;
result.extend(zero_typo.iter().copied().map(|w| {
if original.ngram_words.is_some() {
Word::Derived(w)
} else {
Word::Original(w)
}
}));
result.extend(prefix_of.iter().copied().map(|w| {
if original.ngram_words.is_some() {
Word::Derived(w)
} else {
Word::Original(w)
}
}));
}
NTypoTermSubset::Subset { words, phrases: _ } => {
let ZeroTypoTerm {
phrase: _,
exact: zero_typo,
prefix_of,
synonyms: _,
use_prefix_db: _,
} = &original.zero_typo;
if let Some(zero_typo) = zero_typo {
if words.contains(zero_typo) {
if original.ngram_words.is_some() {
result.insert(Word::Derived(*zero_typo));
} else {
result.insert(Word::Original(*zero_typo));
}
}
}
result.extend(prefix_of.intersection(words).copied().map(|w| {
if original.ngram_words.is_some() {
Word::Derived(w)
} else {
Word::Original(w)
}
}));
}
NTypoTermSubset::Nothing => {}
}
match &self.one_typo_subset {
NTypoTermSubset::All => {
let Lazy::Init(OneTypoTerm { split_words: _, one_typo }) = &original.one_typo
else {
panic!()
};
result.extend(one_typo.iter().copied().map(Word::Derived))
}
NTypoTermSubset::Subset { words, phrases: _ } => {
let Lazy::Init(OneTypoTerm { split_words: _, one_typo }) = &original.one_typo
else {
panic!()
};
result.extend(one_typo.intersection(words).copied().map(Word::Derived));
}
NTypoTermSubset::Nothing => {}
};
match &self.two_typo_subset {
NTypoTermSubset::All => {
let Lazy::Init(TwoTypoTerm { two_typos }) = &original.two_typo else { panic!() };
result.extend(two_typos.iter().copied().map(Word::Derived));
}
NTypoTermSubset::Subset { words, phrases: _ } => {
let Lazy::Init(TwoTypoTerm { two_typos }) = &original.two_typo else { panic!() };
result.extend(two_typos.intersection(words).copied().map(Word::Derived));
}
NTypoTermSubset::Nothing => {}
};
Ok(result)
}
pub fn all_phrases(&self, ctx: &mut SearchContext<'_>) -> Result<BTreeSet<Interned<Phrase>>> {
let mut result = BTreeSet::default();
if !self.one_typo_subset.is_empty() {
self.original.compute_fully_if_needed(ctx)?;
}
let original = ctx.term_interner.get_mut(self.original);
let ZeroTypoTerm { phrase, exact: _, prefix_of: _, synonyms, use_prefix_db: _ } =
&original.zero_typo;
result.extend(phrase.iter().copied());
result.extend(synonyms.iter().copied());
match &self.one_typo_subset {
NTypoTermSubset::All => {
let Lazy::Init(OneTypoTerm { split_words, one_typo: _ }) = &original.one_typo
else {
panic!();
};
result.extend(split_words.iter().copied());
}
NTypoTermSubset::Subset { phrases, .. } => {
let Lazy::Init(OneTypoTerm { split_words, one_typo: _ }) = &original.one_typo
else {
panic!();
};
if let Some(split_words) = split_words {
if phrases.contains(split_words) {
result.insert(*split_words);
}
}
}
NTypoTermSubset::Nothing => {}
}
Ok(result)
}
pub fn original_phrase(&self, ctx: &SearchContext<'_>) -> Option<Interned<Phrase>> {
let t = ctx.term_interner.get(self.original);
if let Some(p) = t.zero_typo.phrase {
if self.zero_typo_subset.contains_phrase(p) {
return Some(p);
}
}
None
}
pub fn max_typo_cost(&self, ctx: &SearchContext<'_>) -> u8 {
let t = ctx.term_interner.get(self.original);
match t.max_levenshtein_distance {
0 => {
if t.allows_split_words() {
1
} else {
0
}
}
1 => {
if self.one_typo_subset.is_empty() {
0
} else {
1
}
}
2 => {
if self.two_typo_subset.is_empty() {
if self.one_typo_subset.is_empty() {
0
} else {
1
}
} else {
2
}
}
_ => panic!(),
}
}
pub fn keep_only_exact_term(&mut self, ctx: &SearchContext<'_>) {
if let Some(term) = self.exact_term(ctx) {
match term {
ExactTerm::Phrase(p) => {
self.zero_typo_subset = NTypoTermSubset::Subset {
words: BTreeSet::new(),
phrases: BTreeSet::from_iter([p]),
};
self.clear_one_typo_subset();
self.clear_two_typo_subset();
}
ExactTerm::Word(w) => {
self.zero_typo_subset = NTypoTermSubset::Subset {
words: BTreeSet::from_iter([w]),
phrases: BTreeSet::new(),
};
self.clear_one_typo_subset();
self.clear_two_typo_subset();
}
}
}
}
pub fn clear_zero_typo_subset(&mut self) {
self.zero_typo_subset = NTypoTermSubset::Nothing;
}
pub fn clear_one_typo_subset(&mut self) {
self.one_typo_subset = NTypoTermSubset::Nothing;
}
pub fn clear_two_typo_subset(&mut self) {
self.two_typo_subset = NTypoTermSubset::Nothing;
}
pub fn description(&self, ctx: &SearchContext<'_>) -> String {
let t = ctx.term_interner.get(self.original);
ctx.word_interner.get(t.original).to_owned()
}
}
impl ZeroTypoTerm {
fn is_empty(&self) -> bool {
let ZeroTypoTerm { phrase, exact: zero_typo, prefix_of, synonyms, use_prefix_db } = self;
phrase.is_none()
&& zero_typo.is_none()
&& prefix_of.is_empty()
&& synonyms.is_empty()
&& use_prefix_db.is_none()
}
}
impl OneTypoTerm {
fn is_empty(&self) -> bool {
let OneTypoTerm { split_words, one_typo } = self;
one_typo.is_empty() && split_words.is_none()
}
}
impl TwoTypoTerm {
fn is_empty(&self) -> bool {
let TwoTypoTerm { two_typos } = self;
two_typos.is_empty()
}
}
impl QueryTerm {
fn is_empty(&self) -> bool {
let Lazy::Init(one_typo) = &self.one_typo else {
return false;
};
let Lazy::Init(two_typo) = &self.two_typo else {
return false;
};
self.zero_typo.is_empty() && one_typo.is_empty() && two_typo.is_empty()
}
fn allows_split_words(&self) -> bool {
self.zero_typo.phrase.is_none()
}
}
impl Interned<QueryTerm> {
/// Return the original word from the given query term
fn original_single_word(self, ctx: &SearchContext<'_>) -> Option<Interned<String>> {
let self_ = ctx.term_interner.get(self);
if self_.ngram_words.is_some() {
None
} else {
Some(self_.original)
}
}
}
/// A query term coupled with its position in the user's search query.
#[derive(Clone)]
pub struct LocatedQueryTerm {
pub value: Interned<QueryTerm>,
pub positions: RangeInclusive<u16>,
}
impl LocatedQueryTerm {
/// Return `true` iff the term is empty
pub fn is_empty(&self, interner: &DedupInterner<QueryTerm>) -> bool {
interner.get(self.value).is_empty()
}
}
impl QueryTerm {
pub fn is_cached_prefix(&self) -> bool {
self.zero_typo.use_prefix_db.is_some()
}
pub fn is_prefix(&self) -> bool {
self.is_prefix
}
pub fn original_word(&self, ctx: &SearchContext<'_>) -> String {
ctx.word_interner.get(self.original).clone()
}
pub fn original_phrase(&self) -> Option<Interned<Phrase>> {
self.zero_typo.phrase
}
pub fn all_computed_derivations(&self) -> (Vec<Interned<String>>, Vec<Interned<Phrase>>) {
let mut words = BTreeSet::new();
let mut phrases = BTreeSet::new();
let ZeroTypoTerm { phrase, exact: zero_typo, prefix_of, synonyms, use_prefix_db: _ } =
&self.zero_typo;
words.extend(zero_typo.iter().copied());
words.extend(prefix_of.iter().copied());
phrases.extend(phrase.iter().copied());
phrases.extend(synonyms.iter().copied());
if let Lazy::Init(OneTypoTerm { split_words, one_typo }) = &self.one_typo {
words.extend(one_typo.iter().copied());
phrases.extend(split_words.iter().copied());
};
if let Lazy::Init(TwoTypoTerm { two_typos }) = &self.two_typo {
words.extend(two_typos.iter().copied());
};
(words.into_iter().collect(), phrases.into_iter().collect())
}
}


@ -0,0 +1,79 @@
use std::collections::BTreeSet;
use super::Phrase;
use crate::search::new::interner::Interned;
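/// The subset of the derivations of a query term that are kept at a given
/// number of typos: all of them, none of them, or an explicit set of words
/// and phrases.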
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum NTypoTermSubset {
All,
Subset {
words: BTreeSet<Interned<String>>,
phrases: BTreeSet<Interned<Phrase>>,
// TODO: prefixes: BTreeSet<Interned<String>>,
},
Nothing,
}
impl NTypoTermSubset {
pub fn contains_word(&self, word: Interned<String>) -> bool {
match self {
NTypoTermSubset::All => true,
NTypoTermSubset::Subset { words, phrases: _ } => words.contains(&word),
NTypoTermSubset::Nothing => false,
}
}
pub fn contains_phrase(&self, phrase: Interned<Phrase>) -> bool {
match self {
NTypoTermSubset::All => true,
NTypoTermSubset::Subset { words: _, phrases } => phrases.contains(&phrase),
NTypoTermSubset::Nothing => false,
}
}
pub fn is_empty(&self) -> bool {
match self {
NTypoTermSubset::All => false,
NTypoTermSubset::Subset { words, phrases } => words.is_empty() && phrases.is_empty(),
NTypoTermSubset::Nothing => true,
}
}
pub fn union(&mut self, other: &Self) {
match self {
Self::All => {}
Self::Subset { words, phrases } => match other {
Self::All => {
*self = Self::All;
}
Self::Subset { words: w2, phrases: p2 } => {
words.extend(w2);
phrases.extend(p2);
}
Self::Nothing => {}
},
Self::Nothing => {
*self = other.clone();
}
}
}
pub fn intersect(&mut self, other: &Self) {
match self {
Self::All => *self = other.clone(),
Self::Subset { words, phrases } => match other {
Self::All => {}
Self::Subset { words: w2, phrases: p2 } => {
let mut ws = BTreeSet::new();
for w in words.intersection(w2) {
ws.insert(*w);
}
let mut ps = BTreeSet::new();
for p in phrases.intersection(p2) {
ps.insert(*p);
}
*words = ws;
*phrases = ps;
}
Self::Nothing => *self = Self::Nothing,
},
Self::Nothing => {}
}
}
}


@ -0,0 +1,382 @@
use std::collections::BTreeSet;
use charabia::normalizer::NormalizedTokenIter;
use charabia::{SeparatorKind, TokenKind};
use super::compute_derivations::partially_initialized_term_from_word;
use super::{LocatedQueryTerm, ZeroTypoTerm};
use crate::search::new::query_term::{Lazy, Phrase, QueryTerm};
use crate::search::new::Word;
use crate::{Result, SearchContext, MAX_WORD_LENGTH};
#[derive(Clone)]
/// Extraction of the content of a query.
pub struct ExtractedTokens {
/// The terms to search for in the database.
pub query_terms: Vec<LocatedQueryTerm>,
/// The words that must not appear in the results.
pub negative_words: Vec<Word>,
/// The phrases that must not appear in the results.
pub negative_phrases: Vec<LocatedQueryTerm>,
}
/// Convert the tokenised search query into a list of located query terms.
#[tracing::instrument(level = "trace", skip_all, target = "search::query")]
pub fn located_query_terms_from_tokens(
ctx: &mut SearchContext<'_>,
query: NormalizedTokenIter<'_, '_, '_, '_>,
words_limit: Option<usize>,
) -> Result<ExtractedTokens> {
let nbr_typos = number_of_typos_allowed(ctx)?;
let mut query_terms = Vec::new();
let mut negative_phrase = false;
let mut phrase: Option<PhraseBuilder> = None;
let mut encountered_whitespace = true;
let mut negative_next_token = false;
let mut negative_words = Vec::new();
let mut negative_phrases = Vec::new();
let parts_limit = words_limit.unwrap_or(usize::MAX);
// start with the last position as we will wrap around to position 0 at the beginning of the loop below.
let mut position = u16::MAX;
let mut peekable = query.take(super::limits::MAX_TOKEN_COUNT).peekable();
while let Some(token) = peekable.next() {
if token.lemma().is_empty() {
continue;
}
// early return if word limit is exceeded
if query_terms.len() >= parts_limit {
return Ok(ExtractedTokens { query_terms, negative_words, negative_phrases });
}
match token.kind {
TokenKind::Word | TokenKind::StopWord => {
// On first loop, goes from u16::MAX to 0, then normal increment.
position = position.wrapping_add(1);
// 1. if the word is quoted we push it in a phrase-buffer waiting for the ending quote,
// 2. if the word is not the last token of the query and is not a stop_word we push it as a non-prefix word,
// 3. if the word is the last token of the query we push it as a prefix word.
if let Some(phrase) = &mut phrase {
phrase.push_word(ctx, &token, position)
} else if negative_next_token {
let word = token.lemma().to_string();
let word = Word::Original(ctx.word_interner.insert(word));
negative_words.push(word);
negative_next_token = false;
} else if peekable.peek().is_some() {
match token.kind {
TokenKind::Word => {
let word = token.lemma();
let term = partially_initialized_term_from_word(
ctx,
word,
nbr_typos(word),
false,
false,
)?;
let located_term = LocatedQueryTerm {
value: ctx.term_interner.push(term),
positions: position..=position,
};
query_terms.push(located_term);
}
TokenKind::StopWord | TokenKind::Separator(_) | TokenKind::Unknown => (),
}
} else {
let word = token.lemma();
let term = partially_initialized_term_from_word(
ctx,
word,
nbr_typos(word),
true,
false,
)?;
let located_term = LocatedQueryTerm {
value: ctx.term_interner.push(term),
positions: position..=position,
};
query_terms.push(located_term);
}
}
TokenKind::Separator(separator_kind) => {
// Add a position penalty for hard separators (e.g. `.`), so that
// words on either side are considered far apart
if let SeparatorKind::Hard = separator_kind {
position = position.wrapping_add(7);
}
phrase = 'phrase: {
let phrase = phrase.take();
// If we have a hard separator inside a phrase, we immediately start a new phrase
let phrase = if separator_kind == SeparatorKind::Hard {
if let Some(phrase) = phrase {
if let Some(located_query_term) = phrase.build(ctx) {
// as we are evaluating a negative operator we put the phrase
// in the negative phrases *but* we don't reset the negative operator
// as we are immediately starting a new negative phrase.
if negative_phrase {
negative_phrases.push(located_query_term);
} else {
query_terms.push(located_query_term);
}
}
Some(PhraseBuilder::empty())
} else {
None
}
} else {
phrase
};
// We close and start a new phrase depending on the number of double quotes
let mut quote_count = token.lemma().chars().filter(|&s| s == '"').count();
if quote_count == 0 {
break 'phrase phrase;
}
// Consume the closing quote and the phrase
if let Some(phrase) = phrase {
// Per the check above, quote_count > 0
quote_count -= 1;
if let Some(located_query_term) = phrase.build(ctx) {
// we were evaluating a negative operator so we
// put the phrase in the negative phrases
if negative_phrase {
negative_phrases.push(located_query_term);
negative_phrase = false;
} else {
query_terms.push(located_query_term);
}
}
}
// Start new phrase if the token ends with an opening quote
if quote_count % 2 == 1 {
negative_phrase = negative_next_token;
Some(PhraseBuilder::empty())
} else {
None
}
};
negative_next_token =
phrase.is_none() && token.lemma() == "-" && encountered_whitespace;
}
_ => (),
}
encountered_whitespace =
token.lemma().chars().last().filter(|c| c.is_whitespace()).is_some();
}
// If a quote is never closed, we consider the rest of the query as a phrase.
if let Some(phrase) = phrase.take() {
if let Some(located_query_term) = phrase.build(ctx) {
// put the phrase in the negative set if we are evaluating a negative operator.
if negative_phrase {
negative_phrases.push(located_query_term);
} else {
query_terms.push(located_query_term);
}
}
}
Ok(ExtractedTokens { query_terms, negative_words, negative_phrases })
}
pub fn number_of_typos_allowed<'ctx>(
ctx: &SearchContext<'ctx>,
) -> Result<impl Fn(&str) -> u8 + 'ctx> {
let authorize_typos = ctx.index.authorize_typos(ctx.txn)?;
let min_len_one_typo = ctx.index.min_word_len_one_typo(ctx.txn)?;
let min_len_two_typos = ctx.index.min_word_len_two_typos(ctx.txn)?;
let exact_words = ctx.index.exact_words(ctx.txn)?;
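// Illustratively, with thresholds of 5 and 9 characters (assumed values;
// the actual ones come from the index settings fetched above): "kite"
// allows 0 typos, "kitten" allows 1, "sunflowers" allows 2, and any word
// listed in `exact_words` always allows 0.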
Ok(Box::new(move |word: &str| {
if !authorize_typos
|| word.len() < min_len_one_typo as usize
|| exact_words.as_ref().map_or(false, |fst| fst.contains(word))
{
0
} else if word.len() < min_len_two_typos as usize {
1
} else {
2
}
}))
}
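/// Try to build an ngram query term by concatenating consecutive single-word
/// terms (e.g. `sun` + `flower` -> `sunflower`); return `None` when a term is
/// a phrase, when the terms are not adjacent, or when the concatenation
/// exceeds the maximum word length.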
pub fn make_ngram(
ctx: &mut SearchContext<'_>,
terms: &[LocatedQueryTerm],
number_of_typos_allowed: &impl Fn(&str) -> u8,
) -> Result<Option<LocatedQueryTerm>> {
assert!(!terms.is_empty());
for t in terms {
if ctx.term_interner.get(t.value).zero_typo.phrase.is_some() {
return Ok(None);
}
}
for ts in terms.windows(2) {
let [t1, t2] = ts else { panic!() };
if *t1.positions.end() != t2.positions.start() - 1 {
return Ok(None);
}
}
let mut words_interned = vec![];
for term in terms {
if let Some(original_term_word) = term.value.original_single_word(ctx) {
words_interned.push(original_term_word);
} else {
return Ok(None);
}
}
let words =
words_interned.iter().map(|&i| ctx.word_interner.get(i).to_owned()).collect::<Vec<_>>();
let start = *terms.first().as_ref().unwrap().positions.start();
let end = *terms.last().as_ref().unwrap().positions.end();
let is_prefix = ctx.term_interner.get(terms.last().as_ref().unwrap().value).is_prefix;
let ngram_str = words.join("");
if ngram_str.len() > MAX_WORD_LENGTH {
return Ok(None);
}
let ngram_str_interned = ctx.word_interner.insert(ngram_str.clone());
let max_nbr_typos =
number_of_typos_allowed(ngram_str.as_str()).saturating_sub(terms.len() as u8 - 1);
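// One reading of the formula above: an ngram built from `n` words is allowed
// `n - 1` fewer typos than a plain word of the same length, since each
// removed separator already accounts for one edit.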
let mut term =
partially_initialized_term_from_word(ctx, &ngram_str, max_nbr_typos, is_prefix, true)?;
// Now add the synonyms
let index_synonyms = ctx.index.synonyms(ctx.txn)?;
term.zero_typo.synonyms.extend(
index_synonyms.get(&words).cloned().unwrap_or_default().into_iter().map(|words| {
let words = words.into_iter().map(|w| Some(ctx.word_interner.insert(w))).collect();
ctx.phrase_interner.insert(Phrase { words })
}),
);
let term = QueryTerm {
original: ngram_str_interned,
ngram_words: Some(words_interned),
is_prefix,
max_levenshtein_distance: max_nbr_typos,
zero_typo: term.zero_typo,
one_typo: Lazy::Uninit,
two_typo: Lazy::Uninit,
};
let term = LocatedQueryTerm { value: ctx.term_interner.push(term), positions: start..=end };
Ok(Some(term))
}
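/// Accumulates the words of a quoted phrase (stop words become `None`
/// placeholders) together with the range of positions the phrase spans.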
struct PhraseBuilder {
words: Vec<Option<crate::search::new::Interned<String>>>,
start: u16,
end: u16,
}
impl PhraseBuilder {
fn empty() -> Self {
Self { words: Default::default(), start: u16::MAX, end: u16::MAX }
}
fn is_empty(&self) -> bool {
self.words.is_empty() || self.words.iter().all(Option::is_none)
}
// precondition: token has kind Word or StopWord
fn push_word(
&mut self,
ctx: &mut SearchContext<'_>,
token: &charabia::Token<'_>,
position: u16,
) {
if self.is_empty() {
self.start = position;
}
self.end = position;
if let TokenKind::StopWord = token.kind {
self.words.push(None);
} else {
// token has kind Word
let word = ctx.word_interner.insert(token.lemma().to_string());
self.words.push(Some(word));
}
}
fn build(self, ctx: &mut SearchContext<'_>) -> Option<LocatedQueryTerm> {
if self.is_empty() {
return None;
}
Some(LocatedQueryTerm {
value: ctx.term_interner.push({
let phrase = ctx.phrase_interner.insert(Phrase { words: self.words });
let phrase_desc = phrase.description(ctx);
QueryTerm {
original: ctx.word_interner.insert(phrase_desc),
ngram_words: None,
max_levenshtein_distance: 0,
is_prefix: false,
zero_typo: ZeroTypoTerm {
phrase: Some(phrase),
exact: None,
prefix_of: BTreeSet::default(),
synonyms: BTreeSet::default(),
use_prefix_db: None,
},
one_typo: Lazy::Uninit,
two_typo: Lazy::Uninit,
}
}),
positions: self.start..=self.end,
})
}
}
#[cfg(test)]
mod tests {
use charabia::TokenizerBuilder;
use super::*;
use crate::index::tests::TempIndex;
fn temp_index_with_documents() -> TempIndex {
let temp_index = TempIndex::new();
temp_index
.add_documents(documents!([
{ "id": 1, "name": "split this world westfali westfalia the Ŵôřlḑôle" },
{ "id": 2, "name": "Westfália" },
{ "id": 3, "name": "Ŵôřlḑôle" },
]))
.unwrap();
temp_index
}
#[test]
fn start_with_hard_separator() -> Result<()> {
let mut builder = TokenizerBuilder::default();
let tokenizer = builder.build();
let tokens = tokenizer.tokenize(".");
let index = temp_index_with_documents();
let rtxn = index.read_txn()?;
let mut ctx = SearchContext::new(&index, &rtxn)?;
        // before the fix for <https://github.com/meilisearch/meilisearch/issues/3785>, this panicked with `attempt to add with overflow`
let ExtractedTokens { query_terms, .. } =
located_query_terms_from_tokens(&mut ctx, tokens, None)?;
assert!(query_terms.is_empty());
Ok(())
}
}

View file

@ -0,0 +1,21 @@
use itertools::Itertools;
use crate::search::new::interner::Interned;
use crate::SearchContext;
/// A phrase in the user's search query, consisting of several words
/// that must appear side-by-side in the search results.
#[derive(Default, Clone, PartialEq, Eq, Hash)]
pub struct Phrase {
pub words: Vec<Option<Interned<String>>>,
}
impl Interned<Phrase> {
pub fn description(self, ctx: &SearchContext<'_>) -> String {
let p = ctx.phrase_interner.get(self);
p.words.iter().flatten().map(|w| ctx.word_interner.get(*w)).join(" ")
}
pub fn words(self, ctx: &SearchContext<'_>) -> Vec<Option<Interned<String>>> {
let p = ctx.phrase_interner.get(self);
p.words.clone()
}
}
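// Sketch of the intended behavior (illustrative, not a doctest): stop words
// are stored as `None` and skipped by `description`, so a phrase whose words
// are [Some("war"), None, None, Some("worlds")] is described as "war worlds",
// while `words` returns the full vector, `None`s included.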

View file

@ -0,0 +1,92 @@
use std::collections::HashSet;
use super::{Edge, RankingRuleGraph, RankingRuleGraphTrait};
use crate::search::new::interner::{DedupInterner, MappedInterner};
use crate::search::new::query_graph::{QueryNode, QueryNodeData};
use crate::search::new::small_bitmap::SmallBitmap;
use crate::search::new::{QueryGraph, SearchContext};
use crate::Result;
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
/// Build the ranking rule graph from the given query graph
pub fn build(
ctx: &mut SearchContext<'_>,
query_graph: QueryGraph,
cost_of_ignoring_node: MappedInterner<QueryNode, Option<(u32, SmallBitmap<QueryNode>)>>,
) -> Result<Self> {
let QueryGraph { nodes: graph_nodes, .. } = &query_graph;
let mut conditions_interner = DedupInterner::default();
let mut edges_store = DedupInterner::default();
let mut edges_of_node = query_graph.nodes.map(|_| HashSet::new());
for (source_id, source_node) in graph_nodes.iter() {
let new_edges = edges_of_node.get_mut(source_id);
for dest_idx in source_node.successors.iter() {
let src_term = match &source_node.data {
QueryNodeData::Term(t) => Some(t),
QueryNodeData::Start => None,
QueryNodeData::Deleted | QueryNodeData::End => panic!(),
};
let dest_node = graph_nodes.get(dest_idx);
let dest_term = match &dest_node.data {
QueryNodeData::Term(t) => t,
QueryNodeData::End => {
let new_edge_id = edges_store.insert(Some(Edge {
source_node: source_id,
dest_node: dest_idx,
cost: 0,
condition: None,
nodes_to_skip: SmallBitmap::for_interned_values_in(graph_nodes),
}));
new_edges.insert(new_edge_id);
continue;
}
QueryNodeData::Deleted | QueryNodeData::Start => panic!(),
};
if let Some((cost_of_ignoring, forbidden_nodes)) =
cost_of_ignoring_node.get(dest_idx)
{
let dest = graph_nodes.get(dest_idx);
let dest_size = match &dest.data {
QueryNodeData::Term(term) => term.term_ids.len(),
_ => panic!(),
};
let new_edge_id = edges_store.insert(Some(Edge {
source_node: source_id,
dest_node: dest_idx,
cost: *cost_of_ignoring * dest_size as u32,
condition: None,
nodes_to_skip: forbidden_nodes.clone(),
}));
new_edges.insert(new_edge_id);
}
let edges = G::build_edges(ctx, &mut conditions_interner, src_term, dest_term)?;
if edges.is_empty() {
continue;
}
for (cost, condition) in edges {
let new_edge_id = edges_store.insert(Some(Edge {
source_node: source_id,
dest_node: dest_idx,
cost,
condition: Some(condition),
nodes_to_skip: SmallBitmap::for_interned_values_in(graph_nodes),
}));
new_edges.insert(new_edge_id);
}
}
}
let edges_store = edges_store.freeze();
let edges_of_node =
edges_of_node.map(|edges| SmallBitmap::from_iter(edges.iter().copied(), &edges_store));
let conditions_interner = conditions_interner.freeze();
Ok(RankingRuleGraph { query_graph, edges_store, edges_of_node, conditions_interner })
}
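    // Summary sketch of the edges inserted above (illustrative): for each
    // source -> dest pair of the query graph, up to three kinds of edges are
    // created:
    //   1. a zero-cost, unconditional edge into the END node;
    //   2. a "skip" edge with no condition and a cost of
    //      `cost_of_ignoring * term_ids.len()`, carrying the forbidden nodes
    //      computed by the caller;
    //   3. one conditional edge per (cost, condition) pair returned by
    //      `G::build_edges`.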
}

View file

@ -0,0 +1,400 @@
/** Implements a "PathVisitor" which finds all paths of a certain cost
from the START to END node of a ranking rule graph.
A path is a list of conditions. A condition is the data associated with
an edge, given by the ranking rule. Some edges don't have a condition associated
with them; they are "unconditional". These kinds of edges are used to "skip" a node.
The algorithm uses a depth-first search. It benefits from two main optimisations:
- The list of all possible costs to go from any node to the END node is precomputed
- The `DeadEndsCache` reduces the number of valid paths drastically, by making some edges
untraversable depending on what other edges were selected.
These two optimisations are meant to avoid traversing edges that wouldn't lead
to a valid path. In practically all cases, we avoid the exponential complexity
that is inherent to depth-first search in a large ranking rule graph.
The DeadEndsCache is a sort of prefix tree which associates a list of forbidden
conditions with each list of traversed conditions.
For example, the DeadEndsCache could say the following:
- Immediately, from the start, the conditions `[a,b]` are forbidden
- if we take the condition `c`, then the conditions `[e]` are also forbidden
- and if after that, we take `f`, then `[h,i]` are also forbidden
- etc.
- if we take `g`, then `[f]` is also forbidden
- etc.
- etc.
As we traverse the graph, we also traverse the `DeadEndsCache` and keep a list of forbidden
conditions in memory. Then, we know to avoid all edges which have a condition that is forbidden.
When a path is found from START to END, we give it to the `visit` closure.
This closure takes a mutable reference to the `DeadEndsCache`. This means that
the caller can update this cache. Therefore, we must handle the case where the
DeadEndsCache has been updated. This means potentially backtracking up to the point
where the traversed conditions are all allowed by the new DeadEndsCache.
The algorithm also implements the `TermsMatchingStrategy` logic.
Some edges are augmented with a list of "nodes_to_skip". Skipping
a node means "reaching this node through an unconditional edge". If we have
already traversed (i.e. not skipped) a node that is in this list, then we know that we
can't traverse this edge. Otherwise, we traverse the edge but make sure to skip any
future node that was present in the "nodes_to_skip" list.
The caller can decide to stop the path finding algorithm
by returning a `ControlFlow::Break` from the `visit` closure.
*/
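// A compact restatement of the example above (conditions a..i are hypothetical):
//
//   prefix traversed    forbidden conditions
//   []                  {a, b}
//   [c]                 {a, b, e}
//   [c, f]              {a, b, e, h, i}
//   [g]                 {a, b, f}
//
// The forbidden set only grows as the traversed prefix grows, which is what
// lets the visitor recompute it incrementally after each condition is pushed.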
use std::collections::{BTreeSet, VecDeque};
use std::iter::FromIterator;
use std::ops::ControlFlow;
use fxhash::FxHashSet;
use super::{DeadEndsCache, RankingRuleGraph, RankingRuleGraphTrait};
use crate::search::new::interner::{Interned, MappedInterner};
use crate::search::new::query_graph::QueryNode;
use crate::search::new::small_bitmap::SmallBitmap;
use crate::Result;
/// Closure which processes a path found by the `PathVisitor`
type VisitFn<'f, G> = &'f mut dyn FnMut(
// the path as a list of conditions
&[Interned<<G as RankingRuleGraphTrait>::Condition>],
&mut RankingRuleGraph<G>,
// a mutable reference to the DeadEndsCache, to update it in case the given
// path doesn't resolve to any valid document ids
&mut DeadEndsCache<<G as RankingRuleGraphTrait>::Condition>,
) -> Result<ControlFlow<()>>;
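// Sketch of a conforming `visit` closure (illustrative; the concrete dead-end
// bookkeeping depends on the caller and is elided here):
//
//   let mut visit = |path: &[Interned<G::Condition>],
//                    graph: &mut RankingRuleGraph<G>,
//                    dead_ends_cache: &mut DeadEndsCache<G::Condition>| {
//       // Resolve `path` to document ids; if the result is empty, record the
//       // offending conditions in `dead_ends_cache` so that future paths are
//       // pruned, then continue the traversal:
//       Ok(std::ops::ControlFlow::Continue(()))
//       // ...or return Ok(ControlFlow::Break(())) to stop the path finder.
//   };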
/// A structure which is kept but not updated during the traversal of the graph.
/// It can, however, be updated by the `visit` closure once a valid path has been found.
struct VisitorContext<'a, G: RankingRuleGraphTrait> {
graph: &'a mut RankingRuleGraph<G>,
all_costs_from_node: &'a MappedInterner<QueryNode, Vec<u64>>,
dead_ends_cache: &'a mut DeadEndsCache<G::Condition>,
}
/// The internal state of the traversal algorithm
struct VisitorState<G: RankingRuleGraphTrait> {
/// Budget from the current node to the end node
remaining_cost: u64,
/// Previously visited conditions, in order.
path: Vec<Interned<G::Condition>>,
/// Previously visited conditions, as an efficient and compact set.
visited_conditions: SmallBitmap<G::Condition>,
    /// Previously visited (i.e. not skipped) nodes, as an efficient and compact set.
visited_nodes: SmallBitmap<QueryNode>,
/// The conditions that cannot be visited anymore
forbidden_conditions: SmallBitmap<G::Condition>,
/// The nodes that cannot be visited anymore (they must be skipped)
nodes_to_skip: SmallBitmap<QueryNode>,
}
/// See module documentation
pub struct PathVisitor<'a, G: RankingRuleGraphTrait> {
state: VisitorState<G>,
ctx: VisitorContext<'a, G>,
}
impl<'a, G: RankingRuleGraphTrait> PathVisitor<'a, G> {
pub fn new(
cost: u64,
graph: &'a mut RankingRuleGraph<G>,
all_costs_from_node: &'a MappedInterner<QueryNode, Vec<u64>>,
dead_ends_cache: &'a mut DeadEndsCache<G::Condition>,
) -> Self {
Self {
state: VisitorState {
remaining_cost: cost,
path: vec![],
visited_conditions: SmallBitmap::for_interned_values_in(&graph.conditions_interner),
visited_nodes: SmallBitmap::for_interned_values_in(&graph.query_graph.nodes),
forbidden_conditions: SmallBitmap::for_interned_values_in(
&graph.conditions_interner,
),
nodes_to_skip: SmallBitmap::for_interned_values_in(&graph.query_graph.nodes),
},
ctx: VisitorContext { graph, all_costs_from_node, dead_ends_cache },
}
}
/// See module documentation
pub fn visit_paths(mut self, visit: VisitFn<'_, G>) -> Result<()> {
let _ =
self.state.visit_node(self.ctx.graph.query_graph.root_node, visit, &mut self.ctx)?;
Ok(())
}
}
impl<G: RankingRuleGraphTrait> VisitorState<G> {
/// Visits a node: traverse all its valid conditional and unconditional edges.
///
/// Returns ControlFlow::Break if the path finding algorithm should stop.
/// Returns whether a valid path was found from this node otherwise.
fn visit_node(
&mut self,
from_node: Interned<QueryNode>,
visit: VisitFn<'_, G>,
ctx: &mut VisitorContext<'_, G>,
) -> Result<ControlFlow<(), bool>> {
        // Tracks whether any valid path was found from this point.
        // If one was, the DeadEndsCache may have been updated by the `visit` closure,
        // and we may need to do more work to backtrack.
let mut any_valid = false;
let edges = ctx.graph.edges_of_node.get(from_node).clone();
for edge_idx in edges.iter() {
            // The edge may be `None` if it was deleted.
let Some(edge) = ctx.graph.edges_store.get(edge_idx).clone() else { continue };
if self.remaining_cost < edge.cost as u64 {
continue;
}
self.remaining_cost -= edge.cost as u64;
let cf = match edge.condition {
Some(condition) => self.visit_condition(
condition,
edge.dest_node,
&edge.nodes_to_skip,
visit,
ctx,
)?,
None => self.visit_no_condition(edge.dest_node, &edge.nodes_to_skip, visit, ctx)?,
};
self.remaining_cost += edge.cost as u64;
let ControlFlow::Continue(next_any_valid) = cf else {
return Ok(ControlFlow::Break(()));
};
any_valid |= next_any_valid;
if next_any_valid {
// backtrack as much as possible if a valid path was found and the dead_ends_cache
// was updated such that the current prefix is now invalid
self.forbidden_conditions = ctx
.dead_ends_cache
.forbidden_conditions_for_all_prefixes_up_to(self.path.iter().copied());
if self.visited_conditions.intersects(&self.forbidden_conditions) {
return Ok(ControlFlow::Continue(true));
}
}
}
Ok(ControlFlow::Continue(any_valid))
}
/// Visits an unconditional edge.
///
/// Returns ControlFlow::Break if the path finding algorithm should stop.
/// Returns whether a valid path was found from this node otherwise.
fn visit_no_condition(
&mut self,
dest_node: Interned<QueryNode>,
edge_new_nodes_to_skip: &SmallBitmap<QueryNode>,
visit: VisitFn<'_, G>,
ctx: &mut VisitorContext<'_, G>,
) -> Result<ControlFlow<(), bool>> {
if !ctx
.all_costs_from_node
.get(dest_node)
.iter()
.any(|next_cost| *next_cost == self.remaining_cost)
{
return Ok(ControlFlow::Continue(false));
}
// We've reached the END node!
if dest_node == ctx.graph.query_graph.end_node {
let control_flow = visit(&self.path, ctx.graph, ctx.dead_ends_cache)?;
            // We could change the return type of the visit closure so that the caller
            // tells us whether the dead ends cache was updated or not.
            // Alternatively, the DeadEndsCache could carry a generation number,
            // so that we wouldn't need to juggle these booleans at all.
match control_flow {
ControlFlow::Continue(_) => Ok(ControlFlow::Continue(true)),
ControlFlow::Break(_) => Ok(ControlFlow::Break(())),
}
} else {
            let old_nodes_to_skip = self.nodes_to_skip.clone();
            self.nodes_to_skip.union(edge_new_nodes_to_skip);
            let cf = self.visit_node(dest_node, visit, ctx)?;
            self.nodes_to_skip = old_nodes_to_skip;
Ok(cf)
}
}
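    // Pruning sketch for the cost check above (numbers are hypothetical): if
    // `remaining_cost == 3` but `all_costs_from_node.get(dest_node) == [1, 4]`,
    // no suffix starting at `dest_node` can consume exactly the remaining
    // budget, so the edge is abandoned without recursing.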
/// Visits a conditional edge.
///
/// Returns ControlFlow::Break if the path finding algorithm should stop.
/// Returns whether a valid path was found from this node otherwise.
fn visit_condition(
&mut self,
condition: Interned<G::Condition>,
dest_node: Interned<QueryNode>,
edge_new_nodes_to_skip: &SmallBitmap<QueryNode>,
visit: VisitFn<'_, G>,
ctx: &mut VisitorContext<'_, G>,
) -> Result<ControlFlow<(), bool>> {
assert!(dest_node != ctx.graph.query_graph.end_node);
if self.forbidden_conditions.contains(condition)
|| self.nodes_to_skip.contains(dest_node)
|| edge_new_nodes_to_skip.intersects(&self.visited_nodes)
{
return Ok(ControlFlow::Continue(false));
}
        // Check that, from the destination node, at least one reachable cost
        // corresponds exactly to our remaining budget.
if !ctx
.all_costs_from_node
.get(dest_node)
.iter()
.any(|next_cost| *next_cost == self.remaining_cost)
{
return Ok(ControlFlow::Continue(false));
}
self.path.push(condition);
self.visited_nodes.insert(dest_node);
self.visited_conditions.insert(condition);
let old_forb_cond = self.forbidden_conditions.clone();
if let Some(next_forbidden) =
ctx.dead_ends_cache.forbidden_conditions_after_prefix(self.path.iter().copied())
{
self.forbidden_conditions.union(&next_forbidden);
}
let old_nodes_to_skip = self.nodes_to_skip.clone();
self.nodes_to_skip.union(edge_new_nodes_to_skip);
let cf = self.visit_node(dest_node, visit, ctx)?;
self.nodes_to_skip = old_nodes_to_skip;
self.forbidden_conditions = old_forb_cond;
self.visited_conditions.remove(condition);
self.visited_nodes.remove(dest_node);
self.path.pop();
Ok(cf)
}
}
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
    /// Compute, for every node, the sorted, deduplicated list of every
    /// possible cost from that node to the END node, walking the graph backward.
    pub fn find_all_costs_to_end(&self) -> MappedInterner<QueryNode, Vec<u64>> {
let mut costs_to_end = self.query_graph.nodes.map(|_| vec![]);
self.traverse_breadth_first_backward(self.query_graph.end_node, |cur_node| {
if cur_node == self.query_graph.end_node {
*costs_to_end.get_mut(self.query_graph.end_node) = vec![0];
return;
}
let mut self_costs = Vec::<u64>::new();
let cur_node_edges = &self.edges_of_node.get(cur_node);
for edge_idx in cur_node_edges.iter() {
let edge = self.edges_store.get(edge_idx).as_ref().unwrap();
let succ_node = edge.dest_node;
let succ_costs = costs_to_end.get(succ_node);
for succ_cost in succ_costs {
self_costs.push(edge.cost as u64 + succ_cost);
}
}
self_costs.sort_unstable();
self_costs.dedup();
*costs_to_end.get_mut(cur_node) = self_costs;
});
costs_to_end
}
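    // Worked example on a hypothetical four-node graph with edges
    //   START -1-> A, START -2-> B, A -3-> END, B -1-> END:
    // the backward pass yields costs_to_end(END) = [0], costs_to_end(A) = [3],
    // costs_to_end(B) = [1], and costs_to_end(START) = [3, 4] (sorted and
    // deduplicated), since START reaches END for 2 + 1 = 3 via B and
    // 1 + 3 = 4 via A.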
pub fn update_all_costs_before_node(
&self,
node_with_removed_outgoing_conditions: Interned<QueryNode>,
costs: &mut MappedInterner<QueryNode, Vec<u64>>,
) {
// Traverse the graph backward from the target node, recomputing the cost for each of its predecessors.
// We first check that no other node is contributing the same total cost to a predecessor before removing
// the cost from the predecessor.
self.traverse_breadth_first_backward(node_with_removed_outgoing_conditions, |cur_node| {
let mut costs_to_remove = FxHashSet::default();
costs_to_remove.extend(costs.get(cur_node).iter().copied());
let cur_node_edges = &self.edges_of_node.get(cur_node);
for edge_idx in cur_node_edges.iter() {
let edge = self.edges_store.get(edge_idx).as_ref().unwrap();
for cost in costs.get(edge.dest_node).iter() {
costs_to_remove.remove(&(*cost + edge.cost as u64));
if costs_to_remove.is_empty() {
return;
}
}
}
if costs_to_remove.is_empty() {
return;
}
let mut new_costs = BTreeSet::from_iter(costs.get(cur_node).iter().copied());
for c in costs_to_remove {
new_costs.remove(&c);
}
*costs.get_mut(cur_node) = new_costs.into_iter().collect();
});
}
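    // Worked example (hypothetical): let costs(P) = [3, 4] with edges
    //   P -1-> Q, costs(Q) = [2]   and   P -2-> R, costs(R) = [2].
    // Both 1 + 2 = 3 and 2 + 2 = 4 are still contributed, so `costs_to_remove`
    // empties and the traversal stops at P. If the edge to R had just been
    // removed, 4 would no longer be justified and costs(P) would shrink to [3].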
/// Traverse the graph backwards from the given node such that every time
/// a node is visited, we are guaranteed that all its successors either:
/// 1. have already been visited; OR
/// 2. were not reachable from the given node
pub fn traverse_breadth_first_backward(
&self,
from: Interned<QueryNode>,
mut visit: impl FnMut(Interned<QueryNode>),
) {
let mut reachable = SmallBitmap::for_interned_values_in(&self.query_graph.nodes);
{
            // Walk backward to collect the set of nodes reachable from `from`;
            // the nodes that are not reachable will later be marked as already visited.
let mut stack = VecDeque::new();
let mut enqueued = SmallBitmap::for_interned_values_in(&self.query_graph.nodes);
enqueued.insert(from);
stack.push_back(from);
while let Some(n) = stack.pop_front() {
if reachable.contains(n) {
continue;
}
reachable.insert(n);
for prev_node in self.query_graph.nodes.get(n).predecessors.iter() {
if !enqueued.contains(prev_node) && !reachable.contains(prev_node) {
stack.push_back(prev_node);
enqueued.insert(prev_node);
}
}
}
};
let mut unreachable_or_visited =
SmallBitmap::for_interned_values_in(&self.query_graph.nodes);
for (n, _) in self.query_graph.nodes.iter() {
if !reachable.contains(n) {
unreachable_or_visited.insert(n);
}
}
let mut enqueued = SmallBitmap::for_interned_values_in(&self.query_graph.nodes);
let mut stack = VecDeque::new();
enqueued.insert(from);
stack.push_back(from);
while let Some(cur_node) = stack.pop_front() {
if !self.query_graph.nodes.get(cur_node).successors.is_subset(&unreachable_or_visited) {
stack.push_back(cur_node);
continue;
}
unreachable_or_visited.insert(cur_node);
visit(cur_node);
for prev_node in self.query_graph.nodes.get(cur_node).predecessors.iter() {
if !enqueued.contains(prev_node) && !unreachable_or_visited.contains(prev_node) {
stack.push_back(prev_node);
enqueued.insert(prev_node);
}
}
}
}
}
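// Illustration on a hypothetical diamond graph (predecessors shown):
//   END <- {A, B},  A <- {START},  B <- {START}
// Visiting backward from END yields END first, then A and B, and START last:
// if START is popped before both A and B have been visited, it is pushed back
// onto the queue, which is how the guarantee above is enforced.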

Some files were not shown because too many files have changed in this diff