mirror of https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 03:47:02 +02:00

Merge branch 'main' into tmp-release-v1.11.0
commit cf6ad1ae5e
1071 changed files with 263 additions and 106 deletions

crates/milli/Cargo.toml (Normal file, 144 lines)
@@ -0,0 +1,144 @@
[package]
name = "milli"
edition = "2021"
publish = false

version.workspace = true
authors.workspace = true
description.workspace = true
homepage.workspace = true
readme.workspace = true
# edition.workspace = true
license.workspace = true

[dependencies]
bimap = { version = "0.6.3", features = ["serde"] }
bincode = "1.3.3"
bstr = "1.9.1"
bytemuck = { version = "1.16.1", features = ["extern_crate_alloc"] }
byteorder = "1.5.0"
charabia = { version = "0.9.1", default-features = false }
concat-arrays = "0.1.2"
crossbeam-channel = "0.5.13"
deserr = "0.6.2"
either = { version = "1.13.0", features = ["serde"] }
flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7"
fxhash = "0.2.1"
geoutils = "0.5.1"
grenad = { version = "0.4.7", default-features = false, features = [
    "rayon",
    "tempfile",
] }
heed = { version = "0.20.3", default-features = false, features = [
    "serde-json",
    "serde-bincode",
    "read-txn-no-tls",
] }
indexmap = { version = "2.2.6", features = ["serde"] }
json-depth-checker = { path = "../json-depth-checker" }
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
memchr = "2.5.0"
memmap2 = "0.9.4"
obkv = "0.2.2"
once_cell = "1.19.0"
ordered-float = "4.2.1"
rayon = "1.10.0"
roaring = { version = "0.10.6", features = ["serde"] }
rstar = { version = "0.12.0", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
slice-group-by = "0.3.1"
smallstr = { version = "0.3.0", features = ["serde"] }
smallvec = "1.13.2"
smartstring = "1.0.1"
tempfile = "3.10.1"
thiserror = "1.0.61"
time = { version = "0.3.36", features = [
    "serde-well-known",
    "formatting",
    "parsing",
    "macros",
] }
uuid = { version = "1.10.0", features = ["v4"] }

filter-parser = { path = "../filter-parser" }

# documents words self-join
itertools = "0.13.0"

csv = "1.3.0"
candle-core = { version = "0.6.0" }
candle-transformers = { version = "0.6.0" }
candle-nn = { version = "0.6.0" }
tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.15.2", version = "0.15.2", default-features = false, features = [
    "onig",
] }
hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default-features = false, features = [
    "online",
] }
tiktoken-rs = "0.5.9"
liquid = "0.26.6"
rhai = { git = "https://github.com/rhaiscript/rhai", rev = "ef3df63121d27aacd838f366f2b83fd65f20a1e4", features = ["serde", "no_module", "no_custom_syntax", "no_time", "sync"] }
arroy = "0.5.0"
rand = "0.8.5"
tracing = "0.1.40"
ureq = { version = "2.10.0", features = ["json"] }
url = "2.5.2"
rayon-par-bridge = "0.1.0"

[dev-dependencies]
mimalloc = { version = "0.1.43", default-features = false }
big_s = "1.0.2"
insta = "1.39.0"
maplit = "1.0.2"
md5 = "0.7.0"
meili-snap = { path = "../meili-snap" }
rand = { version = "0.8.5", features = ["small_rng"] }

[features]
all-tokenizations = [
    "charabia/default",
]

# Use POSIX semaphores instead of SysV semaphores in LMDB
# For more information on this feature, see heed's Cargo.toml
lmdb-posix-sem = ["heed/posix-sem"]

# allow chinese specialized tokenization
chinese = ["charabia/chinese"]
chinese-pinyin = ["chinese", "charabia/chinese-normalization-pinyin"]

# allow hebrew specialized tokenization
hebrew = ["charabia/hebrew"]

# allow japanese specialized tokenization
japanese = ["charabia/japanese"]
japanese-transliteration = ["charabia/japanese-transliteration"]

# allow korean specialized tokenization
korean = ["charabia/korean"]

# allow thai specialized tokenization
thai = ["charabia/thai"]

# allow greek specialized tokenization
greek = ["charabia/greek"]

# allow khmer specialized tokenization
khmer = ["charabia/khmer"]

# allow vietnamese specialized tokenization
vietnamese = ["charabia/vietnamese"]

# allow german specialized tokenization
german = ["charabia/german-segmentation"]

# force swedish character recomposition
swedish-recomposition = ["charabia/swedish-recomposition"]

# allow turkish specialized tokenization
turkish = ["charabia/turkish"]

# allow CUDA support, see <https://github.com/meilisearch/meilisearch/issues/4306>
cuda = ["candle-core/cuda"]

crates/milli/README.md (Normal file, 16 lines)
@@ -0,0 +1,16 @@
<p align="center">
  <img alt="the milli logo" src="../assets/milli-logo.svg">
</p>

<p align="center">a concurrent indexer combined with fast and relevant search algorithms</p>

## Introduction

This crate contains the internal engine used by [Meilisearch].

It provides a library that manages one and only one index; Meilisearch
handles multi-index management itself. Milli also does not store updates:
that is the job of the layer above, which is why milli can only process
one update at a time.

[Meilisearch]: https://github.com/meilisearch/meilisearch
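
To make the single-index contract concrete, here is a minimal sketch of opening one milli index, using only the EnvOpenOptions/Index API exercised by the examples below (the path and map size are illustrative, not part of this commit):

use heed::EnvOpenOptions;
use milli::Index;

fn open_one_index() -> milli::Result<Index> {
    // One `Index` owns exactly one LMDB environment on disk; managing several
    // indexes, and queuing their updates, is the job of the layer above.
    std::fs::create_dir_all("books.ms").unwrap();
    let mut options = EnvOpenOptions::new();
    options.map_size(10 * 1024 * 1024 * 1024); // 10 GiB, illustrative
    Index::new(options, "books.ms")
}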

crates/milli/examples/index.rs (Normal file, 114 lines)
@@ -0,0 +1,114 @@
use std::error::Error;
use std::fs::File;
use std::io::{BufRead, BufReader, Cursor, Seek};
use std::path::Path;

use heed::EnvOpenOptions;
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use milli::{Index, Object};

fn usage(error: &str, program_name: &str) -> String {
    format!(
        "{}. Usage: {} <PATH-TO-INDEX> <PATH-TO-DATASET> [searchable_fields] [filterable_fields]",
        error, program_name
    )
}

fn main() -> Result<(), Box<dyn Error>> {
    let mut args = std::env::args();
    let program_name = args.next().expect("No program name");
    let index_path =
        args.next().unwrap_or_else(|| panic!("{}", usage("Missing path to index.", &program_name)));
    let dataset_path = args
        .next()
        .unwrap_or_else(|| panic!("{}", usage("Missing path to source dataset.", &program_name)));
    // let primary_key = args.next().unwrap_or_else(|| "id".into());
    // "title overview"
    let searchable_fields: Vec<String> = args
        .next()
        .map(|arg| arg.split_whitespace().map(ToString::to_string).collect())
        .unwrap_or_default();

    println!("{searchable_fields:?}");
    // "release_date genres"
    let filterable_fields: Vec<String> = args
        .next()
        .map(|arg| arg.split_whitespace().map(ToString::to_string).collect())
        .unwrap_or_default();

    let mut options = EnvOpenOptions::new();
    options.map_size(100 * 1024 * 1024 * 1024); // 100 GB

    std::fs::create_dir_all(&index_path).unwrap();
    let index = Index::new(options, index_path).unwrap();
    let mut wtxn = index.write_txn().unwrap();

    let config = IndexerConfig::default();
    let mut builder = Settings::new(&mut wtxn, &index, &config);
    // builder.set_primary_key(primary_key);
    let searchable_fields = searchable_fields.iter().map(|s| s.to_string()).collect();
    builder.set_searchable_fields(searchable_fields);
    let filterable_fields = filterable_fields.iter().map(|s| s.to_string()).collect();
    builder.set_filterable_fields(filterable_fields);

    builder.execute(|_| (), || false).unwrap();

    let config = IndexerConfig::default();
    let indexing_config = IndexDocumentsConfig::default();

    let builder =
        IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| (), || false).unwrap();

    let documents = documents_from(
        &dataset_path,
        Path::new(&dataset_path).extension().unwrap_or_default().to_str().unwrap_or_default(),
    );
    let (builder, user_error) = builder.add_documents(documents).unwrap();
    user_error.unwrap();
    builder.execute().unwrap();
    wtxn.commit().unwrap();

    index.prepare_for_closing().wait();
    Ok(())
}
fn documents_from(filename: &str, filetype: &str) -> DocumentsBatchReader<impl BufRead + Seek> {
    let reader = File::open(filename)
        .unwrap_or_else(|_| panic!("could not find the dataset in: {}", filename));
    let reader = BufReader::new(reader);
    let documents = match filetype {
        "csv" => documents_from_csv(reader).unwrap(),
        "json" => documents_from_json(reader).unwrap(),
        "jsonl" => documents_from_jsonl(reader).unwrap(),
        otherwise => panic!("invalid update format {:?}", otherwise),
    };
    DocumentsBatchReader::from_reader(Cursor::new(documents)).unwrap()
}

fn documents_from_jsonl(reader: impl BufRead) -> milli::Result<Vec<u8>> {
    let mut documents = DocumentsBatchBuilder::new(Vec::new());

    for result in serde_json::Deserializer::from_reader(reader).into_iter::<Object>() {
        let object = result.unwrap();
        documents.append_json_object(&object)?;
    }

    documents.into_inner().map_err(Into::into)
}

fn documents_from_json(reader: impl BufRead) -> milli::Result<Vec<u8>> {
    let mut documents = DocumentsBatchBuilder::new(Vec::new());

    documents.append_json_array(reader)?;

    documents.into_inner().map_err(Into::into)
}

fn documents_from_csv(reader: impl BufRead) -> milli::Result<Vec<u8>> {
    let csv = csv::Reader::from_reader(reader);

    let mut documents = DocumentsBatchBuilder::new(Vec::new());
    documents.append_csv(csv)?;

    documents.into_inner().map_err(Into::into)
}

crates/milli/examples/search.rs (Normal file, 124 lines)
@@ -0,0 +1,124 @@
use std::error::Error;
use std::io::stdin;
use std::path::Path;
use std::time::Instant;

use heed::EnvOpenOptions;
use milli::{
    execute_search, filtered_universe, DefaultSearchLogger, GeoSortStrategy, Index, SearchContext,
    SearchLogger, TermsMatchingStrategy, TimeBudget,
};

#[global_allocator]
static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;

fn main() -> Result<(), Box<dyn Error>> {
    let mut args = std::env::args();
    let program_name = args.next().expect("No program name");
    let dataset = args.next().unwrap_or_else(|| {
        panic!(
            "Missing path to index. Usage: {} <PATH-TO-INDEX> [<logger-dir>] [print-documents]",
            program_name
        )
    });
    let detailed_logger_dir = args.next();
    let print_documents: bool =
        if let Some(arg) = args.next() { arg == "print-documents" } else { false };

    let mut options = EnvOpenOptions::new();
    options.map_size(100 * 1024 * 1024 * 1024); // 100 GB

    let index = Index::new(options, dataset)?;
    let txn = index.read_txn()?;
    let mut query = String::new();
    while stdin().read_line(&mut query)? > 0 {
        for _ in 0..2 {
            let mut default_logger = DefaultSearchLogger;
            // FIXME: consider resetting the state of the logger between search executions as otherwise panics are possible.
            // Worked around here by recreating the logger on each iteration of the loop.
            let mut detailed_logger = detailed_logger_dir
                .as_ref()
                .map(|logger_dir| (milli::VisualSearchLogger::default(), logger_dir));
            let logger: &mut dyn SearchLogger<_> =
                if let Some((detailed_logger, _)) = detailed_logger.as_mut() {
                    detailed_logger
                } else {
                    &mut default_logger
                };

            let start = Instant::now();

            let mut ctx = SearchContext::new(&index, &txn)?;
            let universe = filtered_universe(ctx.index, ctx.txn, &None)?;

            let docs = execute_search(
                &mut ctx,
                (!query.trim().is_empty()).then(|| query.trim()),
                TermsMatchingStrategy::Last,
                milli::score_details::ScoringStrategy::Skip,
                false,
                universe,
                &None,
                &None,
                GeoSortStrategy::default(),
                0,
                20,
                None,
                &mut DefaultSearchLogger,
                logger,
                TimeBudget::max(),
                None,
                None,
            )?;
            if let Some((logger, dir)) = detailed_logger {
                logger.finish(&mut ctx, Path::new(dir))?;
            }
            let elapsed = start.elapsed();
            println!("new: {}us, docids: {:?}", elapsed.as_micros(), docs.documents_ids);
            if print_documents {
                let documents = index
                    .documents(&txn, docs.documents_ids.iter().copied())
                    .unwrap()
                    .into_iter()
                    .map(|(id, obkv)| {
                        let mut object = serde_json::Map::default();
                        for (fid, fid_name) in index.fields_ids_map(&txn).unwrap().iter() {
                            let value = obkv.get(fid).unwrap();
                            let value: serde_json::Value = serde_json::from_slice(value).unwrap();
                            object.insert(fid_name.to_owned(), value);
                        }
                        (id, serde_json::to_string_pretty(&object).unwrap())
                    })
                    .collect::<Vec<_>>();

                for (id, document) in documents {
                    println!("{id}:");
                    println!("{document}");
                }

                let documents = index
                    .documents(&txn, docs.documents_ids.iter().copied())
                    .unwrap()
                    .into_iter()
                    .map(|(id, obkv)| {
                        let mut object = serde_json::Map::default();
                        for (fid, fid_name) in index.fields_ids_map(&txn).unwrap().iter() {
                            let value = obkv.get(fid).unwrap();
                            let value: serde_json::Value = serde_json::from_slice(value).unwrap();
                            object.insert(fid_name.to_owned(), value);
                        }
                        (id, serde_json::to_string_pretty(&object).unwrap())
                    })
                    .collect::<Vec<_>>();
                println!("{}us: {:?}", elapsed.as_micros(), docs.documents_ids);
                for (id, document) in documents {
                    println!("{id}:");
                    println!("{document}");
                }
            }
        }
        query.clear();
    }

    Ok(())
}

crates/milli/examples/settings.rs (Normal file, 33 lines)
@@ -0,0 +1,33 @@
// use big_s::S;
use heed::EnvOpenOptions;
// use maplit::hashset;
use milli::{
    update::{IndexerConfig, Settings},
    Criterion, Index,
};

fn main() {
    let mut options = EnvOpenOptions::new();
    options.map_size(100 * 1024 * 1024 * 1024); // 100 GB

    let index = Index::new(options, "data_movies.ms").unwrap();
    let mut wtxn = index.write_txn().unwrap();

    let config = IndexerConfig::default();
    let mut builder = Settings::new(&mut wtxn, &index, &config);

    // builder.set_min_word_len_one_typo(5);
    // builder.set_min_word_len_two_typos(7);
    // builder.set_sortable_fields(hashset! { S("release_date") });
    builder.set_criteria(vec![
        Criterion::Words,
        Criterion::Typo,
        Criterion::Proximity,
        Criterion::Attribute,
        Criterion::Sort,
        Criterion::Exactness,
    ]);

    builder.execute(|_| (), || false).unwrap();
    wtxn.commit().unwrap();
}

crates/milli/fuzz/.gitignore (vendored, Normal file, 3 lines)
@@ -0,0 +1,3 @@
target
corpus
artifacts

crates/milli/src/asc_desc.rs (Normal file, 298 lines)
@@ -0,0 +1,298 @@
//! This module provides the `AscDesc` type and defines all the errors related to this type.

use std::fmt;
use std::str::FromStr;

use serde::{Deserialize, Serialize};
use thiserror::Error;

use crate::error::is_reserved_keyword;
use crate::search::facet::BadGeoError;
use crate::{CriterionError, Error, UserError};

/// This error type is never supposed to be shown to the end user.
/// You must always cast it to a sort error or a criterion error.
#[derive(Error, Debug)]
pub enum AscDescError {
    #[error(transparent)]
    GeoError(BadGeoError),
    #[error("Invalid syntax for the asc/desc parameter: expected expression ending by `:asc` or `:desc`, found `{name}`.")]
    InvalidSyntax { name: String },
    #[error("`{name}` is a reserved keyword and thus can't be used as an asc/desc rule.")]
    ReservedKeyword { name: String },
}

impl From<BadGeoError> for AscDescError {
    fn from(geo_error: BadGeoError) -> Self {
        AscDescError::GeoError(geo_error)
    }
}

impl From<AscDescError> for CriterionError {
    fn from(error: AscDescError) -> Self {
        match error {
            AscDescError::GeoError(_) => {
                CriterionError::ReservedNameForSort { name: "_geoPoint".to_string() }
            }
            AscDescError::InvalidSyntax { name } => CriterionError::InvalidName { name },
            AscDescError::ReservedKeyword { name } if name.starts_with("_geoPoint") => {
                CriterionError::ReservedNameForSort { name: "_geoPoint".to_string() }
            }
            AscDescError::ReservedKeyword { name } if name.starts_with("_geoRadius") => {
                CriterionError::ReservedNameForFilter { name: "_geoRadius".to_string() }
            }
            AscDescError::ReservedKeyword { name } if name.starts_with("_geoBoundingBox") => {
                CriterionError::ReservedNameForFilter { name: "_geoBoundingBox".to_string() }
            }
            AscDescError::ReservedKeyword { name } => CriterionError::ReservedName { name },
        }
    }
}

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub enum Member {
    Field(String),
    Geo([f64; 2]),
}

impl FromStr for Member {
    type Err = AscDescError;

    fn from_str(text: &str) -> Result<Member, Self::Err> {
        match text.strip_prefix("_geoPoint(").and_then(|text| text.strip_suffix(')')) {
            Some(point) => {
                let (lat, lng) = point
                    .split_once(',')
                    .ok_or_else(|| AscDescError::ReservedKeyword { name: text.to_string() })
                    .and_then(|(lat, lng)| {
                        lat.trim()
                            .parse()
                            .and_then(|lat| lng.trim().parse().map(|lng| (lat, lng)))
                            .map_err(|_| AscDescError::ReservedKeyword { name: text.to_string() })
                    })?;
                if !(-90.0..=90.0).contains(&lat) {
                    return Err(BadGeoError::Lat(lat))?;
                } else if !(-180.0..=180.0).contains(&lng) {
                    return Err(BadGeoError::Lng(lng))?;
                }
                Ok(Member::Geo([lat, lng]))
            }
            None => {
                if is_reserved_keyword(text)
                    || text.starts_with("_geoRadius(")
                    || text.starts_with("_geoBoundingBox(")
                    || text.starts_with("_geo(")
                    || text.starts_with("_geoDistance(")
                {
                    return Err(AscDescError::ReservedKeyword { name: text.to_string() })?;
                }
                Ok(Member::Field(text.to_string()))
            }
        }
    }
}

impl fmt::Display for Member {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Member::Field(name) => f.write_str(name),
            Member::Geo([lat, lng]) => write!(f, "_geoPoint({}, {})", lat, lng),
        }
    }
}

impl Member {
    pub fn field(&self) -> Option<&str> {
        match self {
            Member::Field(field) => Some(field),
            Member::Geo(_) => None,
        }
    }

    pub fn geo_point(&self) -> Option<&[f64; 2]> {
        match self {
            Member::Geo(point) => Some(point),
            Member::Field(_) => None,
        }
    }
}

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq)]
pub enum AscDesc {
    Asc(Member),
    Desc(Member),
}

impl AscDesc {
    pub fn member(&self) -> &Member {
        match self {
            AscDesc::Asc(member) => member,
            AscDesc::Desc(member) => member,
        }
    }

    pub fn field(&self) -> Option<&str> {
        self.member().field()
    }
}

impl FromStr for AscDesc {
    type Err = AscDescError;

    fn from_str(text: &str) -> Result<AscDesc, Self::Err> {
        match text.rsplit_once(':') {
            Some((left, "asc")) => Ok(AscDesc::Asc(left.parse()?)),
            Some((left, "desc")) => Ok(AscDesc::Desc(left.parse()?)),
            _ => Err(AscDescError::InvalidSyntax { name: text.to_string() }),
        }
    }
}

#[derive(Error, Debug)]
pub enum SortError {
    #[error(transparent)]
    ParseGeoError { error: BadGeoError },
    #[error("Invalid syntax for the geo parameter: expected expression formatted like \
        `_geoPoint(latitude, longitude)` and ending by `:asc` or `:desc`, found `{name}`.")]
    BadGeoPointUsage { name: String },
    #[error("Invalid syntax for the sort parameter: expected expression ending by `:asc` or `:desc`, found `{name}`.")]
    InvalidName { name: String },
    #[error("`{name}` is a reserved keyword and thus can't be used as a sort expression.")]
    ReservedName { name: String },
    #[error("`{name}` is a reserved keyword and thus can't be used as a sort expression. \
        Use the _geoPoint(latitude, longitude) built-in rule to sort on _geo field coordinates.")]
    ReservedNameForSettings { name: String },
    #[error("`{name}` is a reserved keyword and thus can't be used as a sort expression. \
        Use the _geoPoint(latitude, longitude) built-in rule to sort on _geo field coordinates.")]
    ReservedNameForFilter { name: String },
}

impl From<AscDescError> for SortError {
    fn from(error: AscDescError) -> Self {
        match error {
            AscDescError::GeoError(error) => SortError::ParseGeoError { error },
            AscDescError::InvalidSyntax { name } => SortError::InvalidName { name },
            AscDescError::ReservedKeyword { name } if name.starts_with("_geoPoint") => {
                SortError::BadGeoPointUsage { name }
            }
            AscDescError::ReservedKeyword { name } if &name == "_geo" => {
                SortError::ReservedNameForSettings { name }
            }
            AscDescError::ReservedKeyword { name } if name.starts_with("_geoRadius") => {
                SortError::ReservedNameForFilter { name: String::from("_geoRadius") }
            }
            AscDescError::ReservedKeyword { name } if name.starts_with("_geoBoundingBox") => {
                SortError::ReservedNameForFilter { name: String::from("_geoBoundingBox") }
            }
            AscDescError::ReservedKeyword { name } => SortError::ReservedName { name },
        }
    }
}

impl From<SortError> for Error {
    fn from(error: SortError) -> Self {
        Self::UserError(UserError::SortError(error))
    }
}

#[cfg(test)]
mod tests {
    use big_s::S;
    use AscDesc::*;
    use AscDescError::*;
    use Member::*;

    use super::*;

    #[test]
    fn parse_asc_desc() {
        let valid_req = [
            ("truc:asc", Asc(Field(S("truc")))),
            ("bidule:desc", Desc(Field(S("bidule")))),
            ("a-b:desc", Desc(Field(S("a-b")))),
            ("a:b:desc", Desc(Field(S("a:b")))),
            ("a12:asc", Asc(Field(S("a12")))),
            ("42:asc", Asc(Field(S("42")))),
            ("_geoPoint(42, 59):asc", Asc(Geo([42., 59.]))),
            ("_geoPoint(42.459, 59):desc", Desc(Geo([42.459, 59.]))),
            ("_geoPoint(42, 59.895):desc", Desc(Geo([42., 59.895]))),
            ("_geoPoint(42, 59.895):desc", Desc(Geo([42., 59.895]))),
            ("_geoPoint(90.000000000, 180):desc", Desc(Geo([90., 180.]))),
            ("_geoPoint(-90, -180.0000000000):asc", Asc(Geo([-90., -180.]))),
            ("_geoPoint(42.0002, 59.895):desc", Desc(Geo([42.0002, 59.895]))),
            ("_geoPoint(42., 59.):desc", Desc(Geo([42., 59.]))),
            ("truc(12, 13):desc", Desc(Field(S("truc(12, 13)")))),
        ];

        for (req, expected) in valid_req {
            let res = req.parse::<AscDesc>();
            assert!(
                res.is_ok(),
                "Failed to parse `{}`, was expecting `{:?}` but instead got `{:?}`",
                req,
                expected,
                res
            );
            assert_eq!(res.unwrap(), expected);
        }

        let invalid_req = [
            ("truc:machin", InvalidSyntax { name: S("truc:machin") }),
            ("truc:deesc", InvalidSyntax { name: S("truc:deesc") }),
            ("truc:asc:deesc", InvalidSyntax { name: S("truc:asc:deesc") }),
            ("42desc", InvalidSyntax { name: S("42desc") }),
            ("_geoPoint:asc", ReservedKeyword { name: S("_geoPoint") }),
            ("_geoDistance:asc", ReservedKeyword { name: S("_geoDistance") }),
            ("_geoPoint(42.12 , 59.598)", InvalidSyntax { name: S("_geoPoint(42.12 , 59.598)") }),
            (
                "_geoPoint(42.12 , 59.598):deesc",
                InvalidSyntax { name: S("_geoPoint(42.12 , 59.598):deesc") },
            ),
            (
                "_geoPoint(42.12 , 59.598):machin",
                InvalidSyntax { name: S("_geoPoint(42.12 , 59.598):machin") },
            ),
            (
                "_geoPoint(42.12 , 59.598):asc:aasc",
                InvalidSyntax { name: S("_geoPoint(42.12 , 59.598):asc:aasc") },
            ),
            (
                "_geoPoint(42,12 , 59,598):desc",
                ReservedKeyword { name: S("_geoPoint(42,12 , 59,598)") },
            ),
            ("_geoPoint(35, 85, 75):asc", ReservedKeyword { name: S("_geoPoint(35, 85, 75)") }),
            ("_geoPoint(18):asc", ReservedKeyword { name: S("_geoPoint(18)") }),
            ("_geoPoint(200, 200):asc", GeoError(BadGeoError::Lat(200.))),
            ("_geoPoint(90.000001, 0):asc", GeoError(BadGeoError::Lat(90.000001))),
            ("_geoPoint(0, -180.000001):desc", GeoError(BadGeoError::Lng(-180.000001))),
            ("_geoPoint(159.256, 130):asc", GeoError(BadGeoError::Lat(159.256))),
            ("_geoPoint(12, -2021):desc", GeoError(BadGeoError::Lng(-2021.))),
            ("_geo(12, -2021):asc", ReservedKeyword { name: S("_geo(12, -2021)") }),
            ("_geo(12, -2021):desc", ReservedKeyword { name: S("_geo(12, -2021)") }),
            ("_geoDistance(12, -2021):asc", ReservedKeyword { name: S("_geoDistance(12, -2021)") }),
            (
                "_geoDistance(12, -2021):desc",
                ReservedKeyword { name: S("_geoDistance(12, -2021)") },
            ),
        ];

        for (req, expected_error) in invalid_req {
            let res = req.parse::<AscDesc>();
            assert!(
                res.is_err(),
                "Should not be able to parse `{}`, was expecting an error but instead got: `{:?}`",
                req,
                res,
            );
            let res = res.unwrap_err();
            assert_eq!(
                res.to_string(),
                expected_error.to_string(),
                "Bad error for input {}: got `{:?}` instead of `{:?}`",
                req,
                res,
                expected_error
            );
        }
    }
}
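
The parser is driven through FromStr, as the tests above show; a minimal usage sketch, relying on the crate-root re-exports of AscDesc and Member that criterion.rs below also imports:

use milli::{AscDesc, Member};

fn demo_asc_desc() {
    // A plain field rule: everything before the final `:asc`/`:desc` is the field.
    let rule: AscDesc = "price:asc".parse().expect("a valid asc/desc rule");
    assert_eq!(rule.field(), Some("price"));

    // A `_geoPoint(lat, lng)` prefix becomes a geo member with validated coordinates.
    let geo: AscDesc = "_geoPoint(42, 59):asc".parse().unwrap();
    assert!(matches!(geo.member(), Member::Geo(_)));
}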

crates/milli/src/criterion.rs (Normal file, 188 lines)
@@ -0,0 +1,188 @@
use std::fmt;
use std::str::FromStr;

use serde::{Deserialize, Serialize};
use thiserror::Error;

use crate::{AscDesc, Member};

#[derive(Error, Debug)]
pub enum CriterionError {
    #[error("`{name}` ranking rule is invalid. Valid ranking rules are words, typo, sort, proximity, attribute, exactness and custom ranking rules.")]
    InvalidName { name: String },
    #[error("`{name}` is a reserved keyword and thus can't be used as a ranking rule")]
    ReservedName { name: String },
    #[error(
        "`{name}` is a reserved keyword and thus can't be used as a ranking rule. \
        `{name}` can only be used for sorting at search time"
    )]
    ReservedNameForSort { name: String },
    #[error(
        "`{name}` is a reserved keyword and thus can't be used as a ranking rule. \
        `{name}` can only be used for filtering at search time"
    )]
    ReservedNameForFilter { name: String },
}

#[derive(Debug, Serialize, Deserialize, Clone, PartialEq, Eq)]
pub enum Criterion {
    /// Sorted by decreasing number of matched query terms.
    /// Query words at the front of an attribute are considered better than at the back.
    Words,
    /// Sorted by increasing number of typos.
    Typo,
    /// Sorted by increasing distance between matched query terms.
    Proximity,
    /// Documents with query words contained in more important
    /// attributes are considered better.
    Attribute,
    /// Dynamically sort the documents at query time. None, one or multiple Asc/Desc sortable
    /// attributes can be used in place of this criterion at query time.
    Sort,
    /// Sorted by the similarity of the matched words with the query words.
    Exactness,
    /// Sorted by the increasing value of the field specified.
    Asc(String),
    /// Sorted by the decreasing value of the field specified.
    Desc(String),
}

impl Criterion {
    /// Returns the field name parameter of this criterion.
    pub fn field_name(&self) -> Option<&str> {
        match self {
            Criterion::Asc(name) | Criterion::Desc(name) => Some(name),
            _otherwise => None,
        }
    }
}

impl FromStr for Criterion {
    type Err = CriterionError;

    fn from_str(text: &str) -> Result<Criterion, Self::Err> {
        match text {
            "words" => Ok(Criterion::Words),
            "typo" => Ok(Criterion::Typo),
            "proximity" => Ok(Criterion::Proximity),
            "attribute" => Ok(Criterion::Attribute),
            "sort" => Ok(Criterion::Sort),
            "exactness" => Ok(Criterion::Exactness),
            text => match AscDesc::from_str(text)? {
                AscDesc::Asc(Member::Field(field)) => Ok(Criterion::Asc(field)),
                AscDesc::Desc(Member::Field(field)) => Ok(Criterion::Desc(field)),
                AscDesc::Asc(Member::Geo(_)) | AscDesc::Desc(Member::Geo(_)) => {
                    Err(CriterionError::ReservedNameForSort { name: "_geoPoint".to_string() })?
                }
            },
        }
    }
}

pub fn default_criteria() -> Vec<Criterion> {
    vec![
        Criterion::Words,
        Criterion::Typo,
        Criterion::Proximity,
        Criterion::Attribute,
        Criterion::Sort,
        Criterion::Exactness,
    ]
}

impl fmt::Display for Criterion {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        use Criterion::*;

        match self {
            Words => f.write_str("words"),
            Typo => f.write_str("typo"),
            Proximity => f.write_str("proximity"),
            Attribute => f.write_str("attribute"),
            Sort => f.write_str("sort"),
            Exactness => f.write_str("exactness"),
            Asc(attr) => write!(f, "{}:asc", attr),
            Desc(attr) => write!(f, "{}:desc", attr),
        }
    }
}

#[cfg(test)]
mod tests {
    use big_s::S;
    use CriterionError::*;

    use super::*;

    #[test]
    fn parse_criterion() {
        let valid_criteria = [
            ("words", Criterion::Words),
            ("typo", Criterion::Typo),
            ("proximity", Criterion::Proximity),
            ("attribute", Criterion::Attribute),
            ("sort", Criterion::Sort),
            ("exactness", Criterion::Exactness),
            ("price:asc", Criterion::Asc(S("price"))),
            ("price:desc", Criterion::Desc(S("price"))),
            ("price:asc:desc", Criterion::Desc(S("price:asc"))),
            ("truc:machin:desc", Criterion::Desc(S("truc:machin"))),
            ("hello-world!:desc", Criterion::Desc(S("hello-world!"))),
            ("it's spacy over there:asc", Criterion::Asc(S("it's spacy over there"))),
        ];

        for (input, expected) in valid_criteria {
            let res = input.parse::<Criterion>();
            assert!(
                res.is_ok(),
                "Failed to parse `{}`, was expecting `{:?}` but instead got `{:?}`",
                input,
                expected,
                res
            );
            assert_eq!(res.unwrap(), expected);
        }

        let invalid_criteria = [
            ("words suffix", InvalidName { name: S("words suffix") }),
            ("prefix typo", InvalidName { name: S("prefix typo") }),
            ("proximity attribute", InvalidName { name: S("proximity attribute") }),
            ("price", InvalidName { name: S("price") }),
            ("asc:price", InvalidName { name: S("asc:price") }),
            ("price:deesc", InvalidName { name: S("price:deesc") }),
            ("price:aasc", InvalidName { name: S("price:aasc") }),
            ("price:asc and desc", InvalidName { name: S("price:asc and desc") }),
            ("price:asc:truc", InvalidName { name: S("price:asc:truc") }),
            ("_geo:asc", ReservedName { name: S("_geo") }),
            ("_geoDistance:asc", ReservedName { name: S("_geoDistance") }),
            ("_geoPoint:asc", ReservedNameForSort { name: S("_geoPoint") }),
            ("_geoPoint(42, 75):asc", ReservedNameForSort { name: S("_geoPoint") }),
            ("_geoRadius:asc", ReservedNameForFilter { name: S("_geoRadius") }),
            ("_geoRadius(42, 75, 59):asc", ReservedNameForFilter { name: S("_geoRadius") }),
            ("_geoBoundingBox:asc", ReservedNameForFilter { name: S("_geoBoundingBox") }),
            (
                "_geoBoundingBox([42, 75], [75, 59]):asc",
                ReservedNameForFilter { name: S("_geoBoundingBox") },
            ),
        ];

        for (input, expected) in invalid_criteria {
            let res = input.parse::<Criterion>();
            assert!(
                res.is_err(),
                "Should not be able to parse `{}`, was expecting an error but instead got: `{:?}`",
                input,
                res
            );
            let res = res.unwrap_err();
            assert_eq!(
                res.to_string(),
                expected.to_string(),
                "Bad error for input {}: got `{:?}` instead of `{:?}`",
                input,
                res,
                expected
            );
        }
    }
}
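
A short sketch of how these rules are used, assuming default_criteria is re-exported at the crate root alongside Criterion and CriterionError (CriterionError demonstrably is, since asc_desc.rs imports it from the crate root):

use milli::{default_criteria, Criterion, CriterionError};

fn demo_criterion() -> Result<(), CriterionError> {
    // A custom rule parses through the same AscDesc machinery and round-trips via Display.
    let rule: Criterion = "price:desc".parse()?;
    assert_eq!(rule.to_string(), "price:desc");
    assert_eq!(rule.field_name(), Some("price"));

    // The default ranking pipeline is the six built-in rules, in order.
    assert_eq!(default_criteria().len(), 6);
    Ok(())
}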

crates/milli/src/documents/builder.rs (Normal file, 600 lines)
@@ -0,0 +1,600 @@
use std::io::{self, Write};

use grenad::{CompressionType, WriterBuilder};
use serde::de::Deserializer;
use serde_json::{to_writer, Value};

use super::{DocumentsBatchIndex, Error, DOCUMENTS_BATCH_INDEX_KEY};
use crate::documents::serde_impl::DocumentVisitor;
use crate::Object;

/// The `DocumentsBatchBuilder` provides a way to build a documents batch in the intermediary
/// format used by milli.
///
/// The writer used by the `DocumentsBatchBuilder` can be read using a `DocumentsBatchReader`
/// to iterate over the documents.
///
/// ## Example
/// ```
/// use serde_json::json;
/// use milli::documents::DocumentsBatchBuilder;
///
/// let json = json!({ "id": 1, "name": "foo" });
///
/// let mut builder = DocumentsBatchBuilder::new(Vec::new());
/// builder.append_json_object(json.as_object().unwrap()).unwrap();
/// let _vector = builder.into_inner().unwrap();
/// ```
pub struct DocumentsBatchBuilder<W> {
    /// The inner grenad writer, the last value must always be the `DocumentsBatchIndex`.
    writer: grenad::Writer<W>,
    /// A map that creates the relation between field ids and field names.
    fields_index: DocumentsBatchIndex,
    /// The number of documents that were added to this builder,
    /// it doesn't take the primary key of the documents into account at this point.
    documents_count: u32,

    /// A buffer to store a temporary obkv buffer and avoid reallocating.
    obkv_buffer: Vec<u8>,
    /// A buffer to serialize the values and avoid reallocating,
    /// serialized values are stored in an obkv.
    value_buffer: Vec<u8>,
}

impl<W: Write> DocumentsBatchBuilder<W> {
    pub fn new(writer: W) -> DocumentsBatchBuilder<W> {
        DocumentsBatchBuilder {
            writer: WriterBuilder::new().compression_type(CompressionType::None).build(writer),
            fields_index: DocumentsBatchIndex::default(),
            documents_count: 0,
            obkv_buffer: Vec::new(),
            value_buffer: Vec::new(),
        }
    }

    /// Returns the number of documents inserted into this builder.
    pub fn documents_count(&self) -> u32 {
        self.documents_count
    }

    /// Appends a new JSON object into the batch and updates the `DocumentsBatchIndex` accordingly.
    pub fn append_json_object(&mut self, object: &Object) -> io::Result<()> {
        // Make sure that we insert the fields ids in order as the obkv writer has this requirement.
        let mut fields_ids: Vec<_> = object.keys().map(|k| self.fields_index.insert(k)).collect();
        fields_ids.sort_unstable();

        self.obkv_buffer.clear();
        let mut writer = obkv::KvWriter::new(&mut self.obkv_buffer);
        for field_id in fields_ids {
            let key = self.fields_index.name(field_id).unwrap();
            self.value_buffer.clear();
            to_writer(&mut self.value_buffer, &object[key])?;
            writer.insert(field_id, &self.value_buffer)?;
        }

        let internal_id = self.documents_count.to_be_bytes();
        let document_bytes = writer.into_inner()?;
        self.writer.insert(internal_id, &document_bytes)?;
        self.documents_count += 1;

        Ok(())
    }

    /// Appends a new JSON array of objects into the batch and updates the `DocumentsBatchIndex` accordingly.
    pub fn append_json_array<R: io::Read>(&mut self, reader: R) -> Result<(), Error> {
        let mut de = serde_json::Deserializer::from_reader(reader);
        let mut visitor = DocumentVisitor::new(self);
        de.deserialize_any(&mut visitor)?
    }

    /// Appends a new CSV file into the batch and updates the `DocumentsBatchIndex` accordingly.
    pub fn append_csv<R: io::Read>(&mut self, mut reader: csv::Reader<R>) -> Result<(), Error> {
        // Make sure that we insert the fields ids in order as the obkv writer has this requirement.
        let mut typed_fields_ids: Vec<_> = reader
            .headers()?
            .into_iter()
            .map(parse_csv_header)
            .map(|(k, t)| (self.fields_index.insert(k), t))
            .enumerate()
            .collect();
        // Make sure that we insert the fields ids in order as the obkv writer has this requirement.
        typed_fields_ids.sort_unstable_by_key(|(_, (fid, _))| *fid);

        let mut record = csv::StringRecord::new();
        let mut line = 0;
        while reader.read_record(&mut record)? {
            // We increment here and not at the end of the while loop to take
            // the header offset into account.
            line += 1;

            self.obkv_buffer.clear();
            let mut writer = obkv::KvWriter::new(&mut self.obkv_buffer);

            for (i, (field_id, type_)) in typed_fields_ids.iter() {
                self.value_buffer.clear();

                let value = &record[*i];
                let trimmed_value = value.trim();
                match type_ {
                    AllowedType::Number => {
                        if trimmed_value.is_empty() {
                            to_writer(&mut self.value_buffer, &Value::Null)?;
                        } else if let Ok(integer) = trimmed_value.parse::<i64>() {
                            to_writer(&mut self.value_buffer, &integer)?;
                        } else {
                            match trimmed_value.parse::<f64>() {
                                Ok(float) => {
                                    to_writer(&mut self.value_buffer, &float)?;
                                }
                                Err(error) => {
                                    return Err(Error::ParseFloat {
                                        error,
                                        line,
                                        value: value.to_string(),
                                    });
                                }
                            }
                        }
                    }
                    AllowedType::Boolean => {
                        if trimmed_value.is_empty() {
                            to_writer(&mut self.value_buffer, &Value::Null)?;
                        } else {
                            match trimmed_value.parse::<bool>() {
                                Ok(bool) => {
                                    to_writer(&mut self.value_buffer, &bool)?;
                                }
                                Err(error) => {
                                    return Err(Error::ParseBool {
                                        error,
                                        line,
                                        value: value.to_string(),
                                    });
                                }
                            }
                        }
                    }
                    AllowedType::String => {
                        if value.is_empty() {
                            to_writer(&mut self.value_buffer, &Value::Null)?;
                        } else {
                            to_writer(&mut self.value_buffer, value)?;
                        }
                    }
                }

                // We insert into the obkv writer the value buffer that has been filled just above.
                writer.insert(*field_id, &self.value_buffer)?;
            }

            let internal_id = self.documents_count.to_be_bytes();
            let document_bytes = writer.into_inner()?;
            self.writer.insert(internal_id, &document_bytes)?;
            self.documents_count += 1;
        }

        Ok(())
    }

    /// Flushes the content on disk and stores the final version of the `DocumentsBatchIndex`.
    pub fn into_inner(self) -> io::Result<W> {
        let DocumentsBatchBuilder { mut writer, fields_index, mut value_buffer, .. } = self;

        // We serialize and insert the `DocumentsBatchIndex` as the last key of the grenad writer.
        value_buffer.clear();
        to_writer(&mut value_buffer, &fields_index)?;
        writer.insert(DOCUMENTS_BATCH_INDEX_KEY, &value_buffer)?;

        writer.into_inner()
    }
}

#[derive(Debug)]
enum AllowedType {
    String,
    Boolean,
    Number,
}

fn parse_csv_header(header: &str) -> (&str, AllowedType) {
    // if there are several separators we only split on the last one.
    match header.rsplit_once(':') {
        Some((field_name, field_type)) => match field_type {
            "string" => (field_name, AllowedType::String),
            "boolean" => (field_name, AllowedType::Boolean),
            "number" => (field_name, AllowedType::Number),
            // if the pattern isn't recognized, we keep the whole field.
            _otherwise => (header, AllowedType::String),
        },
        None => (header, AllowedType::String),
    }
}

#[cfg(test)]
mod test {
    use std::io::Cursor;

    use serde_json::json;

    use super::*;
    use crate::documents::{obkv_to_object, DocumentsBatchReader};

    #[test]
    fn add_single_documents_json() {
        let json = serde_json::json!({
            "id": 1,
            "field": "hello!",
        });

        let mut builder = DocumentsBatchBuilder::new(Vec::new());
        builder.append_json_object(json.as_object().unwrap()).unwrap();

        let json = serde_json::json!({
            "blabla": false,
            "field": "hello!",
            "id": 1,
        });

        builder.append_json_object(json.as_object().unwrap()).unwrap();

        assert_eq!(builder.documents_count(), 2);
        let vector = builder.into_inner().unwrap();

        let (mut cursor, index) = DocumentsBatchReader::from_reader(Cursor::new(vector))
            .unwrap()
            .into_cursor_and_fields_index();
        assert_eq!(index.len(), 3);

        let document = cursor.next_document().unwrap().unwrap();
        assert_eq!(document.iter().count(), 2);

        let document = cursor.next_document().unwrap().unwrap();
        assert_eq!(document.iter().count(), 3);

        assert!(cursor.next_document().unwrap().is_none());
    }

    #[test]
    fn add_documents_csv() {
        let csv_content = "id:number,field:string\n1,hello!\n2,blabla";
        let csv = csv::Reader::from_reader(Cursor::new(csv_content));

        let mut builder = DocumentsBatchBuilder::new(Vec::new());
        builder.append_csv(csv).unwrap();
        assert_eq!(builder.documents_count(), 2);
        let vector = builder.into_inner().unwrap();

        let (mut cursor, index) = DocumentsBatchReader::from_reader(Cursor::new(vector))
            .unwrap()
            .into_cursor_and_fields_index();
        assert_eq!(index.len(), 2);

        let document = cursor.next_document().unwrap().unwrap();
        assert_eq!(document.iter().count(), 2);

        let document = cursor.next_document().unwrap().unwrap();
        assert_eq!(document.iter().count(), 2);

        assert!(cursor.next_document().unwrap().is_none());
    }

    #[test]
    fn simple_csv_document() {
        let csv_content = r#"city,country,pop
"Boston","United States","4628910""#;
        let csv = csv::Reader::from_reader(Cursor::new(csv_content));

        let mut builder = DocumentsBatchBuilder::new(Vec::new());
        builder.append_csv(csv).unwrap();
        let vector = builder.into_inner().unwrap();

        let (mut cursor, index) = DocumentsBatchReader::from_reader(Cursor::new(vector))
            .unwrap()
            .into_cursor_and_fields_index();
        let doc = cursor.next_document().unwrap().unwrap();
        let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();

        assert_eq!(
            val,
            json!({
                "city": "Boston",
                "country": "United States",
                "pop": "4628910",
            })
        );

        assert!(cursor.next_document().unwrap().is_none());
    }

    #[test]
    fn comma_in_field() {
        let csv_content = r#"city,country,pop
"Boston","United, States","4628910""#;
        let csv = csv::Reader::from_reader(Cursor::new(csv_content));

        let mut builder = DocumentsBatchBuilder::new(Vec::new());
        builder.append_csv(csv).unwrap();
        let vector = builder.into_inner().unwrap();

        let (mut cursor, index) = DocumentsBatchReader::from_reader(Cursor::new(vector))
            .unwrap()
            .into_cursor_and_fields_index();

        let doc = cursor.next_document().unwrap().unwrap();
        let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();

        assert_eq!(
            val,
            json!({
                "city": "Boston",
                "country": "United, States",
                "pop": "4628910",
            })
        );
    }

    #[test]
    fn quote_in_field() {
        let csv_content = r#"city,country,pop
"Boston","United"" States","4628910""#;
        let csv = csv::Reader::from_reader(Cursor::new(csv_content));

        let mut builder = DocumentsBatchBuilder::new(Vec::new());
        builder.append_csv(csv).unwrap();
        let vector = builder.into_inner().unwrap();

        let (mut cursor, index) = DocumentsBatchReader::from_reader(Cursor::new(vector))
            .unwrap()
            .into_cursor_and_fields_index();

        let doc = cursor.next_document().unwrap().unwrap();
        let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();

        assert_eq!(
            val,
            json!({
                "city": "Boston",
                "country": "United\" States",
                "pop": "4628910",
            })
        );
    }

    #[test]
    fn integer_in_field() {
        let csv_content = r#"city,country,pop:number
"Boston","United States","4628910""#;
        let csv = csv::Reader::from_reader(Cursor::new(csv_content));

        let mut builder = DocumentsBatchBuilder::new(Vec::new());
        builder.append_csv(csv).unwrap();
        let vector = builder.into_inner().unwrap();

        let (mut cursor, index) = DocumentsBatchReader::from_reader(Cursor::new(vector))
            .unwrap()
            .into_cursor_and_fields_index();

        let doc = cursor.next_document().unwrap().unwrap();
        let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();

        assert_eq!(
            val,
            json!({
                "city": "Boston",
                "country": "United States",
                "pop": 4628910,
            })
        );
    }

    #[test]
    fn integer_as_id() {
        let csv_content = r#""id:number","title:string","comment:string"
"1239","Pride and Prejudice","A great book""#;
        let csv = csv::Reader::from_reader(Cursor::new(csv_content));

        let mut builder = DocumentsBatchBuilder::new(Vec::new());
        builder.append_csv(csv).unwrap();
        let vector = builder.into_inner().unwrap();

        let (mut cursor, index) = DocumentsBatchReader::from_reader(Cursor::new(vector))
            .unwrap()
            .into_cursor_and_fields_index();

        let doc = cursor.next_document().unwrap().unwrap();
        let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();

        assert_eq!(
            val,
            json!({
                "id": 1239,
                "title": "Pride and Prejudice",
                "comment": "A great book",
            })
        );
    }

    #[test]
    fn float_in_field() {
        let csv_content = r#"city,country,pop:number
"Boston","United States","4628910.01""#;
        let csv = csv::Reader::from_reader(Cursor::new(csv_content));

        let mut builder = DocumentsBatchBuilder::new(Vec::new());
        builder.append_csv(csv).unwrap();
        let vector = builder.into_inner().unwrap();

        let (mut cursor, index) = DocumentsBatchReader::from_reader(Cursor::new(vector))
            .unwrap()
            .into_cursor_and_fields_index();

        let doc = cursor.next_document().unwrap().unwrap();
        let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();

        assert_eq!(
            val,
            json!({
                "city": "Boston",
                "country": "United States",
                "pop": 4628910.01,
            })
        );
    }

    #[test]
    fn several_colon_in_header() {
        let csv_content = r#"city:love:string,country:state,pop
"Boston","United States","4628910""#;
        let csv = csv::Reader::from_reader(Cursor::new(csv_content));

        let mut builder = DocumentsBatchBuilder::new(Vec::new());
        builder.append_csv(csv).unwrap();
        let vector = builder.into_inner().unwrap();

        let (mut cursor, index) = DocumentsBatchReader::from_reader(Cursor::new(vector))
            .unwrap()
            .into_cursor_and_fields_index();

        let doc = cursor.next_document().unwrap().unwrap();
        let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();

        assert_eq!(
            val,
            json!({
                "city:love": "Boston",
                "country:state": "United States",
                "pop": "4628910",
            })
        );
    }

    #[test]
    fn ending_by_colon_in_header() {
        let csv_content = r#"city:,country,pop
"Boston","United States","4628910""#;
        let csv = csv::Reader::from_reader(Cursor::new(csv_content));

        let mut builder = DocumentsBatchBuilder::new(Vec::new());
        builder.append_csv(csv).unwrap();
        let vector = builder.into_inner().unwrap();

        let (mut cursor, index) = DocumentsBatchReader::from_reader(Cursor::new(vector))
            .unwrap()
            .into_cursor_and_fields_index();

        let doc = cursor.next_document().unwrap().unwrap();
        let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();

        assert_eq!(
            val,
            json!({
                "city:": "Boston",
                "country": "United States",
                "pop": "4628910",
            })
        );
    }

    #[test]
    fn starting_by_colon_in_header() {
        let csv_content = r#":city,country,pop
"Boston","United States","4628910""#;
        let csv = csv::Reader::from_reader(Cursor::new(csv_content));

        let mut builder = DocumentsBatchBuilder::new(Vec::new());
        builder.append_csv(csv).unwrap();
        let vector = builder.into_inner().unwrap();

        let (mut cursor, index) = DocumentsBatchReader::from_reader(Cursor::new(vector))
            .unwrap()
            .into_cursor_and_fields_index();

        let doc = cursor.next_document().unwrap().unwrap();
        let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();

        assert_eq!(
            val,
            json!({
                ":city": "Boston",
                "country": "United States",
                "pop": "4628910",
            })
        );
    }

    #[ignore]
    #[test]
    fn starting_by_colon_in_header2() {
        let csv_content = r#":string,country,pop
"Boston","United States","4628910""#;
        let csv = csv::Reader::from_reader(Cursor::new(csv_content));

        let mut builder = DocumentsBatchBuilder::new(Vec::new());
        builder.append_csv(csv).unwrap();
        let vector = builder.into_inner().unwrap();

        let (mut cursor, _) = DocumentsBatchReader::from_reader(Cursor::new(vector))
            .unwrap()
            .into_cursor_and_fields_index();

        assert!(cursor.next_document().is_err());
    }

    #[test]
    fn double_colon_in_header() {
        let csv_content = r#"city::string,country,pop
"Boston","United States","4628910""#;
        let csv = csv::Reader::from_reader(Cursor::new(csv_content));

        let mut builder = DocumentsBatchBuilder::new(Vec::new());
        builder.append_csv(csv).unwrap();
        let vector = builder.into_inner().unwrap();

        let (mut cursor, index) = DocumentsBatchReader::from_reader(Cursor::new(vector))
            .unwrap()
            .into_cursor_and_fields_index();

        let doc = cursor.next_document().unwrap().unwrap();
        let val = obkv_to_object(&doc, &index).map(Value::from).unwrap();

        assert_eq!(
            val,
            json!({
                "city:": "Boston",
                "country": "United States",
                "pop": "4628910",
            })
        );
    }

    #[test]
    fn bad_type_in_header() {
        let csv_content = r#"city,country:number,pop
"Boston","United States","4628910""#;
        let csv = csv::Reader::from_reader(Cursor::new(csv_content));

        let mut builder = DocumentsBatchBuilder::new(Vec::new());
        assert!(builder.append_csv(csv).is_err());
    }

    #[test]
    fn bad_column_count1() {
        let csv_content = r#"city,country,pop
"Boston","United States","4628910", "too much
let csv = csv::Reader::from_reader(Cursor::new(csv_content"#;
        let csv = csv::Reader::from_reader(Cursor::new(csv_content));

        let mut builder = DocumentsBatchBuilder::new(Vec::new());
        assert!(builder.append_csv(csv).is_err());
    }

    #[test]
    fn bad_column_count2() {
        let csv_content = r#"city,country,pop
"Boston","United States""#;
        let csv = csv::Reader::from_reader(Cursor::new(csv_content));

        let mut builder = DocumentsBatchBuilder::new(Vec::new());
        assert!(builder.append_csv(csv).is_err());
    }
}

crates/milli/src/documents/enriched.rs (Normal file, 110 lines)
@@ -0,0 +1,110 @@
use std::fs::File;
use std::io::BufReader;
use std::{io, str};

use obkv::KvReader;

use super::{
    DocumentsBatchCursor, DocumentsBatchCursorError, DocumentsBatchIndex, DocumentsBatchReader,
    Error,
};
use crate::update::DocumentId;
use crate::FieldId;

/// The `EnrichedDocumentsBatchReader` provides a way to iterate over documents that have
/// been created with a `DocumentsBatchBuilder` and, for the enriched data,
/// a simple `grenad::Reader<File>`.
///
/// The documents are returned in the form of `obkv::Reader` where each field is identified with a
/// `FieldId`. The mapping between the field ids and the field names is done thanks to the index.
pub struct EnrichedDocumentsBatchReader<R> {
    documents: DocumentsBatchReader<R>,
    primary_key: String,
    external_ids: grenad::ReaderCursor<BufReader<File>>,
}

impl<R: io::Read + io::Seek> EnrichedDocumentsBatchReader<R> {
    pub fn new(
        documents: DocumentsBatchReader<R>,
        primary_key: String,
        external_ids: grenad::Reader<BufReader<File>>,
    ) -> Result<Self, Error> {
        if documents.documents_count() as u64 == external_ids.len() {
            Ok(EnrichedDocumentsBatchReader {
                documents,
                primary_key,
                external_ids: external_ids.into_cursor()?,
            })
        } else {
            Err(Error::InvalidEnrichedData)
        }
    }

    pub fn documents_count(&self) -> u32 {
        self.documents.documents_count()
    }

    pub fn primary_key(&self) -> &str {
        &self.primary_key
    }

    pub fn is_empty(&self) -> bool {
        self.documents.is_empty()
    }

    pub fn documents_batch_index(&self) -> &DocumentsBatchIndex {
        self.documents.documents_batch_index()
    }

    /// This method returns a forward cursor over the enriched documents.
    pub fn into_cursor_and_fields_index(
        self,
    ) -> (EnrichedDocumentsBatchCursor<R>, DocumentsBatchIndex) {
        let EnrichedDocumentsBatchReader { documents, primary_key, mut external_ids } = self;
        let (documents, fields_index) = documents.into_cursor_and_fields_index();
        external_ids.reset();
        (EnrichedDocumentsBatchCursor { documents, primary_key, external_ids }, fields_index)
    }
}

#[derive(Debug, Clone)]
pub struct EnrichedDocument<'a> {
    pub document: KvReader<'a, FieldId>,
    pub document_id: DocumentId,
}

pub struct EnrichedDocumentsBatchCursor<R> {
    documents: DocumentsBatchCursor<R>,
    primary_key: String,
    external_ids: grenad::ReaderCursor<BufReader<File>>,
}

impl<R> EnrichedDocumentsBatchCursor<R> {
    pub fn primary_key(&self) -> &str {
        &self.primary_key
    }
    /// Resets the cursor to be able to read from the start again.
    pub fn reset(&mut self) {
        self.documents.reset();
        self.external_ids.reset();
    }
}

impl<R: io::Read + io::Seek> EnrichedDocumentsBatchCursor<R> {
    /// Returns the next document, starting from the first one. Subsequent calls to
    /// `next_enriched_document` advance the document reader until all the documents have been read.
    pub fn next_enriched_document(
        &mut self,
    ) -> Result<Option<EnrichedDocument<'_>>, DocumentsBatchCursorError> {
        let document = self.documents.next_document()?;
        let document_id = match self.external_ids.move_on_next()? {
            Some((_, bytes)) => serde_json::from_slice(bytes).map(Some)?,
            None => None,
        };

        match document.zip(document_id) {
            Some((document, document_id)) => Ok(Some(EnrichedDocument { document, document_id })),
            None => Ok(None),
        }
    }
}
273
crates/milli/src/documents/mod.rs
Normal file
@ -0,0 +1,273 @@
mod builder;
mod enriched;
mod primary_key;
mod reader;
mod serde_impl;

use std::fmt::Debug;
use std::io;
use std::str::Utf8Error;

use bimap::BiHashMap;
pub use builder::DocumentsBatchBuilder;
pub use enriched::{EnrichedDocument, EnrichedDocumentsBatchCursor, EnrichedDocumentsBatchReader};
use obkv::KvReader;
pub use primary_key::{
    validate_document_id_value, DocumentIdExtractionError, FieldIdMapper, PrimaryKey,
    DEFAULT_PRIMARY_KEY,
};
pub use reader::{DocumentsBatchCursor, DocumentsBatchCursorError, DocumentsBatchReader};
use serde::{Deserialize, Serialize};

use crate::error::{FieldIdMapMissingEntry, InternalError};
use crate::{FieldId, Object, Result};

/// The key that is used to store the `DocumentsBatchIndex` datastructure,
/// it is the absolute last key of the list.
const DOCUMENTS_BATCH_INDEX_KEY: [u8; 8] = u64::MAX.to_be_bytes();

/// Helper function to convert an obkv reader into a JSON object.
pub fn obkv_to_object(obkv: &KvReader<'_, FieldId>, index: &DocumentsBatchIndex) -> Result<Object> {
    obkv.iter()
        .map(|(field_id, value)| {
            let field_name = index
                .name(field_id)
                .ok_or(FieldIdMapMissingEntry::FieldId { field_id, process: "obkv_to_object" })?;
            let value = serde_json::from_slice(value).map_err(InternalError::SerdeJson)?;
            Ok((field_name.to_string(), value))
        })
        .collect()
}

/// A bidirectional map that links field ids to their name in a document batch.
#[derive(Default, Clone, Debug, Serialize, Deserialize)]
pub struct DocumentsBatchIndex(pub BiHashMap<FieldId, String>);

impl DocumentsBatchIndex {
    /// Insert the field in the map, or return its field id if it already exists.
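    ///
    /// An illustrative sketch of the intended behavior:
    ///
    /// ```ignore
    /// let mut index = DocumentsBatchIndex::default();
    /// assert_eq!(index.insert("title"), 0);
    /// assert_eq!(index.insert("title"), 0); // already known: the same id comes back
    /// assert_eq!(index.insert("description"), 1);
    /// ```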
    pub fn insert(&mut self, field: &str) -> FieldId {
        match self.0.get_by_right(field) {
            Some(field_id) => *field_id,
            None => {
                let field_id = self.0.len() as FieldId;
                self.0.insert(field_id, field.to_string());
                field_id
            }
        }
    }

    pub fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    pub fn len(&self) -> usize {
        self.0.len()
    }

    pub fn iter(&self) -> bimap::hash::Iter<'_, FieldId, String> {
        self.0.iter()
    }

    pub fn name(&self, id: FieldId) -> Option<&str> {
        self.0.get_by_left(&id).map(AsRef::as_ref)
    }

    pub fn id(&self, name: &str) -> Option<FieldId> {
        self.0.get_by_right(name).cloned()
    }

    pub fn recreate_json(&self, document: &obkv::KvReaderU16<'_>) -> Result<Object> {
        let mut map = Object::new();

        for (k, v) in document.iter() {
            // TODO: TAMO: update the error type
            let key =
                self.0.get_by_left(&k).ok_or(crate::error::InternalError::DatabaseClosing)?.clone();
            let value = serde_json::from_slice::<serde_json::Value>(v)
                .map_err(crate::error::InternalError::SerdeJson)?;
            map.insert(key, value);
        }

        Ok(map)
    }
}

impl FieldIdMapper for DocumentsBatchIndex {
    fn id(&self, name: &str) -> Option<FieldId> {
        self.id(name)
    }
}

#[derive(Debug, thiserror::Error)]
pub enum Error {
    #[error("Error parsing number {value:?} at line {line}: {error}")]
    ParseFloat { error: std::num::ParseFloatError, line: usize, value: String },
    #[error("Error parsing boolean {value:?} at line {line}: {error}")]
    ParseBool { error: std::str::ParseBoolError, line: usize, value: String },
    #[error("Invalid document addition format, missing the documents batch index.")]
    InvalidDocumentFormat,
    #[error("Invalid enriched data.")]
    InvalidEnrichedData,
    #[error(transparent)]
    InvalidUtf8(#[from] Utf8Error),
    #[error(transparent)]
    Csv(#[from] csv::Error),
    #[error(transparent)]
    Json(#[from] serde_json::Error),
    #[error(transparent)]
    Serialize(serde_json::Error),
    #[error(transparent)]
    Grenad(#[from] grenad::Error),
    #[error(transparent)]
    Io(#[from] io::Error),
}

pub fn objects_from_json_value(json: serde_json::Value) -> Vec<crate::Object> {
    let documents = match json {
        object @ serde_json::Value::Object(_) => vec![object],
        serde_json::Value::Array(objects) => objects,
        invalid => {
            panic!("an array of objects must be specified, {:#?} is not an array", invalid)
        }
    };
    let mut objects = vec![];
    for document in documents {
        let object = match document {
            serde_json::Value::Object(object) => object,
            invalid => panic!("an object must be specified, {:#?} is not an object", invalid),
        };
        objects.push(object);
    }
    objects
}

/// Macro used to generate documents, with the same syntax as `serde_json::json`
#[cfg(test)]
macro_rules! documents {
    ($data:tt) => {{
        let documents = serde_json::json!($data);
        let documents = $crate::documents::objects_from_json_value(documents);
        $crate::documents::documents_batch_reader_from_objects(documents)
    }};
}

pub fn documents_batch_reader_from_objects(
    objects: impl IntoIterator<Item = Object>,
) -> DocumentsBatchReader<std::io::Cursor<Vec<u8>>> {
    let mut builder = DocumentsBatchBuilder::new(Vec::new());
    for object in objects {
        builder.append_json_object(&object).unwrap();
    }
    let vector = builder.into_inner().unwrap();
    DocumentsBatchReader::from_reader(std::io::Cursor::new(vector)).unwrap()
}

#[cfg(test)]
mod test {
    use std::io::Cursor;

    use serde_json::{json, Value};

    use super::*;

    #[test]
    fn create_documents_no_errors() {
        let value = json!({
            "number": 1,
            "string": "this is a field",
            "array": ["an", "array"],
            "object": {
                "key": "value",
            },
            "bool": true
        });

        let mut builder = DocumentsBatchBuilder::new(Vec::new());
        builder.append_json_object(value.as_object().unwrap()).unwrap();
        let vector = builder.into_inner().unwrap();

        let (mut documents, index) = DocumentsBatchReader::from_reader(Cursor::new(vector))
            .unwrap()
            .into_cursor_and_fields_index();

        assert_eq!(index.iter().count(), 5);
        let reader = documents.next_document().unwrap().unwrap();
        assert_eq!(reader.iter().count(), 5);
        assert!(documents.next_document().unwrap().is_none());
    }

    #[test]
    fn test_add_multiple_documents() {
        let doc1 = json!({
            "bool": true,
        });
        let doc2 = json!({
            "toto": false,
        });

        let mut builder = DocumentsBatchBuilder::new(Vec::new());
        builder.append_json_object(doc1.as_object().unwrap()).unwrap();
        builder.append_json_object(doc2.as_object().unwrap()).unwrap();
        let vector = builder.into_inner().unwrap();

        let (mut documents, index) = DocumentsBatchReader::from_reader(io::Cursor::new(vector))
            .unwrap()
            .into_cursor_and_fields_index();
        assert_eq!(index.iter().count(), 2);
        let reader = documents.next_document().unwrap().unwrap();
        assert_eq!(reader.iter().count(), 1);
        assert!(documents.next_document().unwrap().is_some());
        assert!(documents.next_document().unwrap().is_none());
    }

    #[test]
    fn test_nested() {
        let docs_reader = documents!([{
            "hello": {
                "toto": ["hello"]
            }
        }]);

        let (mut cursor, _) = docs_reader.into_cursor_and_fields_index();
        let doc = cursor.next_document().unwrap().unwrap();
        let nested: Value = serde_json::from_slice(doc.get(0).unwrap()).unwrap();
        assert_eq!(nested, json!({ "toto": ["hello"] }));
    }

    #[test]
    fn out_of_order_json_fields() {
        let _documents = documents!([
            {"id": 1,"b": 0},
            {"id": 2,"a": 0,"b": 0},
        ]);
    }

    #[test]
    fn csv_types_dont_panic() {
        let csv1_content =
            "id:number,b:boolean,c,d:number\n1,,,\n2,true,doggo,2\n3,false,the best doggo,-2\n4,,\"Hello, World!\",2.5";
        let csv1 = csv::Reader::from_reader(Cursor::new(csv1_content));

        let mut builder = DocumentsBatchBuilder::new(Vec::new());
        builder.append_csv(csv1).unwrap();
        let vector = builder.into_inner().unwrap();

        DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap();
    }

    #[test]
    fn out_of_order_csv_fields() {
        let csv1_content = "id:number,b\n1,0";
        let csv1 = csv::Reader::from_reader(Cursor::new(csv1_content));

        let csv2_content = "id:number,a,b\n2,0,0";
        let csv2 = csv::Reader::from_reader(Cursor::new(csv2_content));

        let mut builder = DocumentsBatchBuilder::new(Vec::new());
        builder.append_csv(csv1).unwrap();
        builder.append_csv(csv2).unwrap();
        let vector = builder.into_inner().unwrap();

        DocumentsBatchReader::from_reader(Cursor::new(vector)).unwrap();
    }
}
174
crates/milli/src/documents/primary_key.rs
Normal file
@ -0,0 +1,174 @@
use std::iter;
use std::result::Result as StdResult;

use serde_json::Value;

use crate::{FieldId, InternalError, Object, Result, UserError};

/// The symbol used to define levels in a nested primary key.
const PRIMARY_KEY_SPLIT_SYMBOL: char = '.';

/// The default primary key that is used when none is specified.
pub const DEFAULT_PRIMARY_KEY: &str = "id";

/// Trait for objects that can map the name of a field to its [`FieldId`].
pub trait FieldIdMapper {
    /// Attempts to map the passed name to its [`FieldId`].
    ///
    /// `None` if the field with this name was not found.
    fn id(&self, name: &str) -> Option<FieldId>;
}

/// A type that represents the type of primary key that has been set
/// for this index, a classic flat one or a nested one.
#[derive(Debug, Clone, Copy)]
pub enum PrimaryKey<'a> {
    Flat { name: &'a str, field_id: FieldId },
    Nested { name: &'a str },
}

pub enum DocumentIdExtractionError {
    InvalidDocumentId(UserError),
    MissingDocumentId,
    TooManyDocumentIds(usize),
}

impl<'a> PrimaryKey<'a> {
    pub fn new(path: &'a str, fields: &impl FieldIdMapper) -> Option<Self> {
        Some(if path.contains(PRIMARY_KEY_SPLIT_SYMBOL) {
            Self::Nested { name: path }
        } else {
            let field_id = fields.id(path)?;
            Self::Flat { name: path, field_id }
        })
    }

    pub fn name(&self) -> &str {
        match self {
            PrimaryKey::Flat { name, .. } => name,
            PrimaryKey::Nested { name } => name,
        }
    }

    pub fn document_id(
        &self,
        document: &obkv::KvReader<'_, FieldId>,
        fields: &impl FieldIdMapper,
    ) -> Result<StdResult<String, DocumentIdExtractionError>> {
        match self {
            PrimaryKey::Flat { name: _, field_id } => match document.get(*field_id) {
                Some(document_id_bytes) => {
                    let document_id = serde_json::from_slice(document_id_bytes)
                        .map_err(InternalError::SerdeJson)?;
                    match validate_document_id_value(document_id) {
                        Ok(document_id) => Ok(Ok(document_id)),
                        Err(user_error) => {
                            Ok(Err(DocumentIdExtractionError::InvalidDocumentId(user_error)))
                        }
                    }
                }
                None => Ok(Err(DocumentIdExtractionError::MissingDocumentId)),
            },
            nested @ PrimaryKey::Nested { .. } => {
                let mut matching_documents_ids = Vec::new();
                for (first_level_name, right) in nested.possible_level_names() {
                    if let Some(field_id) = fields.id(first_level_name) {
                        if let Some(value_bytes) = document.get(field_id) {
                            let object = serde_json::from_slice(value_bytes)
                                .map_err(InternalError::SerdeJson)?;
                            fetch_matching_values(object, right, &mut matching_documents_ids);

                            if matching_documents_ids.len() >= 2 {
                                return Ok(Err(DocumentIdExtractionError::TooManyDocumentIds(
                                    matching_documents_ids.len(),
                                )));
                            }
                        }
                    }
                }

                match matching_documents_ids.pop() {
                    Some(document_id) => match validate_document_id_value(document_id) {
                        Ok(document_id) => Ok(Ok(document_id)),
                        Err(user_error) => {
                            Ok(Err(DocumentIdExtractionError::InvalidDocumentId(user_error)))
                        }
                    },
                    None => Ok(Err(DocumentIdExtractionError::MissingDocumentId)),
                }
            }
        }
    }

    /// Returns an `Iterator` that gives all the possible field names the primary key
    /// can have, depending on the first-level name and the depth of the objects.
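    ///
    /// Illustrative: for a nested key `animal.dog.id`, this yields
    /// `("animal", "dog.id")`, `("animal.dog", "id")` and finally `("animal.dog.id", "")`.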
    pub fn possible_level_names(&self) -> impl Iterator<Item = (&str, &str)> + '_ {
        let name = self.name();
        name.match_indices(PRIMARY_KEY_SPLIT_SYMBOL)
            .map(move |(i, _)| (&name[..i], &name[i + PRIMARY_KEY_SPLIT_SYMBOL.len_utf8()..]))
            .chain(iter::once((name, "")))
    }
}

fn fetch_matching_values(value: Value, selector: &str, output: &mut Vec<Value>) {
    match value {
        Value::Object(object) => fetch_matching_values_in_object(object, selector, "", output),
        otherwise => output.push(otherwise),
    }
}

fn fetch_matching_values_in_object(
    object: Object,
    selector: &str,
    base_key: &str,
    output: &mut Vec<Value>,
) {
    for (key, value) in object {
        let base_key = if base_key.is_empty() {
            key.to_string()
        } else {
            format!("{}{}{}", base_key, PRIMARY_KEY_SPLIT_SYMBOL, key)
        };

        if starts_with(selector, &base_key) {
            match value {
                Value::Object(object) => {
                    fetch_matching_values_in_object(object, selector, &base_key, output)
                }
                value => output.push(value),
            }
        }
    }
}

fn starts_with(selector: &str, key: &str) -> bool {
    selector.strip_prefix(key).map_or(false, |tail| {
        tail.chars().next().map(|c| c == PRIMARY_KEY_SPLIT_SYMBOL).unwrap_or(true)
    })
}

// FIXME: move to a DocumentId struct

fn validate_document_id(document_id: &str) -> Option<&str> {
    if document_id.is_empty()
        || document_id.len() > 512
        || !document_id.chars().all(|c| c.is_ascii_alphanumeric() || c == '-' || c == '_')
    {
        None
    } else {
        Some(document_id)
    }
}
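
// Illustrative: `validate_document_id("doc-1_a")` is `Some("doc-1_a")`, while an
// empty string, an id longer than 512 bytes, or one containing spaces yields `None`.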

pub fn validate_document_id_value(document_id: Value) -> StdResult<String, UserError> {
    match document_id {
        Value::String(string) => match validate_document_id(&string) {
            Some(s) if s.len() == string.len() => Ok(string),
            Some(s) => Ok(s.to_string()),
            None => Err(UserError::InvalidDocumentId { document_id: Value::String(string) }),
        },
        // a `u64` or `i64` cannot be more than 512 bytes once converted to a string
        Value::Number(number) if !number.is_f64() => Ok(number.to_string()),
        content => Err(UserError::InvalidDocumentId { document_id: content }),
    }
}
117
crates/milli/src/documents/reader.rs
Normal file
@ -0,0 +1,117 @@
use std::convert::TryInto;
use std::{error, fmt, io};

use obkv::KvReader;

use super::{DocumentsBatchIndex, Error, DOCUMENTS_BATCH_INDEX_KEY};
use crate::FieldId;

/// The `DocumentsBatchReader` provides a way to iterate over documents that have been created with
/// a `DocumentsBatchWriter`.
///
/// The documents are returned in the form of `obkv::Reader` where each field is identified with a
/// `FieldId`. The mapping between the field ids and the field names is done thanks to the index.
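///
/// A minimal sketch, marked `ignore` since it assumes `vector` holds a batch
/// previously produced by a `DocumentsBatchBuilder`:
///
/// ```ignore
/// let (mut cursor, fields_index) = DocumentsBatchReader::from_reader(Cursor::new(vector))?
///     .into_cursor_and_fields_index();
/// while let Some(document) = cursor.next_document()? {
///     let _object = obkv_to_object(&document, &fields_index)?;
/// }
/// ```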
pub struct DocumentsBatchReader<R> {
    cursor: grenad::ReaderCursor<R>,
    fields_index: DocumentsBatchIndex,
}

impl<R: io::Read + io::Seek> DocumentsBatchReader<R> {
    pub fn new(cursor: DocumentsBatchCursor<R>, fields_index: DocumentsBatchIndex) -> Self {
        Self { cursor: cursor.cursor, fields_index }
    }

    /// Construct a `DocumentsBatchReader` from a reader.
    ///
    /// It first retrieves the index, then moves to the first document. Use the
    /// `into_cursor_and_fields_index` method to iterate over the documents, from the first to the last.
    #[tracing::instrument(level = "trace", skip_all, target = "indexing::documents")]
    pub fn from_reader(reader: R) -> Result<Self, Error> {
        let reader = grenad::Reader::new(reader)?;
        let mut cursor = reader.into_cursor()?;

        let fields_index = match cursor.move_on_key_equal_to(DOCUMENTS_BATCH_INDEX_KEY)? {
            Some((_, value)) => serde_json::from_slice(value).map_err(Error::Serialize)?,
            None => return Err(Error::InvalidDocumentFormat),
        };

        Ok(DocumentsBatchReader { cursor, fields_index })
    }

    pub fn documents_count(&self) -> u32 {
        self.cursor.len().saturating_sub(1).try_into().expect("Invalid number of documents")
    }

    pub fn is_empty(&self) -> bool {
        self.cursor.len().saturating_sub(1) == 0
    }

    pub fn documents_batch_index(&self) -> &DocumentsBatchIndex {
        &self.fields_index
    }

    /// This method returns a forward cursor over the documents.
    pub fn into_cursor_and_fields_index(self) -> (DocumentsBatchCursor<R>, DocumentsBatchIndex) {
        let DocumentsBatchReader { cursor, fields_index } = self;
        let mut cursor = DocumentsBatchCursor { cursor };
        cursor.reset();
        (cursor, fields_index)
    }
}

/// A forward cursor over the documents in a `DocumentsBatchReader`.
pub struct DocumentsBatchCursor<R> {
    cursor: grenad::ReaderCursor<R>,
}

impl<R> DocumentsBatchCursor<R> {
    /// Resets the cursor to be able to read from the start again.
    pub fn reset(&mut self) {
        self.cursor.reset();
    }
}

impl<R: io::Read + io::Seek> DocumentsBatchCursor<R> {
    /// Returns the next document, starting from the first one. Subsequent calls to
    /// `next_document` advance the document reader until all the documents have been read.
    pub fn next_document(
        &mut self,
    ) -> Result<Option<KvReader<'_, FieldId>>, DocumentsBatchCursorError> {
        match self.cursor.move_on_next()? {
            Some((key, value)) if key != DOCUMENTS_BATCH_INDEX_KEY => {
                Ok(Some(KvReader::new(value)))
            }
            _otherwise => Ok(None),
        }
    }
}

/// The possible errors thrown by the `DocumentsBatchCursor` when iterating on the documents.
#[derive(Debug)]
pub enum DocumentsBatchCursorError {
    Grenad(grenad::Error),
    SerdeJson(serde_json::Error),
}

impl From<grenad::Error> for DocumentsBatchCursorError {
    fn from(error: grenad::Error) -> DocumentsBatchCursorError {
        DocumentsBatchCursorError::Grenad(error)
    }
}

impl From<serde_json::Error> for DocumentsBatchCursorError {
    fn from(error: serde_json::Error) -> DocumentsBatchCursorError {
        DocumentsBatchCursorError::SerdeJson(error)
    }
}

impl error::Error for DocumentsBatchCursorError {}

impl fmt::Display for DocumentsBatchCursorError {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            DocumentsBatchCursorError::Grenad(e) => e.fmt(f),
            DocumentsBatchCursorError::SerdeJson(e) => e.fmt(f),
        }
    }
}
76
crates/milli/src/documents/serde_impl.rs
Normal file
@ -0,0 +1,76 @@
use std::fmt;
use std::io::Write;

use serde::de::{DeserializeSeed, MapAccess, SeqAccess, Visitor};

use super::Error;
use crate::documents::DocumentsBatchBuilder;
use crate::Object;

macro_rules! tri {
    ($e:expr) => {
        match $e {
            Ok(r) => r,
            Err(e) => return Ok(Err(e.into())),
        }
    };
}
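
// Illustrative note: `tri!` plays the role of `?` here, except that the error is
// surfaced through the visitor's `Ok(Err(..))` value instead of serde's own error type.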

pub struct DocumentVisitor<'a, W> {
    inner: &'a mut DocumentsBatchBuilder<W>,
    object: Object,
}

impl<'a, W> DocumentVisitor<'a, W> {
    pub fn new(inner: &'a mut DocumentsBatchBuilder<W>) -> Self {
        DocumentVisitor { inner, object: Object::new() }
    }
}

impl<'a, 'de, W: Write> Visitor<'de> for &mut DocumentVisitor<'a, W> {
    /// This Visitor's value is nothing, since it writes the values to a file.
    type Value = Result<(), Error>;

    fn visit_seq<A>(self, mut seq: A) -> Result<Self::Value, A::Error>
    where
        A: SeqAccess<'de>,
    {
        while let Some(v) = seq.next_element_seed(&mut *self)? {
            tri!(v)
        }

        Ok(Ok(()))
    }

    fn visit_map<A>(self, mut map: A) -> Result<Self::Value, A::Error>
    where
        A: MapAccess<'de>,
    {
        self.object.clear();
        while let Some((key, value)) = map.next_entry()? {
            self.object.insert(key, value);
        }

        tri!(self.inner.append_json_object(&self.object));

        Ok(Ok(()))
    }

    fn expecting(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "a document, or a sequence of documents.")
    }
}

impl<'a, 'de, W> DeserializeSeed<'de> for &mut DocumentVisitor<'a, W>
where
    W: Write,
{
    type Value = Result<(), Error>;

    fn deserialize<D>(self, deserializer: D) -> Result<Self::Value, D::Error>
    where
        D: serde::Deserializer<'de>,
    {
        deserializer.deserialize_map(self)
    }
}
504
crates/milli/src/error.rs
Normal file
@ -0,0 +1,504 @@
use std::collections::BTreeSet;
use std::convert::Infallible;
use std::fmt::Write;
use std::{io, str};

use heed::{Error as HeedError, MdbError};
use rayon::ThreadPoolBuildError;
use rhai::EvalAltResult;
use serde_json::Value;
use thiserror::Error;

use crate::documents::{self, DocumentsBatchCursorError};
use crate::thread_pool_no_abort::PanicCatched;
use crate::{CriterionError, DocumentId, FieldId, Object, SortError};

pub fn is_reserved_keyword(keyword: &str) -> bool {
    ["_geo", "_geoDistance", "_geoPoint", "_geoRadius", "_geoBoundingBox"].contains(&keyword)
}

#[derive(Error, Debug)]
pub enum Error {
    #[error("internal: {0}.")]
    InternalError(#[from] InternalError),
    #[error(transparent)]
    IoError(#[from] io::Error),
    #[error(transparent)]
    UserError(#[from] UserError),
}

#[derive(Error, Debug)]
pub enum InternalError {
    #[error("{}", HeedError::DatabaseClosing)]
    DatabaseClosing,
    #[error("Missing {} in the {db_name} database.", key.unwrap_or("key"))]
    DatabaseMissingEntry { db_name: &'static str, key: Option<&'static str> },
    #[error("Missing {key} in the fieldids weights mapping.")]
    FieldidsWeightsMapMissingEntry { key: FieldId },
    #[error(transparent)]
    FieldIdMapMissingEntry(#[from] FieldIdMapMissingEntry),
    #[error("Missing {key} in the field id mapping.")]
    FieldIdMappingMissingEntry { key: FieldId },
    #[error(transparent)]
    Fst(#[from] fst::Error),
    #[error(transparent)]
    DocumentsError(#[from] documents::Error),
    #[error("Invalid compression type has been specified to grenad")]
    GrenadInvalidCompressionType,
    #[error("Invalid grenad file with an invalid version format")]
    GrenadInvalidFormatVersion,
    #[error("Invalid merge while processing {process}")]
    IndexingMergingKeys { process: &'static str },
    #[error(transparent)]
    RayonThreadPool(#[from] ThreadPoolBuildError),
    #[error(transparent)]
    PanicInThreadPool(#[from] PanicCatched),
    #[error(transparent)]
    SerdeJson(#[from] serde_json::Error),
    #[error(transparent)]
    Serialization(#[from] SerializationError),
    #[error(transparent)]
    Store(#[from] MdbError),
    #[error(transparent)]
    Utf8(#[from] str::Utf8Error),
    #[error("An indexation process was explicitly aborted")]
    AbortedIndexation,
    #[error("The matching words list contains at least one invalid member")]
    InvalidMatchingWords,
    #[error(transparent)]
    ArroyError(#[from] arroy::Error),
    #[error(transparent)]
    VectorEmbeddingError(#[from] crate::vector::Error),
}

#[derive(Error, Debug)]
pub enum SerializationError {
    #[error("{}", match .db_name {
        Some(name) => format!("decoding from the {name} database failed"),
        None => "decoding failed".to_string(),
    })]
    Decoding { db_name: Option<&'static str> },
    #[error("{}", match .db_name {
        Some(name) => format!("encoding into the {name} database failed"),
        None => "encoding failed".to_string(),
    })]
    Encoding { db_name: Option<&'static str> },
    #[error("number is not a valid finite number")]
    InvalidNumberSerialization,
}

#[derive(Error, Debug)]
pub enum FieldIdMapMissingEntry {
    #[error("unknown field id {field_id} coming from the {process} process")]
    FieldId { field_id: FieldId, process: &'static str },
    #[error("unknown field name {field_name} coming from the {process} process")]
    FieldName { field_name: String, process: &'static str },
}

#[derive(Error, Debug)]
pub enum UserError {
    #[error("A document cannot contain more than 65,535 fields.")]
    AttributeLimitReached,
    #[error(transparent)]
    CriterionError(#[from] CriterionError),
    #[error("Maximum number of documents reached.")]
    DocumentLimitReached,
    #[error(
        "Document identifier `{}` is invalid. \
         A document identifier can be of type integer or string, \
         only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and underscores (_), \
         and can not be more than 512 bytes.", .document_id.to_string()
    )]
    InvalidDocumentId { document_id: Value },
    #[error("Invalid facet distribution, {}", format_invalid_filter_distribution(.invalid_facets_name, .valid_facets_name))]
    InvalidFacetsDistribution {
        invalid_facets_name: BTreeSet<String>,
        valid_facets_name: BTreeSet<String>,
    },
    #[error(transparent)]
    InvalidGeoField(#[from] GeoError),
    #[error("Invalid vector dimensions: expected: `{}`, found: `{}`.", .expected, .found)]
    InvalidVectorDimensions { expected: usize, found: usize },
    #[error("The `_vectors` field in the document with id: `{document_id}` is not an object. Was expecting an object with a key for each embedder with manually provided vectors, but instead got `{value}`")]
    InvalidVectorsMapType { document_id: String, value: Value },
    #[error("Bad embedder configuration in the document with id: `{document_id}`. {error}")]
    InvalidVectorsEmbedderConf { document_id: String, error: deserr::errors::JsonError },
    #[error("{0}")]
    InvalidFilter(String),
    #[error("Invalid type for filter subexpression: expected: {}, found: {1}.", .0.join(", "))]
    InvalidFilterExpression(&'static [&'static str], Value),
    #[error("Attribute `{}` is not sortable. {}",
        .field,
        match .valid_fields.is_empty() {
            true => "This index does not have configured sortable attributes.".to_string(),
            false => format!("Available sortable attributes are: `{}{}`.",
                valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
                .hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""),
            ),
        }
    )]
    InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String>, hidden_fields: bool },
    #[error("Attribute `{}` is not filterable and thus, cannot be used as distinct attribute. {}",
        .field,
        match .valid_fields.is_empty() {
            true => "This index does not have configured filterable attributes.".to_string(),
            false => format!("Available filterable attributes are: `{}{}`.",
                valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
                .hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""),
            ),
        }
    )]
    InvalidDistinctAttribute { field: String, valid_fields: BTreeSet<String>, hidden_fields: bool },
    #[error("Attribute `{}` is not facet-searchable. {}",
        .field,
        match .valid_fields.is_empty() {
            true => "This index does not have configured facet-searchable attributes. To make it facet-searchable add it to the `filterableAttributes` index settings.".to_string(),
            false => format!("Available facet-searchable attributes are: `{}{}`. To make it facet-searchable add it to the `filterableAttributes` index settings.",
                valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
                .hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""),
            ),
        }
    )]
    InvalidFacetSearchFacetName {
        field: String,
        valid_fields: BTreeSet<String>,
        hidden_fields: bool,
    },
    #[error("Attribute `{}` is not searchable. Available searchable attributes are: `{}{}`.",
        .field,
        .valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", "),
        .hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""),
    )]
    InvalidSearchableAttribute {
        field: String,
        valid_fields: BTreeSet<String>,
        hidden_fields: bool,
    },
    #[error("an environment is already opened with different options")]
    InvalidLmdbOpenOptions,
    #[error("You must specify where `sort` is listed in the rankingRules setting to use the sort parameter at search time.")]
    SortRankingRuleMissing,
    #[error("The database file is in an invalid state.")]
    InvalidStoreFile,
    #[error("Maximum database size has been reached.")]
    MaxDatabaseSizeReached,
    #[error("Document doesn't have a `{}` attribute: `{}`.", .primary_key, serde_json::to_string(.document).unwrap())]
    MissingDocumentId { primary_key: String, document: Object },
    #[error("Document has too many matching `{}` attributes: `{}`.", .primary_key, serde_json::to_string(.document).unwrap())]
    TooManyDocumentIds { primary_key: String, document: Object },
    #[error("The primary key inference failed as the engine did not find any field ending with `id` in its name. Please specify the primary key manually using the `primaryKey` query parameter.")]
    NoPrimaryKeyCandidateFound,
    #[error("The primary key inference failed as the engine found {} fields ending with `id` in their names: '{}' and '{}'. Please specify the primary key manually using the `primaryKey` query parameter.", .candidates.len(), .candidates.first().unwrap(), .candidates.get(1).unwrap())]
    MultiplePrimaryKeyCandidatesFound { candidates: Vec<String> },
    #[error("There is no more space left on the device. Consider increasing the size of the disk/partition.")]
    NoSpaceLeftOnDevice,
    #[error("Index already has a primary key: `{0}`.")]
    PrimaryKeyCannotBeChanged(String),
    #[error(transparent)]
    SerdeJson(serde_json::Error),
    #[error(transparent)]
    SortError(#[from] SortError),
    #[error("An unknown internal document id has been used: `{document_id}`.")]
    UnknownInternalDocumentId { document_id: DocumentId },
    #[error("`minWordSizeForTypos` setting is invalid. `oneTypo` and `twoTypos` fields should be between `0` and `255`, and `twoTypos` should be greater than or equal to `oneTypo` but found `oneTypo: {0}` and twoTypos: {1}`.")]
    InvalidMinTypoWordLenSetting(u8, u8),
    #[error(transparent)]
    VectorEmbeddingError(#[from] crate::vector::Error),
    #[error(transparent)]
    MissingDocumentField(#[from] crate::prompt::error::RenderPromptError),
    #[error(transparent)]
    InvalidPrompt(#[from] crate::prompt::error::NewPromptError),
    #[error("`.embedders.{0}.documentTemplate`: Invalid template: {1}.")]
    InvalidPromptForEmbeddings(String, crate::prompt::error::NewPromptError),
    #[error("Too many embedders in the configuration. Found {0}, but limited to 256.")]
    TooManyEmbedders(usize),
    #[error("Cannot find embedder with name `{0}`.")]
    InvalidEmbedder(String),
    #[error("Too many vectors for document with id {0}: found {1}, but limited to 256.")]
    TooManyVectors(String, usize),
    #[error("`.embedders.{embedder_name}`: Field `{field}` unavailable for source `{source_}` (only available for sources: {}). Available fields: {}",
        allowed_sources_for_field
            .iter()
            .map(|accepted| format!("`{}`", accepted))
            .collect::<Vec<String>>()
            .join(", "),
        allowed_fields_for_source
            .iter()
            .map(|accepted| format!("`{}`", accepted))
            .collect::<Vec<String>>()
            .join(", ")
    )]
    InvalidFieldForSource {
        embedder_name: String,
        source_: crate::vector::settings::EmbedderSource,
        field: &'static str,
        allowed_fields_for_source: &'static [&'static str],
        allowed_sources_for_field: &'static [crate::vector::settings::EmbedderSource],
    },
    #[error("`.embedders.{embedder_name}.model`: Invalid model `{model}` for OpenAI. Supported models: {:?}", crate::vector::openai::EmbeddingModel::supported_models())]
    InvalidOpenAiModel { embedder_name: String, model: String },
    #[error("`.embedders.{embedder_name}`: Missing field `{field}` (note: this field is mandatory for source {source_})")]
    MissingFieldForSource {
        field: &'static str,
        source_: crate::vector::settings::EmbedderSource,
        embedder_name: String,
    },
    #[error("`.embedders.{embedder_name}.dimensions`: Model `{model}` does not support overriding its native dimensions of {expected_dimensions}. Found {dimensions}")]
    InvalidOpenAiModelDimensions {
        embedder_name: String,
        model: &'static str,
        dimensions: usize,
        expected_dimensions: usize,
    },
    #[error("`.embedders.{embedder_name}.dimensions`: Model `{model}` does not support overriding its dimensions to a value higher than {max_dimensions}. Found {dimensions}")]
    InvalidOpenAiModelDimensionsMax {
        embedder_name: String,
        model: &'static str,
        dimensions: usize,
        max_dimensions: usize,
    },
    #[error("`.embedders.{embedder_name}.dimensions`: `dimensions` cannot be zero")]
    InvalidSettingsDimensions { embedder_name: String },
    #[error(
        "`.embedders.{embedder_name}.binaryQuantized`: Cannot disable the binary quantization.\n - Note: Binary quantization is a lossy operation that cannot be reverted.\n - Hint: Add a new embedder that is non-quantized and regenerate the vectors."
    )]
    InvalidDisableBinaryQuantization { embedder_name: String },
    #[error("`.embedders.{embedder_name}.documentTemplateMaxBytes`: `documentTemplateMaxBytes` cannot be zero")]
    InvalidSettingsDocumentTemplateMaxBytes { embedder_name: String },
    #[error("`.embedders.{embedder_name}.url`: could not parse `{url}`: {inner_error}")]
    InvalidUrl { embedder_name: String, inner_error: url::ParseError, url: String },
    #[error("Document editions cannot modify a document's primary key")]
    DocumentEditionCannotModifyPrimaryKey,
    #[error("Document editions must keep documents as objects")]
    DocumentEditionDocumentMustBeObject,
    #[error("Document edition runtime error encountered while running the function: {0}")]
    DocumentEditionRuntimeError(Box<EvalAltResult>),
    #[error("Document edition runtime error encountered while compiling the function: {0}")]
    DocumentEditionCompilationError(rhai::ParseError),
    #[error("{0}")]
    DocumentEmbeddingError(String),
}

impl From<crate::vector::Error> for Error {
    fn from(value: crate::vector::Error) -> Self {
        match value.fault() {
            FaultSource::User => Error::UserError(value.into()),
            FaultSource::Runtime => Error::UserError(value.into()),
            FaultSource::Bug => Error::InternalError(value.into()),
            FaultSource::Undecided => Error::UserError(value.into()),
        }
    }
}

impl From<arroy::Error> for Error {
    fn from(value: arroy::Error) -> Self {
        match value {
            arroy::Error::Heed(heed) => heed.into(),
            arroy::Error::Io(io) => io.into(),
            arroy::Error::InvalidVecDimension { expected, received } => {
                Error::UserError(UserError::InvalidVectorDimensions { expected, found: received })
            }
            arroy::Error::BuildCancelled => Error::InternalError(InternalError::AbortedIndexation),
            arroy::Error::DatabaseFull
            | arroy::Error::InvalidItemAppend
            | arroy::Error::UnmatchingDistance { .. }
            | arroy::Error::NeedBuild(_)
            | arroy::Error::MissingKey { .. }
            | arroy::Error::MissingMetadata(_) => {
                Error::InternalError(InternalError::ArroyError(value))
            }
        }
    }
}

#[derive(Error, Debug)]
pub enum GeoError {
    #[error("The `_geo` field in the document with the id: `{document_id}` is not an object. Was expecting an object with the `_geo.lat` and `_geo.lng` fields but instead got `{value}`.")]
    NotAnObject { document_id: Value, value: Value },
    #[error("The `_geo` field in the document with the id: `{document_id}` contains the following unexpected fields: `{value}`.")]
    UnexpectedExtraFields { document_id: Value, value: Value },
    #[error("Could not find latitude nor longitude in the document with the id: `{document_id}`. Was expecting `_geo.lat` and `_geo.lng` fields.")]
    MissingLatitudeAndLongitude { document_id: Value },
    #[error("Could not find latitude in the document with the id: `{document_id}`. Was expecting a `_geo.lat` field.")]
    MissingLatitude { document_id: Value },
    #[error("Could not find longitude in the document with the id: `{document_id}`. Was expecting a `_geo.lng` field.")]
    MissingLongitude { document_id: Value },
    #[error("Could not parse latitude nor longitude in the document with the id: `{document_id}`. Was expecting finite numbers but instead got `{lat}` and `{lng}`.")]
    BadLatitudeAndLongitude { document_id: Value, lat: Value, lng: Value },
    #[error("Could not parse latitude in the document with the id: `{document_id}`. Was expecting a finite number but instead got `{value}`.")]
    BadLatitude { document_id: Value, value: Value },
    #[error("Could not parse longitude in the document with the id: `{document_id}`. Was expecting a finite number but instead got `{value}`.")]
    BadLongitude { document_id: Value, value: Value },
}

fn format_invalid_filter_distribution(
    invalid_facets_name: &BTreeSet<String>,
    valid_facets_name: &BTreeSet<String>,
) -> String {
    if valid_facets_name.is_empty() {
        return "this index does not have configured filterable attributes.".into();
    }

    let mut result = String::new();

    match invalid_facets_name.len() {
        0 => (),
        1 => write!(
            result,
            "attribute `{}` is not filterable.",
            invalid_facets_name.first().unwrap()
        )
        .unwrap(),
        _ => write!(
            result,
            "attributes `{}` are not filterable.",
            invalid_facets_name.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", ")
        )
        .unwrap(),
    };

    match valid_facets_name.len() {
        1 => write!(
            result,
            " The available filterable attribute is `{}`.",
            valid_facets_name.first().unwrap()
        )
        .unwrap(),
        _ => write!(
            result,
            " The available filterable attributes are `{}`.",
            valid_facets_name.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", ")
        )
        .unwrap(),
    }

    result
}

/// A little macro helper to autogenerate From implementations that need two `Into`.
/// Given the following parameters: `error_from_sub_error!(FieldIdMapMissingEntry => InternalError)`
/// the macro will create the following code:
/// ```ignore
/// impl From<FieldIdMapMissingEntry> for Error {
///     fn from(error: FieldIdMapMissingEntry) -> Error {
///         Error::from(InternalError::from(error))
///     }
/// }
/// ```
macro_rules! error_from_sub_error {
    () => {};
    ($sub:ty => $intermediate:ty) => {
        impl From<$sub> for Error {
            fn from(error: $sub) -> Error {
                Error::from(<$intermediate>::from(error))
            }
        }
    };
    ($($sub:ty => $intermediate:ty $(,)?),+) => {
        $(error_from_sub_error!($sub => $intermediate);)+
    };
}

error_from_sub_error! {
    FieldIdMapMissingEntry => InternalError,
    fst::Error => InternalError,
    documents::Error => InternalError,
    str::Utf8Error => InternalError,
    ThreadPoolBuildError => InternalError,
    SerializationError => InternalError,
    GeoError => UserError,
    CriterionError => UserError,
}

impl<E> From<grenad::Error<E>> for Error
where
    Error: From<E>,
{
    fn from(error: grenad::Error<E>) -> Error {
        match error {
            grenad::Error::Io(error) => Error::IoError(error),
            grenad::Error::Merge(error) => Error::from(error),
            grenad::Error::InvalidCompressionType => {
                Error::InternalError(InternalError::GrenadInvalidCompressionType)
            }
            grenad::Error::InvalidFormatVersion => {
                Error::InternalError(InternalError::GrenadInvalidFormatVersion)
            }
        }
    }
}

impl From<DocumentsBatchCursorError> for Error {
    fn from(error: DocumentsBatchCursorError) -> Error {
        match error {
            DocumentsBatchCursorError::Grenad(e) => Error::from(e),
            DocumentsBatchCursorError::SerdeJson(e) => Error::from(InternalError::from(e)),
        }
    }
}

impl From<Infallible> for Error {
    fn from(_error: Infallible) -> Error {
        unreachable!()
    }
}

impl From<HeedError> for Error {
    fn from(error: HeedError) -> Error {
        use self::Error::*;
        use self::InternalError::*;
        use self::SerializationError::*;
        use self::UserError::*;

        match error {
            HeedError::Io(error) => Error::from(error),
            HeedError::Mdb(MdbError::MapFull) => UserError(MaxDatabaseSizeReached),
            HeedError::Mdb(MdbError::Invalid) => UserError(InvalidStoreFile),
            HeedError::Mdb(error) => InternalError(Store(error)),
            // TODO use the encoding
            HeedError::Encoding(_) => InternalError(Serialization(Encoding { db_name: None })),
            HeedError::Decoding(_) => InternalError(Serialization(Decoding { db_name: None })),
            HeedError::DatabaseClosing => InternalError(DatabaseClosing),
            HeedError::BadOpenOptions { .. } => UserError(InvalidLmdbOpenOptions),
        }
    }
}

#[derive(Debug, Clone, Copy)]
pub enum FaultSource {
    User,
    Runtime,
    Bug,
    Undecided,
}

impl std::fmt::Display for FaultSource {
    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
        let s = match self {
            FaultSource::User => "user error",
            FaultSource::Runtime => "runtime error",
            FaultSource::Bug => "coding error",
            FaultSource::Undecided => "error",
        };
        f.write_str(s)
    }
}

#[test]
fn conditionally_lookup_for_error_message() {
    let prefix = "Attribute `name` is not sortable.";
    let messages = vec![
        (BTreeSet::new(), "This index does not have configured sortable attributes."),
        (BTreeSet::from(["age".to_string()]), "Available sortable attributes are: `age`."),
    ];

    for (list, suffix) in messages {
        let err = UserError::InvalidSortableAttribute {
            field: "name".to_string(),
            valid_fields: list,
            hidden_fields: false,
        };

        assert_eq!(err.to_string(), format!("{} {}", prefix, suffix));
    }
}
83
crates/milli/src/external_documents_ids.rs
Normal file
@ -0,0 +1,83 @@
use std::collections::HashMap;

use heed::types::Str;
use heed::{Database, RoIter, RoTxn, RwTxn};

use crate::{DocumentId, BEU32};

pub enum DocumentOperationKind {
    Create,
    Delete,
}

pub struct DocumentOperation {
    pub external_id: String,
    pub internal_id: DocumentId,
    pub kind: DocumentOperationKind,
}

pub struct ExternalDocumentsIds(Database<Str, BEU32>);

impl ExternalDocumentsIds {
    pub fn new(db: Database<Str, BEU32>) -> ExternalDocumentsIds {
        ExternalDocumentsIds(db)
    }

    /// Returns `true` if the external documents ids map is empty.
    pub fn is_empty(&self, rtxn: &RoTxn<'_>) -> heed::Result<bool> {
        self.0.is_empty(rtxn).map_err(Into::into)
    }

    pub fn get<A: AsRef<str>>(
        &self,
        rtxn: &RoTxn<'_>,
        external_id: A,
    ) -> heed::Result<Option<u32>> {
        self.0.get(rtxn, external_id.as_ref())
    }

    /// A helper function to debug this type; it returns a `HashMap` of the
    /// external to internal document ids mapping.
    pub fn to_hash_map(&self, rtxn: &RoTxn<'_>) -> heed::Result<HashMap<String, u32>> {
        let mut map = HashMap::default();
        for result in self.0.iter(rtxn)? {
            let (external, internal) = result?;
            map.insert(external.to_owned(), internal);
        }
        Ok(map)
    }

    /// Applies the list of operations passed as argument, modifying the current external to internal id mapping.
    ///
    /// If the list contains multiple operations on the same external id, then the result is unspecified.
    ///
    /// # Panics
    ///
    /// - If attempting to delete a document that doesn't exist
    /// - If attempting to create a document that already exists
    pub fn apply(
        &self,
        wtxn: &mut RwTxn<'_>,
        operations: Vec<DocumentOperation>,
    ) -> heed::Result<()> {
        for DocumentOperation { external_id, internal_id, kind } in operations {
            match kind {
                DocumentOperationKind::Create => {
                    self.0.put(wtxn, &external_id, &internal_id)?;
                }
                DocumentOperationKind::Delete => {
                    if !self.0.delete(wtxn, &external_id)? {
                        panic!("Attempting to delete a non-existing document")
                    }
                }
            }
        }

        Ok(())
    }
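
    // Illustrative usage sketch (assumes an open write txn and a fresh external id):
    //
    //     ids.apply(wtxn, vec![DocumentOperation {
    //         external_id: "42".to_string(),
    //         internal_id: 0,
    //         kind: DocumentOperationKind::Create,
    //     }])?;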

    /// Returns an iterator over all the external ids.
    pub fn iter<'t>(&self, rtxn: &'t RoTxn<'_>) -> heed::Result<RoIter<'t, Str, BEU32>> {
        self.0.iter(rtxn)
    }
}
45
crates/milli/src/facet/facet_type.rs
Normal file
@ -0,0 +1,45 @@
use std::error::Error;
use std::fmt;
use std::str::FromStr;

use serde::{Deserialize, Serialize};

#[derive(Debug, Copy, Clone, PartialOrd, Ord, PartialEq, Eq, Hash, Serialize, Deserialize)]
pub enum FacetType {
    String,
    Number,
}

impl fmt::Display for FacetType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            FacetType::String => f.write_str("string"),
            FacetType::Number => f.write_str("number"),
        }
    }
}

impl FromStr for FacetType {
    type Err = InvalidFacetType;

    fn from_str(s: &str) -> Result<Self, Self::Err> {
        if s.trim().eq_ignore_ascii_case("string") {
            Ok(FacetType::String)
        } else if s.trim().eq_ignore_ascii_case("number") {
            Ok(FacetType::Number)
        } else {
            Err(InvalidFacetType)
        }
    }
}
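
// Illustrative: parsing is whitespace- and case-insensitive, so
// `"  Number ".parse::<FacetType>()` is `Ok(FacetType::Number)` while
// `"bool".parse::<FacetType>()` is `Err(InvalidFacetType)`.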

#[derive(Debug, Copy, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)]
pub struct InvalidFacetType;

impl fmt::Display for InvalidFacetType {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(r#"Invalid facet type, must be "string" or "number""#)
    }
}

impl Error for InvalidFacetType {}
56
crates/milli/src/facet/facet_value.rs
Normal file
@ -0,0 +1,56 @@
use ordered_float::OrderedFloat;
use serde::{Serialize, Serializer};

#[derive(Debug, Clone, PartialOrd, Ord, PartialEq, Eq, Hash)]
pub enum FacetValue {
    String(String),
    Number(OrderedFloat<f64>),
}

impl From<String> for FacetValue {
    fn from(string: String) -> FacetValue {
        FacetValue::String(string)
    }
}

impl From<&str> for FacetValue {
    fn from(string: &str) -> FacetValue {
        FacetValue::String(string.to_owned())
    }
}

impl From<f64> for FacetValue {
    fn from(float: f64) -> FacetValue {
        FacetValue::Number(OrderedFloat(float))
    }
}

impl From<OrderedFloat<f64>> for FacetValue {
    fn from(float: OrderedFloat<f64>) -> FacetValue {
        FacetValue::Number(float)
    }
}

impl From<i64> for FacetValue {
    fn from(integer: i64) -> FacetValue {
        FacetValue::Number(OrderedFloat(integer as f64))
    }
}

/// We implement Serialize ourselves because we need to always serialize it as a string,
/// since JSON object keys must be strings, not numbers.
// TODO remove this impl and convert them into string, by hand, when required.
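// Illustrative: `FacetValue::from(1.5)` serializes to the JSON string "1.5"
// rather than the number 1.5.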
impl Serialize for FacetValue {
    fn serialize<S>(&self, serializer: S) -> Result<S::Ok, S::Error>
    where
        S: Serializer,
    {
        match self {
            FacetValue::String(string) => serializer.serialize_str(string),
            FacetValue::Number(number) => {
                let string = number.to_string();
                serializer.serialize_str(&string)
            }
        }
    }
}
6
crates/milli/src/facet/mod.rs
Normal file
@ -0,0 +1,6 @@
mod facet_type;
mod facet_value;
pub mod value_encoding;

pub use self::facet_type::FacetType;
pub use self::facet_value::FacetValue;
49
crates/milli/src/facet/value_encoding.rs
Normal file
@ -0,0 +1,49 @@
// https://stackoverflow.com/a/43305015/1941280
#[inline]
pub fn f64_into_bytes(float: f64) -> Option<[u8; 8]> {
    if float.is_finite() {
        if float == 0.0 || float == -0.0 {
            return Some(xor_first_bit(0.0_f64.to_be_bytes()));
        } else if float.is_sign_negative() {
            return Some(xor_all_bits(float.to_be_bytes()));
        } else if float.is_sign_positive() {
            return Some(xor_first_bit(float.to_be_bytes()));
        }
    }
    None
}
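
// Illustrative: after this transform, the big-endian byte arrays of
// -13.0 < -10.0 < -0.0 < 1.0 < 43.0 compare in the same order as the floats
// themselves, because negative floats get all of their bits flipped while
// positive ones only get their sign bit flipped (see the test below).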

#[inline]
fn xor_first_bit(mut x: [u8; 8]) -> [u8; 8] {
    x[0] ^= 0x80;
    x
}

#[inline]
fn xor_all_bits(mut x: [u8; 8]) -> [u8; 8] {
    x.iter_mut().for_each(|b| *b ^= 0xff);
    x
}

#[cfg(test)]
mod tests {
    use std::cmp::Ordering::Less;

    use super::*;

    fn is_sorted<T: Ord>(x: &[T]) -> bool {
        x.windows(2).map(|x| x[0].cmp(&x[1])).all(|o| o == Less)
    }

    #[test]
    fn ordered_f64_bytes() {
        let a = -13_f64;
        let b = -10.0;
        let c = -0.0;
        let d = 1.0;
        let e = 43.0;

        let vec: Vec<_> = [a, b, c, d, e].iter().cloned().map(f64_into_bytes).collect();
        assert!(is_sorted(&vec), "{:?}", vec);
    }
}
55
crates/milli/src/fieldids_weights_map.rs
Normal file
@ -0,0 +1,55 @@
//! The fieldids weights map is in charge of linking the searchable fields with their weights.

use std::collections::HashMap;

use serde::{Deserialize, Serialize};

use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME;
use crate::{FieldId, FieldsIdsMap, Weight};

#[derive(Debug, Default, Serialize, Deserialize)]
pub struct FieldidsWeightsMap {
    map: HashMap<FieldId, Weight>,
}

impl FieldidsWeightsMap {
    /// Insert a field id -> weight into the map.
    /// If the map did not have this key present, `None` is returned.
    /// If the map did have this key present, the value is updated, and the old value is returned.
    pub fn insert(&mut self, fid: FieldId, weight: Weight) -> Option<Weight> {
        self.map.insert(fid, weight)
    }

    /// Create the map from the fields ids maps.
    /// Should only be called in the case there are NO searchable attributes.
    /// All the fields will be inserted in the order of the fields ids map with a weight of 0.
    pub fn from_field_id_map_without_searchable(fid_map: &FieldsIdsMap) -> Self {
        FieldidsWeightsMap {
            map: fid_map
                .iter()
                .filter(|(_fid, name)| !crate::is_faceted_by(name, RESERVED_VECTORS_FIELD_NAME))
                .map(|(fid, _name)| (fid, 0))
                .collect(),
        }
    }
|
||||
|
||||
/// Removes a field id from the map, returning the associated weight previously in the map.
|
||||
pub fn remove(&mut self, fid: FieldId) -> Option<Weight> {
|
||||
self.map.remove(&fid)
|
||||
}
|
||||
|
||||
/// Returns weight corresponding to the key.
|
||||
pub fn weight(&self, fid: FieldId) -> Option<Weight> {
|
||||
self.map.get(&fid).copied()
|
||||
}
|
||||
|
||||
/// Returns highest weight contained in the map if any.
|
||||
pub fn max_weight(&self) -> Option<Weight> {
|
||||
self.map.values().copied().max()
|
||||
}
|
||||
|
||||
/// Return an iterator visiting all field ids in arbitrary order.
|
||||
pub fn ids(&self) -> impl Iterator<Item = FieldId> + '_ {
|
||||
self.map.keys().copied()
|
||||
}
|
||||
}
|
167
crates/milli/src/fields_ids_map.rs
Normal file

@@ -0,0 +1,167 @@
use std::collections::BTreeMap;

use serde::{Deserialize, Serialize};

use crate::FieldId;

#[derive(Debug, Clone, Serialize, Deserialize)]
pub struct FieldsIdsMap {
    names_ids: BTreeMap<String, FieldId>,
    ids_names: BTreeMap<FieldId, String>,
    next_id: Option<FieldId>,
}

impl FieldsIdsMap {
    pub fn new() -> FieldsIdsMap {
        FieldsIdsMap { names_ids: BTreeMap::new(), ids_names: BTreeMap::new(), next_id: Some(0) }
    }

    /// Returns the number of fields ids in the map.
    pub fn len(&self) -> usize {
        self.names_ids.len()
    }

    /// Returns `true` if the map is empty.
    pub fn is_empty(&self) -> bool {
        self.names_ids.is_empty()
    }

    /// Returns the field id related to a field name; it will create a new field id if the
    /// name is not already known. Returns `None` if the maximum field id has been reached.
    pub fn insert(&mut self, name: &str) -> Option<FieldId> {
        match self.names_ids.get(name) {
            Some(id) => Some(*id),
            None => {
                let id = self.next_id?;
                self.next_id = id.checked_add(1);
                self.names_ids.insert(name.to_owned(), id);
                self.ids_names.insert(id, name.to_owned());
                Some(id)
            }
        }
    }

    /// Get the ids of a field and all its nested fields based on its name.
    pub fn nested_ids(&self, name: &str) -> Vec<FieldId> {
        self.names_ids
            .range(name.to_string()..)
            .take_while(|(key, _)| key.starts_with(name))
            .filter(|(key, _)| crate::is_faceted_by(key, name))
            .map(|(_name, id)| *id)
            .collect()
    }

    /// Get the id of a field based on its name.
    pub fn id(&self, name: &str) -> Option<FieldId> {
        self.names_ids.get(name).copied()
    }

    /// Get the name of a field based on its id.
    pub fn name(&self, id: FieldId) -> Option<&str> {
        self.ids_names.get(&id).map(String::as_str)
    }

    /// Remove a field name and id based on its name.
    pub fn remove(&mut self, name: &str) -> Option<FieldId> {
        match self.names_ids.remove(name) {
            Some(id) => self.ids_names.remove_entry(&id).map(|(id, _)| id),
            None => None,
        }
    }

    /// Iterate over the ids and names in the ids order.
    pub fn iter(&self) -> impl Iterator<Item = (FieldId, &str)> {
        self.ids_names.iter().map(|(id, name)| (*id, name.as_str()))
    }

    /// Iterate over the ids in the order of the ids.
    pub fn ids(&'_ self) -> impl Iterator<Item = FieldId> + '_ {
        self.ids_names.keys().copied()
    }

    /// Iterate over the names in the order of the ids.
    pub fn names(&self) -> impl Iterator<Item = &str> {
        self.ids_names.values().map(AsRef::as_ref)
    }
}

impl Default for FieldsIdsMap {
    fn default() -> FieldsIdsMap {
        FieldsIdsMap::new()
    }
}

impl crate::documents::FieldIdMapper for FieldsIdsMap {
    fn id(&self, name: &str) -> Option<FieldId> {
        self.id(name)
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn fields_ids_map() {
        let mut map = FieldsIdsMap::new();

        assert_eq!(map.insert("id"), Some(0));
        assert_eq!(map.insert("title"), Some(1));
        assert_eq!(map.insert("description"), Some(2));
        assert_eq!(map.insert("id"), Some(0));
        assert_eq!(map.insert("title"), Some(1));
        assert_eq!(map.insert("description"), Some(2));

        assert_eq!(map.id("id"), Some(0));
        assert_eq!(map.id("title"), Some(1));
        assert_eq!(map.id("description"), Some(2));
        assert_eq!(map.id("date"), None);

        assert_eq!(map.len(), 3);

        assert_eq!(map.name(0), Some("id"));
        assert_eq!(map.name(1), Some("title"));
        assert_eq!(map.name(2), Some("description"));
        assert_eq!(map.name(4), None);

        assert_eq!(map.remove("title"), Some(1));

        assert_eq!(map.id("title"), None);
        assert_eq!(map.insert("title"), Some(3));
        assert_eq!(map.len(), 3);

        let mut iter = map.iter();
        assert_eq!(iter.next(), Some((0, "id")));
        assert_eq!(iter.next(), Some((2, "description")));
        assert_eq!(iter.next(), Some((3, "title")));
        assert_eq!(iter.next(), None);
    }

    #[test]
    fn nested_fields() {
        let mut map = FieldsIdsMap::new();

        assert_eq!(map.insert("id"), Some(0));
        assert_eq!(map.insert("doggo"), Some(1));
        assert_eq!(map.insert("doggo.name"), Some(2));
        assert_eq!(map.insert("doggolution"), Some(3));
        assert_eq!(map.insert("doggo.breed.name"), Some(4));
        assert_eq!(map.insert("description"), Some(5));

        insta::assert_debug_snapshot!(map.nested_ids("doggo"), @r###"
        [
            1,
            4,
            2,
        ]
        "###);

        insta::assert_debug_snapshot!(map.nested_ids("doggo.breed"), @r###"
        [
            4,
        ]
        "###);

        insta::assert_debug_snapshot!(map.nested_ids("_vector"), @"[]");
    }
}
29
crates/milli/src/heed_codec/beu16_str_codec.rs
Normal file

@@ -0,0 +1,29 @@
use std::borrow::Cow;
use std::convert::TryInto;
use std::str;

use heed::BoxedError;

pub struct BEU16StrCodec;

impl<'a> heed::BytesDecode<'a> for BEU16StrCodec {
    type DItem = (u16, &'a str);

    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
        let (n_bytes, str_bytes) = bytes.split_at(2);
        let n = n_bytes.try_into().map(u16::from_be_bytes)?;
        let s = str::from_utf8(str_bytes)?;
        Ok((n, s))
    }
}

impl<'a> heed::BytesEncode<'a> for BEU16StrCodec {
    type EItem = (u16, &'a str);

    fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
        let mut bytes = Vec::with_capacity(s.len() + 2);
        bytes.extend_from_slice(&n.to_be_bytes());
        bytes.extend_from_slice(s.as_bytes());
        Ok(Cow::Owned(bytes))
    }
}
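The big-endian prefix is what keeps LMDB iteration ordered by the integer first and the string second, since LMDB compares raw key bytes. A minimal round-trip sketch (not part of the diff, assuming `BEU16StrCodec` is in scope):

```rust
use heed::{BytesDecode, BytesEncode};

fn main() {
    let a = BEU16StrCodec::bytes_encode(&(1u16, "zebra")).unwrap();
    let b = BEU16StrCodec::bytes_encode(&(2u16, "apple")).unwrap();
    assert!(a < b); // the integer dominates the raw byte comparison

    let (n, s) = BEU16StrCodec::bytes_decode(&a).unwrap();
    assert_eq!((n, s), (1, "zebra"));
}
```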
29
crates/milli/src/heed_codec/beu32_str_codec.rs
Normal file

@@ -0,0 +1,29 @@
use std::borrow::Cow;
use std::convert::TryInto;
use std::str;

use heed::BoxedError;

pub struct BEU32StrCodec;

impl<'a> heed::BytesDecode<'a> for BEU32StrCodec {
    type DItem = (u32, &'a str);

    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
        let (n_bytes, str_bytes) = bytes.split_at(4);
        let n = n_bytes.try_into().map(u32::from_be_bytes)?;
        let s = str::from_utf8(str_bytes)?;
        Ok((n, s))
    }
}

impl<'a> heed::BytesEncode<'a> for BEU32StrCodec {
    type EItem = (u32, &'a str);

    fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
        let mut bytes = Vec::with_capacity(s.len() + 4);
        bytes.extend_from_slice(&n.to_be_bytes());
        bytes.extend_from_slice(s.as_bytes());
        Ok(Cow::Owned(bytes))
    }
}
23
crates/milli/src/heed_codec/byte_slice_ref.rs
Normal file

@@ -0,0 +1,23 @@
use std::borrow::Cow;

use heed::{BoxedError, BytesDecode, BytesEncode};

/// A codec for values of type `&[u8]`. Unlike `Bytes`, its `EItem` and `DItem` associated
/// types are equivalent (= `&'a [u8]`) and these values can reside within another structure.
pub struct BytesRefCodec;

impl<'a> BytesEncode<'a> for BytesRefCodec {
    type EItem = &'a [u8];

    fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
        Ok(Cow::Borrowed(item))
    }
}

impl<'a> BytesDecode<'a> for BytesRefCodec {
    type DItem = &'a [u8];

    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
        Ok(bytes)
    }
}
47
crates/milli/src/heed_codec/facet/field_doc_id_facet_codec.rs
Normal file

@@ -0,0 +1,47 @@
use std::borrow::Cow;
use std::marker::PhantomData;

use heed::{BoxedError, BytesDecode, BytesEncode};

use crate::heed_codec::SliceTooShortError;
use crate::{try_split_array_at, DocumentId, FieldId};

pub struct FieldDocIdFacetCodec<C>(PhantomData<C>);

impl<'a, C> BytesDecode<'a> for FieldDocIdFacetCodec<C>
where
    C: BytesDecode<'a>,
{
    type DItem = (FieldId, DocumentId, C::DItem);

    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
        let (field_id_bytes, bytes) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
        let field_id = u16::from_be_bytes(field_id_bytes);

        let (document_id_bytes, bytes) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
        let document_id = u32::from_be_bytes(document_id_bytes);

        let value = C::bytes_decode(bytes)?;

        Ok((field_id, document_id, value))
    }
}

impl<'a, C> BytesEncode<'a> for FieldDocIdFacetCodec<C>
where
    C: BytesEncode<'a>,
{
    type EItem = (FieldId, DocumentId, C::EItem);

    fn bytes_encode(
        (field_id, document_id, value): &'a Self::EItem,
    ) -> Result<Cow<'a, [u8]>, BoxedError> {
        let mut bytes = Vec::with_capacity(32);
        bytes.extend_from_slice(&field_id.to_be_bytes()); // 2 bytes
        bytes.extend_from_slice(&document_id.to_be_bytes()); // 4 bytes
        let value_bytes = C::bytes_encode(value)?;
        // variable length: 16 bytes for an f64, potentially large for a string
        bytes.extend_from_slice(&value_bytes);
        Ok(Cow::Owned(bytes))
    }
}
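A minimal sketch (not part of the diff) of the key layout this codec produces: 2 big-endian bytes of field id, 4 big-endian bytes of document id, then the inner codec's value bytes. It assumes the `OrderedF64Codec` defined later in this diff:

```rust
use heed::BytesEncode;

fn main() {
    let key = (42u16, 7u32, 1.5f64);
    let bytes = FieldDocIdFacetCodec::<OrderedF64Codec>::bytes_encode(&key).unwrap();
    assert_eq!(&bytes[..2], &42u16.to_be_bytes()); // field id
    assert_eq!(&bytes[2..6], &7u32.to_be_bytes()); // document id
    assert_eq!(bytes.len(), 2 + 4 + 16); // OrderedF64Codec values take 16 bytes
}
```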
123
crates/milli/src/heed_codec/facet/mod.rs
Normal file

@@ -0,0 +1,123 @@
mod field_doc_id_facet_codec;
mod ordered_f64_codec;

use std::borrow::Cow;
use std::convert::TryFrom;
use std::marker::PhantomData;

use heed::types::DecodeIgnore;
use heed::{BoxedError, BytesDecode, BytesEncode};
use roaring::RoaringBitmap;

pub use self::field_doc_id_facet_codec::FieldDocIdFacetCodec;
pub use self::ordered_f64_codec::OrderedF64Codec;
use super::StrRefCodec;
use crate::{CboRoaringBitmapCodec, BEU16};

pub type FieldDocIdFacetF64Codec = FieldDocIdFacetCodec<OrderedF64Codec>;
pub type FieldDocIdFacetStringCodec = FieldDocIdFacetCodec<StrRefCodec>;
pub type FieldDocIdFacetIgnoreCodec = FieldDocIdFacetCodec<DecodeIgnore>;

pub type FieldIdCodec = BEU16;

/// Tries to split a slice in two at the given middle point,
/// returning `None` if the slice is too short.
pub fn try_split_at(slice: &[u8], mid: usize) -> Option<(&[u8], &[u8])> {
    if slice.len() >= mid {
        Some(slice.split_at(mid))
    } else {
        None
    }
}

/// The key in the [`facet_id_string_docids` and `facet_id_f64_docids`][`Index::facet_id_string_docids`]
/// databases.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] // TODO: try removing PartialOrd and Ord
pub struct FacetGroupKey<T> {
    pub field_id: u16,
    pub level: u8,
    pub left_bound: T,
}

/// The value in the [`facet_id_string_docids` and `facet_id_f64_docids`][`Index::facet_id_string_docids`]
/// databases.
#[derive(Debug)]
pub struct FacetGroupValue {
    pub size: u8,
    pub bitmap: RoaringBitmap,
}

#[derive(Debug)]
pub struct FacetGroupLazyValue<'b> {
    pub size: u8,
    pub bitmap_bytes: &'b [u8],
}

pub struct FacetGroupKeyCodec<T> {
    _phantom: PhantomData<T>,
}

impl<'a, T> heed::BytesEncode<'a> for FacetGroupKeyCodec<T>
where
    T: BytesEncode<'a>,
    T::EItem: Sized,
{
    type EItem = FacetGroupKey<T::EItem>;

    fn bytes_encode(value: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
        let mut v = vec![];
        v.extend_from_slice(&value.field_id.to_be_bytes());
        v.extend_from_slice(&[value.level]);

        let bound = T::bytes_encode(&value.left_bound)?;
        v.extend_from_slice(&bound);

        Ok(Cow::Owned(v))
    }
}

impl<'a, T> heed::BytesDecode<'a> for FacetGroupKeyCodec<T>
where
    T: BytesDecode<'a>,
{
    type DItem = FacetGroupKey<T::DItem>;

    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
        let fid = u16::from_be_bytes(<[u8; 2]>::try_from(&bytes[0..=1])?);
        let level = bytes[2];
        let bound = T::bytes_decode(&bytes[3..])?;
        Ok(FacetGroupKey { field_id: fid, level, left_bound: bound })
    }
}

pub struct FacetGroupValueCodec;

impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
    type EItem = FacetGroupValue;

    fn bytes_encode(value: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
        let mut v = vec![value.size];
        CboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut v);
        Ok(Cow::Owned(v))
    }
}

impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
    type DItem = FacetGroupValue;

    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
        let size = bytes[0];
        let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..])?;
        Ok(FacetGroupValue { size, bitmap })
    }
}

pub struct FacetGroupLazyValueCodec;

impl<'a> heed::BytesDecode<'a> for FacetGroupLazyValueCodec {
    type DItem = FacetGroupLazyValue<'a>;

    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
        Ok(FacetGroupLazyValue { size: bytes[0], bitmap_bytes: &bytes[1..] })
    }
}
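A minimal sketch (not part of the diff, assuming `FacetGroupKey`, `FacetGroupKeyCodec`, and `StrRefCodec` are in scope) of the key layout: 2 big-endian bytes of field id, 1 byte of level, then the left bound. Raw byte order therefore groups keys by field, then level, then bound, which is what the facet level databases rely on:

```rust
use heed::BytesEncode;

fn main() {
    let key = FacetGroupKey { field_id: 3u16, level: 1u8, left_bound: "blue" };
    let bytes = FacetGroupKeyCodec::<StrRefCodec>::bytes_encode(&key).unwrap();
    assert_eq!(&bytes[..2], &3u16.to_be_bytes()); // field id
    assert_eq!(bytes[2], 1);                      // level
    assert_eq!(&bytes[3..], b"blue");             // left bound
}
```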
45
crates/milli/src/heed_codec/facet/ordered_f64_codec.rs
Normal file

@@ -0,0 +1,45 @@
use std::borrow::Cow;
use std::convert::TryInto;

use heed::{BoxedError, BytesDecode};
use thiserror::Error;

use crate::facet::value_encoding::f64_into_bytes;
use crate::heed_codec::SliceTooShortError;

pub struct OrderedF64Codec;

impl<'a> BytesDecode<'a> for OrderedF64Codec {
    type DItem = f64;

    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
        if bytes.len() < 16 {
            Err(SliceTooShortError.into())
        } else {
            bytes[8..].try_into().map(f64::from_be_bytes).map_err(Into::into)
        }
    }
}

impl heed::BytesEncode<'_> for OrderedF64Codec {
    type EItem = f64;

    fn bytes_encode(f: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
        let mut buffer = [0u8; 16];

        // write the globally ordered float
        let bytes = f64_into_bytes(*f).ok_or(InvalidGloballyOrderedFloatError { float: *f })?;
        buffer[..8].copy_from_slice(&bytes[..]);
        // then the raw f64 value, just to be able to read it back
        let bytes = f.to_be_bytes();
        buffer[8..16].copy_from_slice(&bytes[..]);

        Ok(Cow::Owned(buffer.to_vec()))
    }
}

#[derive(Error, Debug)]
#[error("the float {float} cannot be converted to a globally ordered representation")]
pub struct InvalidGloballyOrderedFloatError {
    float: f64,
}
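A minimal sketch (not part of the diff, assuming `OrderedF64Codec` is in scope) of the 16-byte layout: the first 8 bytes are the memcmp-orderable transform from `f64_into_bytes`, and the last 8 bytes are the plain big-endian `f64` used only to read the value back:

```rust
use heed::{BytesDecode, BytesEncode};

fn main() {
    let bytes = OrderedF64Codec::bytes_encode(&1.5).unwrap();
    assert_eq!(bytes.len(), 16);
    assert_eq!(&bytes[8..], &1.5f64.to_be_bytes()); // the raw value tail
    assert_eq!(OrderedF64Codec::bytes_decode(&bytes).unwrap(), 1.5);
}
```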
30
crates/milli/src/heed_codec/field_id_word_count_codec.rs
Normal file

@@ -0,0 +1,30 @@
use std::borrow::Cow;

use heed::BoxedError;

use super::SliceTooShortError;
use crate::{try_split_array_at, FieldId};

pub struct FieldIdWordCountCodec;

impl<'a> heed::BytesDecode<'a> for FieldIdWordCountCodec {
    type DItem = (FieldId, u8);

    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
        let (field_id_bytes, bytes) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
        let field_id = u16::from_be_bytes(field_id_bytes);
        let ([word_count], _nothing) = try_split_array_at(bytes).ok_or(SliceTooShortError)?;
        Ok((field_id, word_count))
    }
}

impl<'a> heed::BytesEncode<'a> for FieldIdWordCountCodec {
    type EItem = (FieldId, u8);

    fn bytes_encode((field_id, word_count): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
        let mut bytes = Vec::with_capacity(2 + 1);
        bytes.extend_from_slice(&field_id.to_be_bytes());
        bytes.push(*word_count);
        Ok(Cow::Owned(bytes))
    }
}
23
crates/milli/src/heed_codec/fst_set_codec.rs
Normal file

@@ -0,0 +1,23 @@
use std::borrow::Cow;

use fst::Set;
use heed::{BoxedError, BytesDecode, BytesEncode};

/// A codec for values of type `Set<&[u8]>`.
pub struct FstSetCodec;

impl<'a> BytesEncode<'a> for FstSetCodec {
    type EItem = Set<Vec<u8>>;

    fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
        Ok(Cow::Borrowed(item.as_fst().as_bytes()))
    }
}

impl<'a> BytesDecode<'a> for FstSetCodec {
    type DItem = Set<&'a [u8]>;

    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
        Set::new(bytes).map_err(Into::into)
    }
}
39
crates/milli/src/heed_codec/mod.rs
Normal file

@@ -0,0 +1,39 @@
mod beu16_str_codec;
mod beu32_str_codec;
mod byte_slice_ref;
pub mod facet;
mod field_id_word_count_codec;
mod fst_set_codec;
mod obkv_codec;
mod roaring_bitmap;
mod roaring_bitmap_length;
mod str_beu32_codec;
mod str_ref;
mod str_str_u8_codec;

pub use byte_slice_ref::BytesRefCodec;
use heed::BoxedError;
pub use str_ref::StrRefCodec;
use thiserror::Error;

pub use self::beu16_str_codec::BEU16StrCodec;
pub use self::beu32_str_codec::BEU32StrCodec;
pub use self::field_id_word_count_codec::FieldIdWordCountCodec;
pub use self::fst_set_codec::FstSetCodec;
pub use self::obkv_codec::ObkvCodec;
pub use self::roaring_bitmap::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
pub use self::roaring_bitmap_length::{
    BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec, RoaringBitmapLenCodec,
};
pub use self::str_beu32_codec::{StrBEU16Codec, StrBEU32Codec};
pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec};

pub trait BytesDecodeOwned {
    type DItem;

    fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError>;
}

#[derive(Error, Debug)]
#[error("the slice is too short")]
pub struct SliceTooShortError;
22
crates/milli/src/heed_codec/obkv_codec.rs
Normal file

@@ -0,0 +1,22 @@
use std::borrow::Cow;

use heed::BoxedError;
use obkv::{KvReaderU16, KvWriterU16};

pub struct ObkvCodec;

impl<'a> heed::BytesDecode<'a> for ObkvCodec {
    type DItem = KvReaderU16<'a>;

    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
        Ok(KvReaderU16::new(bytes))
    }
}

impl heed::BytesEncode<'_> for ObkvCodec {
    type EItem = KvWriterU16<Vec<u8>>;

    fn bytes_encode(item: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
        item.clone().into_inner().map(Cow::Owned).map_err(Into::into)
    }
}
50
crates/milli/src/heed_codec/roaring_bitmap/bo_roaring_bitmap_codec.rs
Normal file

@@ -0,0 +1,50 @@
use std::borrow::Cow;
use std::convert::TryInto;
use std::mem::size_of;

use heed::{BoxedError, BytesDecode};
use roaring::RoaringBitmap;

use crate::heed_codec::BytesDecodeOwned;

pub struct BoRoaringBitmapCodec;

impl BoRoaringBitmapCodec {
    pub fn serialize_into(bitmap: &RoaringBitmap, out: &mut Vec<u8>) {
        out.reserve(bitmap.len() as usize * size_of::<u32>());
        bitmap.iter().map(u32::to_ne_bytes).for_each(|bytes| out.extend_from_slice(&bytes));
    }
}

impl BytesDecode<'_> for BoRoaringBitmapCodec {
    type DItem = RoaringBitmap;

    fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
        let mut bitmap = RoaringBitmap::new();

        for chunk in bytes.chunks(size_of::<u32>()) {
            let bytes = chunk.try_into()?;
            bitmap.push(u32::from_ne_bytes(bytes));
        }

        Ok(bitmap)
    }
}

impl BytesDecodeOwned for BoRoaringBitmapCodec {
    type DItem = RoaringBitmap;

    fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
        Self::bytes_decode(bytes)
    }
}

impl heed::BytesEncode<'_> for BoRoaringBitmapCodec {
    type EItem = RoaringBitmap;

    fn bytes_encode(item: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
        let mut out = Vec::new();
        BoRoaringBitmapCodec::serialize_into(item, &mut out);
        Ok(Cow::Owned(out))
    }
}
247
crates/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs
Normal file

@@ -0,0 +1,247 @@
use std::borrow::Cow;
use std::io::{self, Cursor};
use std::mem::size_of;

use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
use heed::BoxedError;
use roaring::RoaringBitmap;

use crate::heed_codec::BytesDecodeOwned;
use crate::update::del_add::{DelAdd, KvReaderDelAdd};

/// This is the limit where using a byteorder becomes less size-efficient
/// than using a direct roaring encoding; it is also the point where we are able
/// to determine the encoding used only from the length of the array of bytes.
pub const THRESHOLD: usize = 7;

/// A conditional codec that uses either the RoaringBitmap
/// or a lighter ByteOrder en/decoding method.
pub struct CboRoaringBitmapCodec;

impl CboRoaringBitmapCodec {
    pub fn serialized_size(roaring: &RoaringBitmap) -> usize {
        if roaring.len() <= THRESHOLD as u64 {
            roaring.len() as usize * size_of::<u32>()
        } else {
            roaring.serialized_size()
        }
    }

    pub fn serialize_into(roaring: &RoaringBitmap, vec: &mut Vec<u8>) {
        if roaring.len() <= THRESHOLD as u64 {
            // If the number of items (u32s) to encode is less than or equal to the threshold,
            // it means that it would weigh the same or less than the RoaringBitmap
            // header, so we directly encode them using ByteOrder instead.
            for integer in roaring {
                vec.write_u32::<NativeEndian>(integer).unwrap();
            }
        } else {
            // Otherwise, we use the classic RoaringBitmapCodec that writes a header.
            roaring.serialize_into(vec).unwrap();
        }
    }

    pub fn deserialize_from(mut bytes: &[u8]) -> io::Result<RoaringBitmap> {
        if bytes.len() <= THRESHOLD * size_of::<u32>() {
            // If at most the threshold number of integers fits into this array of
            // bytes, it means that we used the ByteOrder codec serializer.
            let mut bitmap = RoaringBitmap::new();
            while let Ok(integer) = bytes.read_u32::<NativeEndian>() {
                bitmap.insert(integer);
            }
            Ok(bitmap)
        } else {
            // Otherwise, it means we used the classic RoaringBitmapCodec and
            // that the header alone takes more space than threshold integers would.
            RoaringBitmap::deserialize_unchecked_from(bytes)
        }
    }

    pub fn intersection_with_serialized(
        mut bytes: &[u8],
        other: &RoaringBitmap,
    ) -> io::Result<RoaringBitmap> {
        // See the `deserialize_from` method above for implementation details.
        if bytes.len() <= THRESHOLD * size_of::<u32>() {
            let mut bitmap = RoaringBitmap::new();
            while let Ok(integer) = bytes.read_u32::<NativeEndian>() {
                if other.contains(integer) {
                    bitmap.insert(integer);
                }
            }
            Ok(bitmap)
        } else {
            other.intersection_with_serialized_unchecked(Cursor::new(bytes))
        }
    }

    /// Merge serialized CboRoaringBitmaps in a buffer.
    ///
    /// If the merged values' length is under the threshold, the values are directly
    /// serialized in the buffer; otherwise a RoaringBitmap is created from the
    /// values and is serialized in the buffer.
    pub fn merge_into<I, A>(slices: I, buffer: &mut Vec<u8>) -> io::Result<()>
    where
        I: IntoIterator<Item = A>,
        A: AsRef<[u8]>,
    {
        let mut roaring = RoaringBitmap::new();
        let mut vec = Vec::new();

        for bytes in slices {
            if bytes.as_ref().len() <= THRESHOLD * size_of::<u32>() {
                let mut reader = bytes.as_ref();
                while let Ok(integer) = reader.read_u32::<NativeEndian>() {
                    vec.push(integer);
                }
            } else {
                roaring |= RoaringBitmap::deserialize_unchecked_from(bytes.as_ref())?;
            }
        }

        if roaring.is_empty() {
            vec.sort_unstable();
            vec.dedup();

            if vec.len() <= THRESHOLD {
                for integer in vec {
                    buffer.extend_from_slice(&integer.to_ne_bytes());
                }
            } else {
                // We can unwrap safely because the vector was sorted and deduplicated above.
                let roaring = RoaringBitmap::from_sorted_iter(vec).unwrap();
                roaring.serialize_into(buffer)?;
            }
        } else {
            roaring.extend(vec);
            roaring.serialize_into(buffer)?;
        }

        Ok(())
    }

    /// Merges a DelAdd delta into a CboRoaringBitmap.
    pub fn merge_deladd_into<'a>(
        deladd: KvReaderDelAdd<'_>,
        previous: &[u8],
        buffer: &'a mut Vec<u8>,
    ) -> io::Result<Option<&'a [u8]>> {
        // Deserialize the bitmap that is already there
        let mut previous = Self::deserialize_from(previous)?;

        // Remove the integers we no longer want from the previous bitmap
        if let Some(value) = deladd.get(DelAdd::Deletion) {
            previous -= Self::deserialize_from(value)?;
        }

        // Insert the new integers we want into the previous bitmap
        if let Some(value) = deladd.get(DelAdd::Addition) {
            previous |= Self::deserialize_from(value)?;
        }

        if previous.is_empty() {
            return Ok(None);
        }

        Self::serialize_into(&previous, buffer);
        Ok(Some(&buffer[..]))
    }
}

impl heed::BytesDecode<'_> for CboRoaringBitmapCodec {
    type DItem = RoaringBitmap;

    fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
        Self::deserialize_from(bytes).map_err(Into::into)
    }
}

impl BytesDecodeOwned for CboRoaringBitmapCodec {
    type DItem = RoaringBitmap;

    fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
        Self::deserialize_from(bytes).map_err(Into::into)
    }
}

impl heed::BytesEncode<'_> for CboRoaringBitmapCodec {
    type EItem = RoaringBitmap;

    fn bytes_encode(item: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
        let mut vec = Vec::with_capacity(Self::serialized_size(item));
        Self::serialize_into(item, &mut vec);
        Ok(Cow::Owned(vec))
    }
}

#[cfg(test)]
mod tests {
    use std::iter::FromIterator;

    use heed::{BytesDecode, BytesEncode};

    use super::*;

    #[test]
    fn verify_encoding_decoding() {
        let input = RoaringBitmap::from_iter(0..THRESHOLD as u32);
        let bytes = CboRoaringBitmapCodec::bytes_encode(&input).unwrap();
        let output = CboRoaringBitmapCodec::bytes_decode(&bytes).unwrap();
        assert_eq!(input, output);
    }

    #[test]
    fn verify_threshold() {
        let input = RoaringBitmap::from_iter(0..THRESHOLD as u32);

        // use the roaring bitmap
        let mut bytes = Vec::new();
        input.serialize_into(&mut bytes).unwrap();
        let roaring_size = bytes.len();

        // use byteorder directly
        let mut bytes = Vec::new();
        for integer in input {
            bytes.write_u32::<NativeEndian>(integer).unwrap();
        }
        let bo_size = bytes.len();

        assert!(roaring_size > bo_size);
    }

    #[test]
    fn merge_cbo_roaring_bitmaps() {
        let mut buffer = Vec::new();

        let small_data = [
            RoaringBitmap::from_sorted_iter(1..4).unwrap(),
            RoaringBitmap::from_sorted_iter(2..5).unwrap(),
            RoaringBitmap::from_sorted_iter(4..6).unwrap(),
            RoaringBitmap::from_sorted_iter(1..3).unwrap(),
        ];

        let small_data: Vec<_> =
            small_data.iter().map(|b| CboRoaringBitmapCodec::bytes_encode(b).unwrap()).collect();
        CboRoaringBitmapCodec::merge_into(small_data.as_slice(), &mut buffer).unwrap();
        let bitmap = CboRoaringBitmapCodec::deserialize_from(&buffer).unwrap();
        let expected = RoaringBitmap::from_sorted_iter(1..6).unwrap();
        assert_eq!(bitmap, expected);

        let medium_data = [
            RoaringBitmap::from_sorted_iter(1..4).unwrap(),
            RoaringBitmap::from_sorted_iter(2..5).unwrap(),
            RoaringBitmap::from_sorted_iter(4..8).unwrap(),
            RoaringBitmap::from_sorted_iter(0..3).unwrap(),
            RoaringBitmap::from_sorted_iter(7..23).unwrap(),
        ];

        let medium_data: Vec<_> =
            medium_data.iter().map(|b| CboRoaringBitmapCodec::bytes_encode(b).unwrap()).collect();
        buffer.clear();
        CboRoaringBitmapCodec::merge_into(medium_data.as_slice(), &mut buffer).unwrap();

        let bitmap = CboRoaringBitmapCodec::deserialize_from(&buffer).unwrap();
        let expected = RoaringBitmap::from_sorted_iter(0..23).unwrap();
        assert_eq!(bitmap, expected);
    }
}
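A worked sketch (not part of the diff, assuming `CboRoaringBitmapCodec` and `THRESHOLD` are in scope) of the threshold arithmetic: with `THRESHOLD = 7`, up to 7 integers are stored as raw native-endian u32s (at most 28 bytes), and from 8 integers on, the classic roaring serialization, header included, takes over:

```rust
use roaring::RoaringBitmap;

fn main() {
    // 7 integers: stored as 7 raw u32s, 28 bytes, no header.
    let small: RoaringBitmap = (0..7).collect();
    assert_eq!(CboRoaringBitmapCodec::serialized_size(&small), 28);

    // 8 integers: the classic roaring serialization (with header) is used.
    let bigger: RoaringBitmap = (0..8).collect();
    assert_eq!(CboRoaringBitmapCodec::serialized_size(&bigger), bigger.serialized_size());
}
```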
7
crates/milli/src/heed_codec/roaring_bitmap/mod.rs
Normal file

@@ -0,0 +1,7 @@
mod bo_roaring_bitmap_codec;
pub mod cbo_roaring_bitmap_codec;
mod roaring_bitmap_codec;

pub use self::bo_roaring_bitmap_codec::BoRoaringBitmapCodec;
pub use self::cbo_roaring_bitmap_codec::CboRoaringBitmapCodec;
pub use self::roaring_bitmap_codec::RoaringBitmapCodec;
34
crates/milli/src/heed_codec/roaring_bitmap/roaring_bitmap_codec.rs
Normal file

@@ -0,0 +1,34 @@
use std::borrow::Cow;

use heed::BoxedError;
use roaring::RoaringBitmap;

use crate::heed_codec::BytesDecodeOwned;

pub struct RoaringBitmapCodec;

impl heed::BytesDecode<'_> for RoaringBitmapCodec {
    type DItem = RoaringBitmap;

    fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
        RoaringBitmap::deserialize_unchecked_from(bytes).map_err(Into::into)
    }
}

impl BytesDecodeOwned for RoaringBitmapCodec {
    type DItem = RoaringBitmap;

    fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
        RoaringBitmap::deserialize_from(bytes).map_err(Into::into)
    }
}

impl heed::BytesEncode<'_> for RoaringBitmapCodec {
    type EItem = RoaringBitmap;

    fn bytes_encode(item: &Self::EItem) -> Result<Cow<'_, [u8]>, BoxedError> {
        let mut bytes = Vec::with_capacity(item.serialized_size());
        item.serialize_into(&mut bytes)?;
        Ok(Cow::Owned(bytes))
    }
}
23
crates/milli/src/heed_codec/roaring_bitmap_length/bo_roaring_bitmap_len_codec.rs
Normal file

@@ -0,0 +1,23 @@
use std::mem;

use heed::{BoxedError, BytesDecode};

use crate::heed_codec::BytesDecodeOwned;

pub struct BoRoaringBitmapLenCodec;

impl BytesDecode<'_> for BoRoaringBitmapLenCodec {
    type DItem = u64;

    fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
        Ok((bytes.len() / mem::size_of::<u32>()) as u64)
    }
}

impl BytesDecodeOwned for BoRoaringBitmapLenCodec {
    type DItem = u64;

    fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
        Self::bytes_decode(bytes)
    }
}
33
crates/milli/src/heed_codec/roaring_bitmap_length/cbo_roaring_bitmap_len_codec.rs
Normal file

@@ -0,0 +1,33 @@
use std::mem;

use heed::{BoxedError, BytesDecode};

use super::{BoRoaringBitmapLenCodec, RoaringBitmapLenCodec};
use crate::heed_codec::roaring_bitmap::cbo_roaring_bitmap_codec::THRESHOLD;
use crate::heed_codec::BytesDecodeOwned;

pub struct CboRoaringBitmapLenCodec;

impl BytesDecode<'_> for CboRoaringBitmapLenCodec {
    type DItem = u64;

    fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
        if bytes.len() <= THRESHOLD * mem::size_of::<u32>() {
            // If at most the threshold number of integers fits into this array of
            // bytes, it means that we used the ByteOrder codec serializer.
            BoRoaringBitmapLenCodec::bytes_decode(bytes)
        } else {
            // Otherwise, it means we used the classic RoaringBitmapCodec and
            // that the header alone takes more space than threshold integers would.
            RoaringBitmapLenCodec::bytes_decode(bytes)
        }
    }
}

impl BytesDecodeOwned for CboRoaringBitmapLenCodec {
    type DItem = u64;

    fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
        Self::bytes_decode(bytes)
    }
}
7
crates/milli/src/heed_codec/roaring_bitmap_length/mod.rs
Normal file

@@ -0,0 +1,7 @@
mod bo_roaring_bitmap_len_codec;
mod cbo_roaring_bitmap_len_codec;
mod roaring_bitmap_len_codec;

pub use self::bo_roaring_bitmap_len_codec::BoRoaringBitmapLenCodec;
pub use self::cbo_roaring_bitmap_len_codec::CboRoaringBitmapLenCodec;
pub use self::roaring_bitmap_len_codec::RoaringBitmapLenCodec;
@ -0,0 +1,88 @@
|
|||
use std::io::{self, BufRead, Read};
|
||||
use std::mem;
|
||||
|
||||
use byteorder::{LittleEndian, ReadBytesExt};
|
||||
use heed::BoxedError;
|
||||
|
||||
use crate::heed_codec::BytesDecodeOwned;
|
||||
|
||||
const SERIAL_COOKIE_NO_RUNCONTAINER: u32 = 12346;
|
||||
const SERIAL_COOKIE: u16 = 12347;
|
||||
|
||||
pub struct RoaringBitmapLenCodec;
|
||||
|
||||
impl RoaringBitmapLenCodec {
|
||||
// FIXME should be exported in the RoaringBitmap crate
|
||||
fn deserialize_from_slice(mut bytes: &[u8]) -> io::Result<u64> {
|
||||
let (size, has_offsets) = {
|
||||
let cookie = bytes.read_u32::<LittleEndian>()?;
|
||||
if cookie == SERIAL_COOKIE_NO_RUNCONTAINER {
|
||||
(bytes.read_u32::<LittleEndian>()? as usize, true)
|
||||
} else if (cookie as u16) == SERIAL_COOKIE {
|
||||
return Err(io::Error::new(io::ErrorKind::Other, "run containers are unsupported"));
|
||||
} else {
|
||||
return Err(io::Error::new(io::ErrorKind::Other, "unknown cookie value"));
|
||||
}
|
||||
};
|
||||
|
||||
if size > u16::MAX as usize + 1 {
|
||||
return Err(io::Error::new(io::ErrorKind::Other, "size is greater than supported"));
|
||||
}
|
||||
|
||||
let mut description_bytes = vec![0u8; size * 4];
|
||||
bytes.read_exact(&mut description_bytes)?;
|
||||
let description_bytes = &mut &description_bytes[..];
|
||||
|
||||
if has_offsets {
|
||||
bytes.consume(size * 4);
|
||||
}
|
||||
|
||||
let mut length = 0;
|
||||
for _ in 0..size {
|
||||
let _key = description_bytes.read_u16::<LittleEndian>()?;
|
||||
let len = u64::from(description_bytes.read_u16::<LittleEndian>()?) + 1;
|
||||
length += len;
|
||||
|
||||
if len <= 4096 {
|
||||
bytes.consume(len as usize * mem::size_of::<u16>());
|
||||
} else {
|
||||
bytes.consume(1024 * mem::size_of::<u64>())
|
||||
}
|
||||
}
|
||||
|
||||
Ok(length)
|
||||
}
|
||||
}
|
||||
|
||||
impl heed::BytesDecode<'_> for RoaringBitmapLenCodec {
|
||||
type DItem = u64;
|
||||
|
||||
fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
|
||||
RoaringBitmapLenCodec::deserialize_from_slice(bytes).map_err(Into::into)
|
||||
}
|
||||
}
|
||||
|
||||
impl BytesDecodeOwned for RoaringBitmapLenCodec {
|
||||
type DItem = u64;
|
||||
|
||||
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
|
||||
RoaringBitmapLenCodec::deserialize_from_slice(bytes).map_err(Into::into)
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use heed::BytesEncode;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::*;
|
||||
use crate::heed_codec::RoaringBitmapCodec;
|
||||
|
||||
#[test]
|
||||
fn deserialize_roaring_bitmap_length() {
|
||||
let bitmap: RoaringBitmap = (0..500).chain(800..800_000).chain(920_056..930_032).collect();
|
||||
let bytes = RoaringBitmapCodec::bytes_encode(&bitmap).unwrap();
|
||||
let len = RoaringBitmapLenCodec::deserialize_from_slice(&bytes).unwrap();
|
||||
assert_eq!(bitmap.len(), len);
|
||||
}
|
||||
}
|
79
crates/milli/src/heed_codec/str_beu32_codec.rs
Normal file

@@ -0,0 +1,79 @@
use std::borrow::Cow;
use std::convert::TryInto;
use std::mem::size_of;
use std::str;

use heed::BoxedError;

use super::SliceTooShortError;

pub struct StrBEU32Codec;

impl<'a> heed::BytesDecode<'a> for StrBEU32Codec {
    type DItem = (&'a str, u32);

    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
        let footer_len = size_of::<u32>();

        if bytes.len() < footer_len {
            return Err(SliceTooShortError.into());
        }

        let (word, bytes) = bytes.split_at(bytes.len() - footer_len);
        let word = str::from_utf8(word)?;
        let pos = bytes.try_into().map(u32::from_be_bytes)?;

        Ok((word, pos))
    }
}

impl<'a> heed::BytesEncode<'a> for StrBEU32Codec {
    type EItem = (&'a str, u32);

    fn bytes_encode((word, pos): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
        let pos = pos.to_be_bytes();

        let mut bytes = Vec::with_capacity(word.len() + pos.len());
        bytes.extend_from_slice(word.as_bytes());
        bytes.extend_from_slice(&pos[..]);

        Ok(Cow::Owned(bytes))
    }
}

pub struct StrBEU16Codec;

impl<'a> heed::BytesDecode<'a> for StrBEU16Codec {
    type DItem = (&'a str, u16);

    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
        let footer_len = size_of::<u16>();

        if bytes.len() < footer_len + 1 {
            return Err(SliceTooShortError.into());
        }

        let (word_plus_nul_byte, bytes) = bytes.split_at(bytes.len() - footer_len);
        // unwrap: we just checked the footer + 1 above.
        let (_, word) = word_plus_nul_byte.split_last().unwrap();
        let word = str::from_utf8(word)?;
        let pos = bytes.try_into().map(u16::from_be_bytes)?;

        Ok((word, pos))
    }
}

impl<'a> heed::BytesEncode<'a> for StrBEU16Codec {
    type EItem = (&'a str, u16);

    fn bytes_encode((word, pos): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
        let pos = pos.to_be_bytes();

        let mut bytes = Vec::with_capacity(word.len() + 1 + pos.len());
        bytes.extend_from_slice(word.as_bytes());
        bytes.push(0);
        bytes.extend_from_slice(&pos[..]);

        Ok(Cow::Owned(bytes))
    }
}
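A minimal sketch (not part of the diff, assuming `StrBEU16Codec` is in scope) of the `StrBEU16Codec` layout: the word bytes, a single 0 separator byte, then the big-endian u16 footer:

```rust
use heed::{BytesDecode, BytesEncode};

fn main() {
    let bytes = StrBEU16Codec::bytes_encode(&("hello", 7u16)).unwrap();
    assert_eq!(&bytes[..], b"hello\x00\x00\x07"); // word + '\0' + BE u16
    let (word, pos) = StrBEU16Codec::bytes_decode(&bytes).unwrap();
    assert_eq!((word, pos), ("hello", 7));
}
```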
21
crates/milli/src/heed_codec/str_ref.rs
Normal file

@@ -0,0 +1,21 @@
use std::borrow::Cow;

use heed::{BoxedError, BytesDecode, BytesEncode};

/// A codec for values of type `&str`. Unlike `Str`, its `EItem` and `DItem` associated
/// types are equivalent (= `&'a str`) and these values can reside within another structure.
pub struct StrRefCodec;

impl<'a> BytesEncode<'a> for StrRefCodec {
    type EItem = &'a str;

    fn bytes_encode(item: &'a &'a str) -> Result<Cow<'a, [u8]>, BoxedError> {
        Ok(Cow::Borrowed(item.as_bytes()))
    }
}

impl<'a> BytesDecode<'a> for StrRefCodec {
    type DItem = &'a str;

    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
        std::str::from_utf8(bytes).map_err(Into::into)
    }
}
62
crates/milli/src/heed_codec/str_str_u8_codec.rs
Normal file

@@ -0,0 +1,62 @@
use std::borrow::Cow;
use std::ffi::CStr;
use std::str;

use heed::BoxedError;

use super::SliceTooShortError;

pub struct U8StrStrCodec;

impl<'a> heed::BytesDecode<'a> for U8StrStrCodec {
    type DItem = (u8, &'a str, &'a str);

    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
        let (n, bytes) = bytes.split_first().ok_or(SliceTooShortError)?;
        let cstr = CStr::from_bytes_until_nul(bytes)?;
        let s1 = cstr.to_str()?;
        // skip the '\0' byte between the two strings.
        let s2 = str::from_utf8(&bytes[s1.len() + 1..])?;
        Ok((*n, s1, s2))
    }
}

impl<'a> heed::BytesEncode<'a> for U8StrStrCodec {
    type EItem = (u8, &'a str, &'a str);

    fn bytes_encode((n, s1, s2): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
        let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1);
        bytes.push(*n);
        bytes.extend_from_slice(s1.as_bytes());
        bytes.push(0);
        bytes.extend_from_slice(s2.as_bytes());
        Ok(Cow::Owned(bytes))
    }
}

pub struct UncheckedU8StrStrCodec;

impl<'a> heed::BytesDecode<'a> for UncheckedU8StrStrCodec {
    type DItem = (u8, &'a [u8], &'a [u8]);

    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
        let (n, bytes) = bytes.split_first().ok_or(SliceTooShortError)?;
        let cstr = CStr::from_bytes_until_nul(bytes)?;
        let s1_bytes = cstr.to_bytes();
        // skip the '\0' byte between the two strings.
        let s2_bytes = &bytes[s1_bytes.len() + 1..];
        Ok((*n, s1_bytes, s2_bytes))
    }
}

impl<'a> heed::BytesEncode<'a> for UncheckedU8StrStrCodec {
    type EItem = (u8, &'a [u8], &'a [u8]);

    fn bytes_encode((n, s1, s2): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
        let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1);
        bytes.push(*n);
        bytes.extend_from_slice(s1);
        bytes.push(0);
        bytes.extend_from_slice(s2);
        Ok(Cow::Owned(bytes))
    }
}
2909
crates/milli/src/index.rs
Normal file
File diff suppressed because it is too large
448
crates/milli/src/lib.rs
Normal file

@@ -0,0 +1,448 @@
#![cfg_attr(all(test, fuzzing), feature(no_coverage))]
#![allow(clippy::type_complexity)]

#[cfg(test)]
#[global_allocator]
pub static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;

#[macro_use]
pub mod documents;

mod asc_desc;
mod criterion;
mod error;
mod external_documents_ids;
pub mod facet;
mod fields_ids_map;
pub mod heed_codec;
pub mod index;
mod localized_attributes_rules;
pub mod order_by_map;
pub mod prompt;
pub mod proximity;
pub mod score_details;
mod search;
mod thread_pool_no_abort;
pub mod update;
pub mod vector;

#[cfg(test)]
#[macro_use]
pub mod snapshot_tests;
mod fieldids_weights_map;

use std::collections::{BTreeMap, HashMap};
use std::convert::{TryFrom, TryInto};
use std::fmt;
use std::hash::BuildHasherDefault;

use charabia::normalizer::{CharNormalizer, CompatibilityDecompositionNormalizer};
pub use filter_parser::{Condition, FilterCondition, Span, Token};
use fxhash::{FxHasher32, FxHasher64};
pub use grenad::CompressionType;
pub use search::new::{
    execute_search, filtered_universe, DefaultSearchLogger, GeoSortStrategy, SearchContext,
    SearchLogger, VisualSearchLogger,
};
use serde_json::Value;
pub use thread_pool_no_abort::{PanicCatched, ThreadPoolNoAbort, ThreadPoolNoAbortBuilder};
pub use {charabia as tokenizer, heed, rhai};

pub use self::asc_desc::{AscDesc, AscDescError, Member, SortError};
pub use self::criterion::{default_criteria, Criterion, CriterionError};
pub use self::error::{
    Error, FieldIdMapMissingEntry, InternalError, SerializationError, UserError,
};
pub use self::external_documents_ids::ExternalDocumentsIds;
pub use self::fieldids_weights_map::FieldidsWeightsMap;
pub use self::fields_ids_map::FieldsIdsMap;
pub use self::heed_codec::{
    BEU16StrCodec, BEU32StrCodec, BoRoaringBitmapCodec, BoRoaringBitmapLenCodec,
    CboRoaringBitmapCodec, CboRoaringBitmapLenCodec, FieldIdWordCountCodec, ObkvCodec,
    RoaringBitmapCodec, RoaringBitmapLenCodec, StrBEU32Codec, U8StrStrCodec,
    UncheckedU8StrStrCodec,
};
pub use self::index::Index;
pub use self::localized_attributes_rules::LocalizedAttributesRule;
use self::localized_attributes_rules::LocalizedFieldIds;
pub use self::search::facet::{FacetValueHit, SearchForFacetValues};
pub use self::search::similar::Similar;
pub use self::search::{
    FacetDistribution, Filter, FormatOptions, MatchBounds, MatcherBuilder, MatchingWords, OrderBy,
    Search, SearchResult, SemanticSearch, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
};

pub type Result<T> = std::result::Result<T, error::Error>;

pub type Attribute = u32;
pub type BEU16 = heed::types::U16<heed::byteorder::BE>;
pub type BEU32 = heed::types::U32<heed::byteorder::BE>;
pub type BEU64 = heed::types::U64<heed::byteorder::BE>;
pub type DocumentId = u32;
pub type FastMap4<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher32>>;
pub type FastMap8<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher64>>;
pub type FieldDistribution = BTreeMap<String, u64>;
pub type FieldId = u16;
pub type Weight = u16;
pub type Object = serde_json::Map<String, serde_json::Value>;
pub type Position = u32;
pub type RelativePosition = u16;
pub type SmallString32 = smallstr::SmallString<[u8; 32]>;
pub type SmallVec16<T> = smallvec::SmallVec<[T; 16]>;
pub type SmallVec32<T> = smallvec::SmallVec<[T; 32]>;
pub type SmallVec8<T> = smallvec::SmallVec<[T; 8]>;

/// A GeoPoint is a point in the cartesian plane, called xyz_point in the code. Its metadata
/// is a tuple composed of 1. the DocumentId of the associated document and 2. the original point
/// expressed in terms of latitude and longitude.
pub type GeoPoint = rstar::primitives::GeomWithData<[f64; 3], (DocumentId, [f64; 2])>;

/// The maximum length a LMDB key can be.
///
/// Note that the actual allowed length is a little bit higher, but
/// we keep a margin of safety.
const MAX_LMDB_KEY_LENGTH: usize = 500;

/// The maximum length a field value can be when inserted in an LMDB key.
///
/// This number is determined by the keys of the different facet databases
/// and adding a margin of safety.
pub const MAX_FACET_VALUE_LENGTH: usize = MAX_LMDB_KEY_LENGTH - 32;

/// The maximum length a word can be
pub const MAX_WORD_LENGTH: usize = MAX_LMDB_KEY_LENGTH / 2;

pub const MAX_POSITION_PER_ATTRIBUTE: u32 = u16::MAX as u32 + 1;

#[derive(Clone)]
pub struct TimeBudget {
    started_at: std::time::Instant,
    budget: std::time::Duration,

    /// When testing the time budget, ensuring we did more than one iteration of the bucket sort can be useful.
    /// But to avoid being flaky, the only option is to add the ability to stop after a specific number of calls instead of a `Duration`.
    #[cfg(test)]
    stop_after: Option<(std::sync::Arc<std::sync::atomic::AtomicUsize>, usize)>,
}

impl fmt::Debug for TimeBudget {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.debug_struct("TimeBudget")
            .field("started_at", &self.started_at)
            .field("budget", &self.budget)
            .field("left", &(self.budget - self.started_at.elapsed()))
            .finish()
    }
}

impl Default for TimeBudget {
    fn default() -> Self {
        Self::new(std::time::Duration::from_millis(1500))
    }
}

impl TimeBudget {
    pub fn new(budget: std::time::Duration) -> Self {
        Self {
            started_at: std::time::Instant::now(),
            budget,

            #[cfg(test)]
            stop_after: None,
        }
    }

    pub fn max() -> Self {
        Self::new(std::time::Duration::from_secs(u64::MAX))
    }

    #[cfg(test)]
    pub fn with_stop_after(mut self, stop_after: usize) -> Self {
        use std::sync::atomic::AtomicUsize;
        use std::sync::Arc;

        self.stop_after = Some((Arc::new(AtomicUsize::new(0)), stop_after));
        self
    }

    pub fn exceeded(&self) -> bool {
        #[cfg(test)]
        if let Some((current, stop_after)) = &self.stop_after {
            let current = current.fetch_add(1, std::sync::atomic::Ordering::Relaxed);
            if current >= *stop_after {
                return true;
            } else {
                // if a number has been specified then we entirely ignore the time budget
                return false;
            }
        }

        self.started_at.elapsed() > self.budget
    }
}

// Convert an absolute word position into a relative position.
// Return the field id of the attribute related to the absolute position
// and the relative position in the attribute.
pub fn relative_from_absolute_position(absolute: Position) -> (FieldId, RelativePosition) {
    ((absolute >> 16) as u16, (absolute & 0xFFFF) as u16)
}

// Compute the absolute word position with the field id of the attribute and relative position in the attribute.
pub fn absolute_from_relative_position(field_id: FieldId, relative: RelativePosition) -> Position {
    (field_id as u32) << 16 | (relative as u32)
}
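A worked example (not part of the diff): an absolute position packs the field id in the high 16 bits and the relative position in the low 16 bits, so the two functions above are exact inverses:

```rust
fn main() {
    let absolute = absolute_from_relative_position(2, 5);
    assert_eq!(absolute, (2u32 << 16) | 5); // 0x0002_0005
    assert_eq!(relative_from_absolute_position(absolute), (2, 5));
}
```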
// TODO: this is wrong, but will do for now
/// Compute the "bucketed" absolute position from the field id and relative position in the field.
///
/// In a bucketed position, the accuracy of the relative position is reduced exponentially as it gets larger.
pub fn bucketed_position(relative: u16) -> u16 {
    // The first few relative positions are kept intact.
    if relative < 16 {
        relative
    } else if relative < 24 {
        // Relative positions between 16 and 24 all become equal to 24
        24
    } else {
        // Then, groups of positions that have the same base-2 logarithm are reduced to
        // the same relative position: the smallest power of 2 that is greater than them
        (relative as f64).log2().ceil().exp2() as u16
    }
}
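A worked example (not part of the diff) of the exponential bucketing above: small positions stay exact, mid-range positions collapse to 24, and larger ones snap up to the next power of two:

```rust
fn main() {
    assert_eq!(bucketed_position(7), 7);    // < 16: kept intact
    assert_eq!(bucketed_position(20), 24);  // 16..24: collapsed to 24
    assert_eq!(bucketed_position(25), 32);  // 2^ceil(log2(25)) = 32
    assert_eq!(bucketed_position(100), 128);
}
```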
/// Transform a raw obkv store into a JSON Object.
pub fn obkv_to_json(
    displayed_fields: &[FieldId],
    fields_ids_map: &FieldsIdsMap,
    obkv: obkv::KvReaderU16<'_>,
) -> Result<Object> {
    displayed_fields
        .iter()
        .copied()
        .flat_map(|id| obkv.get(id).map(|value| (id, value)))
        .map(|(id, value)| {
            let name = fields_ids_map.name(id).ok_or(error::FieldIdMapMissingEntry::FieldId {
                field_id: id,
                process: "obkv_to_json",
            })?;
            let value = serde_json::from_slice(value).map_err(error::InternalError::SerdeJson)?;
            Ok((name.to_owned(), value))
        })
        .collect()
}

/// Transform every field of a raw obkv store into a JSON Object.
pub fn all_obkv_to_json(
    obkv: obkv::KvReaderU16<'_>,
    fields_ids_map: &FieldsIdsMap,
) -> Result<Object> {
    let all_keys = obkv.iter().map(|(k, _v)| k).collect::<Vec<_>>();
    obkv_to_json(all_keys.as_slice(), fields_ids_map, obkv)
}
/// Transform a JSON value into a string that can be indexed.
|
||||
pub fn json_to_string(value: &Value) -> Option<String> {
|
||||
fn inner(value: &Value, output: &mut String) -> bool {
|
||||
use std::fmt::Write;
|
||||
match value {
|
||||
Value::Null => false,
|
||||
Value::Bool(boolean) => write!(output, "{}", boolean).is_ok(),
|
||||
Value::Number(number) => write!(output, "{}", number).is_ok(),
|
||||
Value::String(string) => write!(output, "{}", string).is_ok(),
|
||||
Value::Array(array) => {
|
||||
let mut count = 0;
|
||||
for value in array {
|
||||
if inner(value, output) {
|
||||
output.push_str(". ");
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
// check that at least one value was written
|
||||
count != 0
|
||||
}
|
||||
Value::Object(object) => {
|
||||
let mut buffer = String::new();
|
||||
let mut count = 0;
|
||||
for (key, value) in object {
|
||||
buffer.clear();
|
||||
let _ = write!(&mut buffer, "{}: ", key);
|
||||
if inner(value, &mut buffer) {
|
||||
buffer.push_str(". ");
|
||||
// We write the "key: value. " pair only when
|
||||
// we are sure that the value can be written.
|
||||
output.push_str(&buffer);
|
||||
count += 1;
|
||||
}
|
||||
}
|
||||
// check that at least one value was written
|
||||
count != 0
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
let mut string = String::new();
|
||||
if inner(value, &mut string) {
|
||||
Some(string)
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
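
// Editor's sketch (not in the original source): `json_to_string` skips nulls
// and joins the remaining values with ". " separators. The exact output below
// assumes serde_json's `preserve_order` feature (enabled for this crate),
// which keeps object keys in insertion order.
#[test]
fn json_to_string_example() {
    let value = serde_json::json!({ "title": "Shazam!", "tags": ["hero", null] });
    assert_eq!(json_to_string(&value).unwrap(), "title: Shazam!. tags: hero. . ");
}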

/// Divides one slice into two at an index; returns `None` if mid is out of bounds.
fn try_split_at<T>(slice: &[T], mid: usize) -> Option<(&[T], &[T])> {
    if mid <= slice.len() {
        Some(slice.split_at(mid))
    } else {
        None
    }
}

/// Divides one slice into an array and the tail at an index;
/// returns `None` if `N` is out of bounds.
fn try_split_array_at<T, const N: usize>(slice: &[T]) -> Option<([T; N], &[T])>
where
    [T; N]: for<'a> TryFrom<&'a [T]>,
{
    let (head, tail) = try_split_at(slice, N)?;
    let head = head.try_into().ok()?;
    Some((head, tail))
}

/// Returns the distance between two points in meters. Each point is composed of two f64 values,
/// one latitude and one longitude.
pub fn distance_between_two_points(a: &[f64; 2], b: &[f64; 2]) -> f64 {
    let a = geoutils::Location::new(a[0], a[1]);
    let b = geoutils::Location::new(b[0], b[1]);

    a.haversine_distance_to(&b).meters()
}

/// Convert a point expressed in terms of latitude and longitude to a point in the
/// cartesian coordinate system expressed in terms of x, y and z.
pub fn lat_lng_to_xyz(coord: &[f64; 2]) -> [f64; 3] {
    let [lat, lng] = coord.map(|f| f.to_radians());
    let x = lat.cos() * lng.cos();
    let y = lat.cos() * lng.sin();
    let z = lat.sin();

    [x, y, z]
}
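
// Editor's sketch (not in the original source): sanity checks for the two geo
// helpers above.
#[test]
fn geo_helpers_examples() {
    // (0°, 0°) lies on the equator at the prime meridian: x = 1, y = z = 0.
    let [x, y, z] = lat_lng_to_xyz(&[0.0, 0.0]);
    assert!((x - 1.0).abs() < 1e-9 && y.abs() < 1e-9 && z.abs() < 1e-9);
    // The haversine distance from a point to itself is zero meters.
    assert!(distance_between_two_points(&[48.86, 2.35], &[48.86, 2.35]).abs() < 1e-9);
}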

/// Returns `true` if the field matches one of the faceted fields.
/// See the function [`is_faceted_by`] below for what “matching” means.
pub fn is_faceted(field: &str, faceted_fields: impl IntoIterator<Item = impl AsRef<str>>) -> bool {
    faceted_fields.into_iter().any(|facet| is_faceted_by(field, facet.as_ref()))
}

/// Returns `true` if the field matches the facet.
/// ```
/// use milli::is_faceted_by;
/// // -- the valid basics
/// assert!(is_faceted_by("animaux", "animaux"));
/// assert!(is_faceted_by("animaux.chien", "animaux"));
/// assert!(is_faceted_by("animaux.chien.race.bouvier bernois.fourrure.couleur", "animaux"));
/// assert!(is_faceted_by("animaux.chien.race.bouvier bernois.fourrure.couleur", "animaux.chien"));
/// assert!(is_faceted_by("animaux.chien.race.bouvier bernois.fourrure.couleur", "animaux.chien.race.bouvier bernois"));
/// assert!(is_faceted_by("animaux.chien.race.bouvier bernois.fourrure.couleur", "animaux.chien.race.bouvier bernois.fourrure"));
/// assert!(is_faceted_by("animaux.chien.race.bouvier bernois.fourrure.couleur", "animaux.chien.race.bouvier bernois.fourrure.couleur"));
///
/// // -- the wrongs
/// assert!(!is_faceted_by("chien", "chat"));
/// assert!(!is_faceted_by("animaux", "animaux.chien"));
/// assert!(!is_faceted_by("animaux.chien", "animaux.chat"));
///
/// // -- the strange edge cases
/// assert!(!is_faceted_by("animaux.chien", "anima"));
/// assert!(!is_faceted_by("animaux.chien", "animau"));
/// assert!(!is_faceted_by("animaux.chien", "animaux."));
/// assert!(!is_faceted_by("animaux.chien", "animaux.c"));
/// assert!(!is_faceted_by("animaux.chien", "animaux.ch"));
/// assert!(!is_faceted_by("animaux.chien", "animaux.chi"));
/// assert!(!is_faceted_by("animaux.chien", "animaux.chie"));
/// ```
pub fn is_faceted_by(field: &str, facet: &str) -> bool {
    field.starts_with(facet) && field[facet.len()..].chars().next().map_or(true, |c| c == '.')
}

pub fn normalize_facet(original: &str) -> String {
    CompatibilityDecompositionNormalizer.normalize_str(original.trim()).to_lowercase()
}

#[cfg(test)]
mod tests {
    use serde_json::json;

    use super::*;

    #[test]
    fn json_to_string_object() {
        let value = json!({
            "name": "John Doe",
            "age": 43,
            "not_there": null,
        });

        let string = json_to_string(&value).unwrap();
        assert_eq!(string, "name: John Doe. age: 43. ");
    }

    #[test]
    fn json_to_string_array() {
        let value = json!([
            { "name": "John Doe" },
            43,
            "hello",
            [ "I", "am", "fine" ],
            null,
        ]);

        let string = json_to_string(&value).unwrap();
        // We don't care about having two dots (.) in a row, as
        // the distance of hard separators is clamped to 8 anyway.
        assert_eq!(string, "name: John Doe. . 43. hello. I. am. fine. . ");
    }

    #[test]
    fn test_relative_position_conversion() {
        assert_eq!((0x0000, 0x0000), relative_from_absolute_position(0x00000000));
        assert_eq!((0x0000, 0xFFFF), relative_from_absolute_position(0x0000FFFF));
        assert_eq!((0xFFFF, 0x0000), relative_from_absolute_position(0xFFFF0000));
        assert_eq!((0xFF00, 0xFF00), relative_from_absolute_position(0xFF00FF00));
        assert_eq!((0xFF00, 0x00FF), relative_from_absolute_position(0xFF0000FF));
        assert_eq!((0x1234, 0x5678), relative_from_absolute_position(0x12345678));
        assert_eq!((0xFFFF, 0xFFFF), relative_from_absolute_position(0xFFFFFFFF));
    }

    #[test]
    fn test_absolute_position_conversion() {
        assert_eq!(0x00000000, absolute_from_relative_position(0x0000, 0x0000));
        assert_eq!(0x0000FFFF, absolute_from_relative_position(0x0000, 0xFFFF));
        assert_eq!(0xFFFF0000, absolute_from_relative_position(0xFFFF, 0x0000));
        assert_eq!(0xFF00FF00, absolute_from_relative_position(0xFF00, 0xFF00));
        assert_eq!(0xFF0000FF, absolute_from_relative_position(0xFF00, 0x00FF));
        assert_eq!(0x12345678, absolute_from_relative_position(0x1234, 0x5678));
        assert_eq!(0xFFFFFFFF, absolute_from_relative_position(0xFFFF, 0xFFFF));
    }

    #[test]
    fn test_all_obkv_to_json() {
        let mut fields_ids_map = FieldsIdsMap::new();
        let id1 = fields_ids_map.insert("field1").unwrap();
        let id2 = fields_ids_map.insert("field2").unwrap();

        let mut writer = obkv::KvWriterU16::memory();
        writer.insert(id1, b"1234").unwrap();
        writer.insert(id2, b"4321").unwrap();
        let contents = writer.into_inner().unwrap();
        let obkv = obkv::KvReaderU16::new(&contents);

        let expected = json!({
            "field1": 1234,
            "field2": 4321,
        });
        let expected = expected.as_object().unwrap();
        let actual = all_obkv_to_json(obkv, &fields_ids_map).unwrap();

        assert_eq!(&actual, expected);
    }
}
129 crates/milli/src/localized_attributes_rules.rs Normal file
@@ -0,0 +1,129 @@
use std::collections::HashMap;

use charabia::Language;
use serde::{Deserialize, Serialize};

use crate::fields_ids_map::FieldsIdsMap;
use crate::FieldId;

/// A rule that defines which locales are supported for a given attribute.
///
/// The rule is a list of attribute patterns and a list of locales.
/// The attribute patterns are matched against the attribute name.
/// The pattern `*` matches any attribute name.
/// The pattern `attribute_name*` matches any attribute name that starts with `attribute_name`.
/// The pattern `*attribute_name` matches any attribute name that ends with `attribute_name`.
/// The pattern `*attribute_name*` matches any attribute name that contains `attribute_name`.
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
pub struct LocalizedAttributesRule {
    pub attribute_patterns: Vec<String>,
    pub locales: Vec<Language>,
}

impl LocalizedAttributesRule {
    pub fn new(attribute_patterns: Vec<String>, locales: Vec<Language>) -> Self {
        Self { attribute_patterns, locales }
    }

    pub fn match_str(&self, str: &str) -> bool {
        self.attribute_patterns.iter().any(|pattern| match_pattern(pattern.as_str(), str))
    }

    pub fn locales(&self) -> &[Language] {
        &self.locales
    }
}

fn match_pattern(pattern: &str, str: &str) -> bool {
    if pattern == "*" {
        true
    } else if pattern.starts_with('*') && pattern.ends_with('*') {
        str.contains(&pattern[1..pattern.len() - 1])
    } else if let Some(pattern) = pattern.strip_prefix('*') {
        str.ends_with(pattern)
    } else if let Some(pattern) = pattern.strip_suffix('*') {
        str.starts_with(pattern)
    } else {
        pattern == str
    }
}
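
// Editor's sketch (not in the original source): how a rule's patterns combine
// through `match_str`. The `Language::Jpn` variant is assumed from charabia's
// ISO 639-3 naming.
#[test]
fn rule_match_str_example() {
    let rule = LocalizedAttributesRule::new(
        vec!["title*".to_string(), "*_ja".to_string()],
        vec![Language::Jpn],
    );
    assert!(rule.match_str("title")); // prefix pattern `title*`
    assert!(rule.match_str("overview_ja")); // suffix pattern `*_ja`
    assert!(!rule.match_str("overview")); // matches neither pattern
}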

#[derive(Debug, Clone, PartialEq, Eq)]
pub struct LocalizedFieldIds {
    field_id_to_locales: HashMap<FieldId, Vec<Language>>,
}

impl LocalizedFieldIds {
    pub fn new<I: Iterator<Item = FieldId>>(
        rules: &Option<Vec<LocalizedAttributesRule>>,
        fields_ids_map: &FieldsIdsMap,
        fields_ids: I,
    ) -> Self {
        let mut field_id_to_locales = HashMap::new();

        if let Some(rules) = rules {
            let fields = fields_ids.filter_map(|field_id| {
                fields_ids_map.name(field_id).map(|field_name| (field_id, field_name))
            });

            for (field_id, field_name) in fields {
                let mut locales = Vec::new();
                for rule in rules {
                    if rule.match_str(field_name) {
                        locales.extend(rule.locales.iter());
                        // Take the first rule that matches
                        break;
                    }
                }

                if !locales.is_empty() {
                    locales.sort();
                    locales.dedup();
                    field_id_to_locales.insert(field_id, locales);
                }
            }
        }

        Self { field_id_to_locales }
    }

    pub fn locales(&self, fields_id: FieldId) -> Option<&[Language]> {
        self.field_id_to_locales.get(&fields_id).map(Vec::as_slice)
    }

    pub fn all_locales(&self) -> Vec<Language> {
        let mut locales = Vec::new();
        for field_locales in self.field_id_to_locales.values() {
            if !field_locales.is_empty() {
                locales.extend(field_locales);
            } else {
                // If a field has no locales, we consider it as not localized
                return Vec::new();
            }
        }
        locales.sort();
        locales.dedup();
        locales
    }
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn test_match_pattern() {
        assert!(match_pattern("*", "test"));
        assert!(match_pattern("test*", "test"));
        assert!(match_pattern("test*", "testa"));
        assert!(match_pattern("*test", "test"));
        assert!(match_pattern("*test", "atest"));
        assert!(match_pattern("*test*", "test"));
        assert!(match_pattern("*test*", "atesta"));
        assert!(match_pattern("*test*", "atest"));
        assert!(match_pattern("*test*", "testa"));
        assert!(!match_pattern("test*test", "test"));
        assert!(!match_pattern("*test", "testa"));
        assert!(!match_pattern("test*", "atest"));
    }
}
57 crates/milli/src/order_by_map.rs Normal file
@@ -0,0 +1,57 @@
use std::collections::{hash_map, HashMap};
use std::iter::FromIterator;

use serde::{Deserialize, Deserializer, Serialize};

use crate::OrderBy;

#[derive(Serialize)]
pub struct OrderByMap(HashMap<String, OrderBy>);

impl OrderByMap {
    pub fn get(&self, key: impl AsRef<str>) -> OrderBy {
        self.0
            .get(key.as_ref())
            .copied()
            .unwrap_or_else(|| self.0.get("*").copied().unwrap_or_default())
    }

    pub fn insert(&mut self, key: String, value: OrderBy) -> Option<OrderBy> {
        self.0.insert(key, value)
    }
}

impl Default for OrderByMap {
    fn default() -> Self {
        let mut map = HashMap::new();
        map.insert("*".to_string(), OrderBy::Lexicographic);
        OrderByMap(map)
    }
}

impl FromIterator<(String, OrderBy)> for OrderByMap {
    fn from_iter<T: IntoIterator<Item = (String, OrderBy)>>(iter: T) -> Self {
        OrderByMap(iter.into_iter().collect())
    }
}

impl IntoIterator for OrderByMap {
    type Item = (String, OrderBy);
    type IntoIter = hash_map::IntoIter<String, OrderBy>;

    fn into_iter(self) -> Self::IntoIter {
        self.0.into_iter()
    }
}

impl<'de> Deserialize<'de> for OrderByMap {
    fn deserialize<D>(deserializer: D) -> Result<Self, D::Error>
    where
        D: Deserializer<'de>,
    {
        let mut map = Deserialize::deserialize(deserializer).map(OrderByMap)?;
        // Insert the default ordering if it is not already overwritten by the user.
        map.0.entry("*".to_string()).or_insert(OrderBy::default());
        Ok(map)
    }
}
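
// Editor's sketch (not in the original source): `get` falls back to the `*`
// entry for unknown keys. This assumes the crate's `OrderBy` enum exposes
// `Lexicographic` and `Count` variants, as it does elsewhere in milli.
#[test]
fn order_by_map_fallback_example() {
    let mut map = OrderByMap::default(); // `default` pre-fills `*` -> Lexicographic
    map.insert("count".to_string(), OrderBy::Count);
    assert!(matches!(map.get("count"), OrderBy::Count));
    assert!(matches!(map.get("any_other_field"), OrderBy::Lexicographic));
}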
97 crates/milli/src/prompt/context.rs Normal file
@@ -0,0 +1,97 @@
use liquid::model::{
    ArrayView, DisplayCow, KStringCow, ObjectRender, ObjectSource, State, Value as LiquidValue,
};
use liquid::{ObjectView, ValueView};

use super::document::Document;
use super::fields::Fields;
use super::FieldsIdsMapWithMetadata;

#[derive(Debug, Clone)]
pub struct Context<'a> {
    document: &'a Document<'a>,
    fields: Fields<'a>,
}

impl<'a> Context<'a> {
    pub fn new(document: &'a Document<'a>, field_id_map: &'a FieldsIdsMapWithMetadata<'a>) -> Self {
        Self { document, fields: Fields::new(document, field_id_map) }
    }
}

impl<'a> ObjectView for Context<'a> {
    fn as_value(&self) -> &dyn ValueView {
        self
    }

    fn size(&self) -> i64 {
        2
    }

    fn keys<'k>(&'k self) -> Box<dyn Iterator<Item = KStringCow<'k>> + 'k> {
        Box::new(["doc", "fields"].iter().map(|s| KStringCow::from_static(s)))
    }

    fn values<'k>(&'k self) -> Box<dyn Iterator<Item = &'k dyn ValueView> + 'k> {
        Box::new(
            std::iter::once(self.document.as_value())
                .chain(std::iter::once(self.fields.as_value())),
        )
    }

    fn iter<'k>(&'k self) -> Box<dyn Iterator<Item = (KStringCow<'k>, &'k dyn ValueView)> + 'k> {
        Box::new(self.keys().zip(self.values()))
    }

    fn contains_key(&self, index: &str) -> bool {
        index == "doc" || index == "fields"
    }

    fn get<'s>(&'s self, index: &str) -> Option<&'s dyn ValueView> {
        match index {
            "doc" => Some(self.document.as_value()),
            "fields" => Some(self.fields.as_value()),
            _ => None,
        }
    }
}

impl<'a> ValueView for Context<'a> {
    fn as_debug(&self) -> &dyn std::fmt::Debug {
        self
    }

    fn render(&self) -> liquid::model::DisplayCow<'_> {
        DisplayCow::Owned(Box::new(ObjectRender::new(self)))
    }

    fn source(&self) -> liquid::model::DisplayCow<'_> {
        DisplayCow::Owned(Box::new(ObjectSource::new(self)))
    }

    fn type_name(&self) -> &'static str {
        "object"
    }

    fn query_state(&self, state: liquid::model::State) -> bool {
        match state {
            State::Truthy => true,
            State::DefaultValue | State::Empty | State::Blank => false,
        }
    }

    fn to_kstr(&self) -> liquid::model::KStringCow<'_> {
        let s = ObjectRender::new(self).to_string();
        KStringCow::from_string(s)
    }

    fn to_value(&self) -> LiquidValue {
        LiquidValue::Object(
            self.iter().map(|(k, x)| (k.to_string().into(), x.to_value())).collect(),
        )
    }

    fn as_object(&self) -> Option<&dyn ObjectView> {
        Some(self)
    }
}
131 crates/milli/src/prompt/document.rs Normal file
@@ -0,0 +1,131 @@
use std::cell::OnceCell;
use std::collections::BTreeMap;

use liquid::model::{
    DisplayCow, KString, KStringCow, ObjectRender, ObjectSource, State, Value as LiquidValue,
};
use liquid::{ObjectView, ValueView};

use crate::update::del_add::{DelAdd, KvReaderDelAdd};
use crate::FieldsIdsMap;

#[derive(Debug, Clone)]
pub struct Document<'a>(BTreeMap<&'a str, (&'a [u8], ParsedValue)>);

#[derive(Debug, Clone)]
struct ParsedValue(std::cell::OnceCell<LiquidValue>);

impl ParsedValue {
    fn empty() -> ParsedValue {
        ParsedValue(OnceCell::new())
    }

    fn get(&self, raw: &[u8]) -> &LiquidValue {
        self.0.get_or_init(|| {
            let value: serde_json::Value = serde_json::from_slice(raw).unwrap();
            liquid::model::to_value(&value).unwrap()
        })
    }
}
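
// Editor's note (not in the original source): `ParsedValue` memoizes the JSON
// deserialization of a field's raw bytes in a `OnceCell`, so a field that a
// template references several times is parsed at most once.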

impl<'a> Document<'a> {
    pub fn new(
        data: obkv::KvReaderU16<'a>,
        side: DelAdd,
        inverted_field_map: &'a FieldsIdsMap,
    ) -> Self {
        let mut out_data = BTreeMap::new();
        for (fid, raw) in data {
            let obkv = KvReaderDelAdd::new(raw);
            let Some(raw) = obkv.get(side) else {
                continue;
            };
            let Some(name) = inverted_field_map.name(fid) else {
                continue;
            };
            out_data.insert(name, (raw, ParsedValue::empty()));
        }
        Self(out_data)
    }

    fn is_empty(&self) -> bool {
        self.0.is_empty()
    }

    fn len(&self) -> usize {
        self.0.len()
    }

    fn iter(&self) -> impl Iterator<Item = (KString, LiquidValue)> + '_ {
        self.0.iter().map(|(&k, (raw, data))| (k.to_owned().into(), data.get(raw).to_owned()))
    }
}

impl<'a> ObjectView for Document<'a> {
    fn as_value(&self) -> &dyn ValueView {
        self
    }

    fn size(&self) -> i64 {
        self.len() as i64
    }

    fn keys<'k>(&'k self) -> Box<dyn Iterator<Item = KStringCow<'k>> + 'k> {
        let keys = BTreeMap::keys(&self.0).map(|&s| s.into());
        Box::new(keys)
    }

    fn values<'k>(&'k self) -> Box<dyn Iterator<Item = &'k dyn ValueView> + 'k> {
        Box::new(self.0.values().map(|(raw, v)| v.get(raw) as &dyn ValueView))
    }

    fn iter<'k>(&'k self) -> Box<dyn Iterator<Item = (KStringCow<'k>, &'k dyn ValueView)> + 'k> {
        Box::new(self.0.iter().map(|(&k, (raw, data))| (k.into(), data.get(raw) as &dyn ValueView)))
    }

    fn contains_key(&self, index: &str) -> bool {
        self.0.contains_key(index)
    }

    fn get<'s>(&'s self, index: &str) -> Option<&'s dyn ValueView> {
        self.0.get(index).map(|(raw, v)| v.get(raw) as &dyn ValueView)
    }
}

impl<'a> ValueView for Document<'a> {
    fn as_debug(&self) -> &dyn std::fmt::Debug {
        self
    }

    fn render(&self) -> liquid::model::DisplayCow<'_> {
        DisplayCow::Owned(Box::new(ObjectRender::new(self)))
    }

    fn source(&self) -> liquid::model::DisplayCow<'_> {
        DisplayCow::Owned(Box::new(ObjectSource::new(self)))
    }

    fn type_name(&self) -> &'static str {
        "object"
    }

    fn query_state(&self, state: liquid::model::State) -> bool {
        match state {
            State::Truthy => true,
            State::DefaultValue | State::Empty | State::Blank => self.is_empty(),
        }
    }

    fn to_kstr(&self) -> liquid::model::KStringCow<'_> {
        let s = ObjectRender::new(self).to_string();
        KStringCow::from_string(s)
    }

    fn to_value(&self) -> LiquidValue {
        LiquidValue::Object(self.iter().collect())
    }

    fn as_object(&self) -> Option<&dyn ObjectView> {
        Some(self)
    }
}
56 crates/milli/src/prompt/error.rs Normal file
@@ -0,0 +1,56 @@
use crate::error::FaultSource;

#[derive(Debug, thiserror::Error)]
#[error("{fault}: {kind}")]
pub struct NewPromptError {
    pub kind: NewPromptErrorKind,
    pub fault: FaultSource,
}

impl From<NewPromptError> for crate::Error {
    fn from(value: NewPromptError) -> Self {
        crate::Error::UserError(crate::UserError::InvalidPrompt(value))
    }
}

impl NewPromptError {
    pub(crate) fn cannot_parse_template(inner: liquid::Error) -> NewPromptError {
        Self { kind: NewPromptErrorKind::CannotParseTemplate(inner), fault: FaultSource::User }
    }

    pub(crate) fn invalid_fields_in_template(inner: liquid::Error) -> NewPromptError {
        Self { kind: NewPromptErrorKind::InvalidFieldsInTemplate(inner), fault: FaultSource::User }
    }
}

#[derive(Debug, thiserror::Error)]
pub enum NewPromptErrorKind {
    #[error("cannot parse template: {0}")]
    CannotParseTemplate(liquid::Error),
    #[error("template contains invalid fields: {0}. Only `doc.*`, `fields[i].name`, `fields[i].value` are supported")]
    InvalidFieldsInTemplate(liquid::Error),
}

#[derive(Debug, thiserror::Error)]
#[error("{fault}: {kind}")]
pub struct RenderPromptError {
    pub kind: RenderPromptErrorKind,
    pub fault: FaultSource,
}

impl RenderPromptError {
    pub(crate) fn missing_context(inner: liquid::Error) -> RenderPromptError {
        Self { kind: RenderPromptErrorKind::MissingContext(inner), fault: FaultSource::User }
    }
}

#[derive(Debug, thiserror::Error)]
pub enum RenderPromptErrorKind {
    #[error("missing field in document: {0}")]
    MissingContext(liquid::Error),
}

impl From<RenderPromptError> for crate::Error {
    fn from(value: RenderPromptError) -> Self {
        crate::Error::UserError(crate::UserError::MissingDocumentField(value))
    }
}
184 crates/milli/src/prompt/fields.rs Normal file
@@ -0,0 +1,184 @@
use liquid::model::{
    ArrayView, DisplayCow, KStringCow, ObjectRender, ObjectSource, State, Value as LiquidValue,
};
use liquid::{ObjectView, ValueView};

use super::document::Document;
use super::{FieldMetadata, FieldsIdsMapWithMetadata};

#[derive(Debug, Clone)]
pub struct Fields<'a>(Vec<FieldValue<'a>>);

impl<'a> Fields<'a> {
    pub fn new(document: &'a Document<'a>, field_id_map: &'a FieldsIdsMapWithMetadata<'a>) -> Self {
        Self(
            std::iter::repeat(document)
                .zip(field_id_map.iter())
                .map(|(document, (fid, name))| FieldValue {
                    document,
                    name,
                    metadata: field_id_map.metadata(fid).unwrap_or_default(),
                })
                .collect(),
        )
    }
}

#[derive(Debug, Clone, Copy)]
pub struct FieldValue<'a> {
    name: &'a str,
    document: &'a Document<'a>,
    metadata: FieldMetadata,
}

impl<'a> ValueView for FieldValue<'a> {
    fn as_debug(&self) -> &dyn std::fmt::Debug {
        self
    }

    fn render(&self) -> liquid::model::DisplayCow<'_> {
        DisplayCow::Owned(Box::new(ObjectRender::new(self)))
    }

    fn source(&self) -> liquid::model::DisplayCow<'_> {
        DisplayCow::Owned(Box::new(ObjectSource::new(self)))
    }

    fn type_name(&self) -> &'static str {
        "object"
    }

    fn query_state(&self, state: liquid::model::State) -> bool {
        match state {
            State::Truthy => true,
            State::DefaultValue | State::Empty | State::Blank => self.is_empty(),
        }
    }

    fn to_kstr(&self) -> liquid::model::KStringCow<'_> {
        let s = ObjectRender::new(self).to_string();
        KStringCow::from_string(s)
    }

    fn to_value(&self) -> LiquidValue {
        LiquidValue::Object(
            self.iter().map(|(k, v)| (k.to_string().into(), v.to_value())).collect(),
        )
    }

    fn as_object(&self) -> Option<&dyn ObjectView> {
        Some(self)
    }
}

impl<'a> FieldValue<'a> {
    pub fn name(&self) -> &&'a str {
        &self.name
    }

    pub fn value(&self) -> &dyn ValueView {
        self.document.get(self.name).unwrap_or(&LiquidValue::Nil)
    }

    pub fn is_searchable(&self) -> &bool {
        &self.metadata.searchable
    }

    pub fn is_empty(&self) -> bool {
        self.size() == 0
    }
}

impl<'a> ObjectView for FieldValue<'a> {
    fn as_value(&self) -> &dyn ValueView {
        self
    }

    fn size(&self) -> i64 {
        2
    }

    fn keys<'k>(&'k self) -> Box<dyn Iterator<Item = KStringCow<'k>> + 'k> {
        Box::new(["name", "value", "is_searchable"].iter().map(|&x| KStringCow::from_static(x)))
    }

    fn values<'k>(&'k self) -> Box<dyn Iterator<Item = &'k dyn ValueView> + 'k> {
        Box::new(
            std::iter::once(self.name() as &dyn ValueView)
                .chain(std::iter::once(self.value()))
                .chain(std::iter::once(self.is_searchable() as &dyn ValueView)),
        )
    }

    fn iter<'k>(&'k self) -> Box<dyn Iterator<Item = (KStringCow<'k>, &'k dyn ValueView)> + 'k> {
        Box::new(self.keys().zip(self.values()))
    }

    fn contains_key(&self, index: &str) -> bool {
        index == "name" || index == "value" || index == "is_searchable"
    }

    fn get<'s>(&'s self, index: &str) -> Option<&'s dyn ValueView> {
        match index {
            "name" => Some(self.name()),
            "value" => Some(self.value()),
            "is_searchable" => Some(self.is_searchable()),
            _ => None,
        }
    }
}

impl<'a> ArrayView for Fields<'a> {
    fn as_value(&self) -> &dyn ValueView {
        self.0.as_value()
    }

    fn size(&self) -> i64 {
        self.0.len() as i64
    }

    fn values<'k>(&'k self) -> Box<dyn Iterator<Item = &'k dyn ValueView> + 'k> {
        self.0.values()
    }

    fn contains_key(&self, index: i64) -> bool {
        self.0.contains_key(index)
    }

    fn get(&self, index: i64) -> Option<&dyn ValueView> {
        ArrayView::get(&self.0, index)
    }
}

impl<'a> ValueView for Fields<'a> {
    fn as_debug(&self) -> &dyn std::fmt::Debug {
        self
    }

    fn render(&self) -> liquid::model::DisplayCow<'_> {
        self.0.render()
    }

    fn source(&self) -> liquid::model::DisplayCow<'_> {
        self.0.source()
    }

    fn type_name(&self) -> &'static str {
        self.0.type_name()
    }

    fn query_state(&self, state: liquid::model::State) -> bool {
        self.0.query_state(state)
    }

    fn to_kstr(&self) -> liquid::model::KStringCow<'_> {
        self.0.to_kstr()
    }

    fn to_value(&self) -> LiquidValue {
        self.0.to_value()
    }

    fn as_array(&self) -> Option<&dyn ArrayView> {
        Some(self)
    }
}
281 crates/milli/src/prompt/mod.rs Normal file
@@ -0,0 +1,281 @@
mod context;
mod document;
pub(crate) mod error;
mod fields;
mod template_checker;

use std::collections::BTreeMap;
use std::convert::TryFrom;
use std::num::NonZeroUsize;
use std::ops::Deref;

use error::{NewPromptError, RenderPromptError};

use self::context::Context;
use self::document::Document;
use crate::update::del_add::DelAdd;
use crate::{FieldId, FieldsIdsMap};

pub struct Prompt {
    template: liquid::Template,
    template_text: String,
    max_bytes: Option<NonZeroUsize>,
}

#[derive(Debug, Clone, serde::Serialize, serde::Deserialize)]
pub struct PromptData {
    pub template: String,
    pub max_bytes: Option<NonZeroUsize>,
}

impl From<Prompt> for PromptData {
    fn from(value: Prompt) -> Self {
        Self { template: value.template_text, max_bytes: value.max_bytes }
    }
}

impl TryFrom<PromptData> for Prompt {
    type Error = NewPromptError;

    fn try_from(value: PromptData) -> Result<Self, Self::Error> {
        Prompt::new(value.template, value.max_bytes)
    }
}

impl Clone for Prompt {
    fn clone(&self) -> Self {
        let template_text = self.template_text.clone();
        Self {
            template: new_template(&template_text).unwrap(),
            template_text,
            max_bytes: self.max_bytes,
        }
    }
}

fn new_template(text: &str) -> Result<liquid::Template, liquid::Error> {
    liquid::ParserBuilder::with_stdlib().build().unwrap().parse(text)
}

fn default_template() -> liquid::Template {
    new_template(default_template_text()).unwrap()
}

fn default_template_text() -> &'static str {
    "{% for field in fields %}\
    {% if field.is_searchable and field.value != nil %}\
    {{ field.name }}: {{ field.value }}\n\
    {% endif %}\
    {% endfor %}"
}
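
// Editor's note (not in the original source): with this default template, a
// document such as `{"title": "Shazam!", "overview": "..."}` whose fields are
// searchable renders as one `name: value` line per non-nil field:
//
//     title: Shazam!
//     overview: ...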

pub fn default_max_bytes() -> NonZeroUsize {
    NonZeroUsize::new(400).unwrap()
}

impl Default for Prompt {
    fn default() -> Self {
        Self {
            template: default_template(),
            template_text: default_template_text().into(),
            max_bytes: Some(default_max_bytes()),
        }
    }
}

impl Default for PromptData {
    fn default() -> Self {
        Self { template: default_template_text().into(), max_bytes: Some(default_max_bytes()) }
    }
}

impl Prompt {
    pub fn new(template: String, max_bytes: Option<NonZeroUsize>) -> Result<Self, NewPromptError> {
        let this = Self {
            template: liquid::ParserBuilder::with_stdlib()
                .build()
                .unwrap()
                .parse(&template)
                .map_err(NewPromptError::cannot_parse_template)?,
            template_text: template,
            max_bytes,
        };

        // render template with special object that's OK with `doc.*` and `fields.*`
        this.template
            .render(&template_checker::TemplateChecker)
            .map_err(NewPromptError::invalid_fields_in_template)?;

        Ok(this)
    }

    pub fn render(
        &self,
        document: obkv::KvReaderU16<'_>,
        side: DelAdd,
        field_id_map: &FieldsIdsMapWithMetadata,
    ) -> Result<String, RenderPromptError> {
        let document = Document::new(document, side, field_id_map);
        let context = Context::new(&document, field_id_map);

        let mut rendered =
            self.template.render(&context).map_err(RenderPromptError::missing_context)?;
        if let Some(max_bytes) = self.max_bytes {
            truncate(&mut rendered, max_bytes.get());
        }
        Ok(rendered)
    }
}

fn truncate(s: &mut String, max_bytes: usize) {
    if max_bytes >= s.len() {
        return;
    }
    for i in (0..=max_bytes).rev() {
        if s.is_char_boundary(i) {
            s.truncate(i);
            break;
        }
    }
}
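
// Editor's note (not in the original source): scanning down from `max_bytes`
// to the nearest char boundary keeps the truncated string valid UTF-8; the
// `template_truncation` test below exercises this on multi-byte characters.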

pub struct FieldsIdsMapWithMetadata<'a> {
    fields_ids_map: &'a FieldsIdsMap,
    metadata: BTreeMap<FieldId, FieldMetadata>,
}

impl<'a> FieldsIdsMapWithMetadata<'a> {
    pub fn new(fields_ids_map: &'a FieldsIdsMap, searchable_fields_ids: &'_ [FieldId]) -> Self {
        let mut metadata: BTreeMap<FieldId, FieldMetadata> =
            fields_ids_map.ids().map(|id| (id, Default::default())).collect();
        for searchable_field_id in searchable_fields_ids {
            let Some(metadata) = metadata.get_mut(searchable_field_id) else { continue };
            metadata.searchable = true;
        }
        Self { fields_ids_map, metadata }
    }

    pub fn metadata(&self, field_id: FieldId) -> Option<FieldMetadata> {
        self.metadata.get(&field_id).copied()
    }
}

impl<'a> Deref for FieldsIdsMapWithMetadata<'a> {
    type Target = FieldsIdsMap;

    fn deref(&self) -> &Self::Target {
        self.fields_ids_map
    }
}

#[derive(Debug, Default, Clone, Copy)]
pub struct FieldMetadata {
    pub searchable: bool,
}

#[cfg(test)]
mod test {
    use super::Prompt;
    use crate::error::FaultSource;
    use crate::prompt::error::{NewPromptError, NewPromptErrorKind};
    use crate::prompt::truncate;

    #[test]
    fn default_template() {
        // does not panic
        Prompt::default();
    }

    #[test]
    fn empty_template() {
        Prompt::new("".into(), None).unwrap();
    }

    #[test]
    fn template_ok() {
        Prompt::new("{{doc.title}}: {{doc.overview}}".into(), None).unwrap();
    }

    #[test]
    fn template_syntax() {
        assert!(matches!(
            Prompt::new("{{doc.title: {{doc.overview}}".into(), None),
            Err(NewPromptError {
                kind: NewPromptErrorKind::CannotParseTemplate(_),
                fault: FaultSource::User
            })
        ));
    }

    #[test]
    fn template_missing_doc() {
        assert!(matches!(
            Prompt::new("{{title}}: {{overview}}".into(), None),
            Err(NewPromptError {
                kind: NewPromptErrorKind::InvalidFieldsInTemplate(_),
                fault: FaultSource::User
            })
        ));
    }

    #[test]
    fn template_nested_doc() {
        Prompt::new("{{doc.actor.firstName}}: {{doc.actor.lastName}}".into(), None).unwrap();
    }

    #[test]
    fn template_fields() {
        Prompt::new("{% for field in fields %}{{field}}{% endfor %}".into(), None).unwrap();
    }

    #[test]
    fn template_fields_ok() {
        Prompt::new(
            "{% for field in fields %}{{field.name}}: {{field.value}}{% endfor %}".into(),
            None,
        )
        .unwrap();
    }

    #[test]
    fn template_fields_invalid() {
        assert!(matches!(
            // intentionally garbled field
            Prompt::new("{% for field in fields %}{{field.vaelu}} {% endfor %}".into(), None),
            Err(NewPromptError {
                kind: NewPromptErrorKind::InvalidFieldsInTemplate(_),
                fault: FaultSource::User
            })
        ));
    }

    // todo: test truncation
    #[test]
    fn template_truncation() {
        let mut s = "インテル ザー ビーグル".to_string();

        truncate(&mut s, 42);
        assert_eq!(s, "インテル ザー ビーグル");

        assert_eq!(s.len(), 32);
        truncate(&mut s, 32);
        assert_eq!(s, "インテル ザー ビーグル");

        truncate(&mut s, 31);
        assert_eq!(s, "インテル ザー ビーグ");
        truncate(&mut s, 30);
        assert_eq!(s, "インテル ザー ビーグ");
        truncate(&mut s, 28);
        assert_eq!(s, "インテル ザー ビー");
        truncate(&mut s, 26);
        assert_eq!(s, "インテル ザー ビー");
        truncate(&mut s, 25);
        assert_eq!(s, "インテル ザー ビ");

        assert_eq!("イ".len(), 3);
        truncate(&mut s, 3);
        assert_eq!(s, "イ");
        truncate(&mut s, 2);
        assert_eq!(s, "");
    }
}
301 crates/milli/src/prompt/template_checker.rs Normal file
@@ -0,0 +1,301 @@
use liquid::model::{
    ArrayView, DisplayCow, KStringCow, ObjectRender, ObjectSource, State, Value as LiquidValue,
};
use liquid::{Object, ObjectView, ValueView};

#[derive(Debug)]
pub struct TemplateChecker;

#[derive(Debug)]
pub struct DummyDoc;

#[derive(Debug)]
pub struct DummyFields;

#[derive(Debug)]
pub struct DummyField;

const DUMMY_VALUE: &LiquidValue = &LiquidValue::Nil;

impl ObjectView for DummyField {
    fn as_value(&self) -> &dyn ValueView {
        self
    }

    fn size(&self) -> i64 {
        2
    }

    fn keys<'k>(&'k self) -> Box<dyn Iterator<Item = KStringCow<'k>> + 'k> {
        Box::new(["name", "value"].iter().map(|s| KStringCow::from_static(s)))
    }

    fn values<'k>(&'k self) -> Box<dyn Iterator<Item = &'k dyn ValueView> + 'k> {
        Box::new(vec![DUMMY_VALUE.as_view(), DUMMY_VALUE.as_view()].into_iter())
    }

    fn iter<'k>(&'k self) -> Box<dyn Iterator<Item = (KStringCow<'k>, &'k dyn ValueView)> + 'k> {
        Box::new(self.keys().zip(self.values()))
    }

    fn contains_key(&self, index: &str) -> bool {
        index == "name" || index == "value"
    }

    fn get<'s>(&'s self, index: &str) -> Option<&'s dyn ValueView> {
        if self.contains_key(index) {
            Some(DUMMY_VALUE.as_view())
        } else {
            None
        }
    }
}

impl ValueView for DummyField {
    fn as_debug(&self) -> &dyn std::fmt::Debug {
        self
    }

    fn render(&self) -> DisplayCow<'_> {
        DUMMY_VALUE.render()
    }

    fn source(&self) -> DisplayCow<'_> {
        DUMMY_VALUE.source()
    }

    fn type_name(&self) -> &'static str {
        "object"
    }

    fn query_state(&self, state: State) -> bool {
        match state {
            State::Truthy => true,
            State::DefaultValue => false,
            State::Empty => false,
            State::Blank => false,
        }
    }

    fn to_kstr(&self) -> KStringCow<'_> {
        DUMMY_VALUE.to_kstr()
    }

    fn to_value(&self) -> LiquidValue {
        let mut this = Object::new();
        this.insert("name".into(), LiquidValue::Nil);
        this.insert("value".into(), LiquidValue::Nil);
        LiquidValue::Object(this)
    }

    fn as_object(&self) -> Option<&dyn ObjectView> {
        Some(self)
    }
}

impl ValueView for DummyFields {
    fn as_debug(&self) -> &dyn std::fmt::Debug {
        self
    }

    fn render(&self) -> DisplayCow<'_> {
        DUMMY_VALUE.render()
    }

    fn source(&self) -> DisplayCow<'_> {
        DUMMY_VALUE.source()
    }

    fn type_name(&self) -> &'static str {
        "array"
    }

    fn query_state(&self, state: State) -> bool {
        match state {
            State::Truthy => true,
            State::DefaultValue => false,
            State::Empty => false,
            State::Blank => false,
        }
    }

    fn to_kstr(&self) -> KStringCow<'_> {
        DUMMY_VALUE.to_kstr()
    }

    fn to_value(&self) -> LiquidValue {
        LiquidValue::Array(vec![DummyField.to_value()])
    }

    fn as_array(&self) -> Option<&dyn ArrayView> {
        Some(self)
    }
}

impl ArrayView for DummyFields {
    fn as_value(&self) -> &dyn ValueView {
        self
    }

    fn size(&self) -> i64 {
        u16::MAX as i64
    }

    fn values<'k>(&'k self) -> Box<dyn Iterator<Item = &'k dyn ValueView> + 'k> {
        Box::new(std::iter::once(DummyField.as_value()))
    }

    fn contains_key(&self, index: i64) -> bool {
        index < self.size()
    }

    fn get(&self, _index: i64) -> Option<&dyn ValueView> {
        Some(DummyField.as_value())
    }
}

impl ObjectView for DummyDoc {
    fn as_value(&self) -> &dyn ValueView {
        self
    }

    fn size(&self) -> i64 {
        1000
    }

    fn keys<'k>(&'k self) -> Box<dyn Iterator<Item = KStringCow<'k>> + 'k> {
        Box::new(std::iter::empty())
    }

    fn values<'k>(&'k self) -> Box<dyn Iterator<Item = &'k dyn ValueView> + 'k> {
        Box::new(std::iter::empty())
    }

    fn iter<'k>(&'k self) -> Box<dyn Iterator<Item = (KStringCow<'k>, &'k dyn ValueView)> + 'k> {
        Box::new(std::iter::empty())
    }

    fn contains_key(&self, _index: &str) -> bool {
        true
    }

    fn get<'s>(&'s self, _index: &str) -> Option<&'s dyn ValueView> {
        // Recursively sends itself
        Some(self)
    }
}
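
// Editor's note (not in the original source): because `contains_key` is always
// true and `get` always returns `Some(self)`, arbitrarily nested paths such as
// `doc.actor.firstName` validate without knowing the real document schema.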

impl ValueView for DummyDoc {
    fn as_debug(&self) -> &dyn std::fmt::Debug {
        self
    }

    fn render(&self) -> DisplayCow<'_> {
        DUMMY_VALUE.render()
    }

    fn source(&self) -> DisplayCow<'_> {
        DUMMY_VALUE.source()
    }

    fn type_name(&self) -> &'static str {
        "object"
    }

    fn query_state(&self, state: State) -> bool {
        match state {
            State::Truthy => true,
            State::DefaultValue => false,
            State::Empty => false,
            State::Blank => false,
        }
    }

    fn to_kstr(&self) -> KStringCow<'_> {
        DUMMY_VALUE.to_kstr()
    }

    fn to_value(&self) -> LiquidValue {
        LiquidValue::Nil
    }

    fn as_object(&self) -> Option<&dyn ObjectView> {
        Some(self)
    }
}

impl ObjectView for TemplateChecker {
    fn as_value(&self) -> &dyn ValueView {
        self
    }

    fn size(&self) -> i64 {
        2
    }

    fn keys<'k>(&'k self) -> Box<dyn Iterator<Item = KStringCow<'k>> + 'k> {
        Box::new(["doc", "fields"].iter().map(|s| KStringCow::from_static(s)))
    }

    fn values<'k>(&'k self) -> Box<dyn Iterator<Item = &'k dyn ValueView> + 'k> {
        Box::new(
            std::iter::once(DummyDoc.as_value()).chain(std::iter::once(DummyFields.as_value())),
        )
    }

    fn iter<'k>(&'k self) -> Box<dyn Iterator<Item = (KStringCow<'k>, &'k dyn ValueView)> + 'k> {
        Box::new(self.keys().zip(self.values()))
    }

    fn contains_key(&self, index: &str) -> bool {
        index == "doc" || index == "fields"
    }

    fn get<'s>(&'s self, index: &str) -> Option<&'s dyn ValueView> {
        match index {
            "doc" => Some(DummyDoc.as_value()),
            "fields" => Some(DummyFields.as_value()),
            _ => None,
        }
    }
}

impl ValueView for TemplateChecker {
    fn as_debug(&self) -> &dyn std::fmt::Debug {
        self
    }

    fn render(&self) -> liquid::model::DisplayCow<'_> {
        DisplayCow::Owned(Box::new(ObjectRender::new(self)))
    }

    fn source(&self) -> liquid::model::DisplayCow<'_> {
        DisplayCow::Owned(Box::new(ObjectSource::new(self)))
    }

    fn type_name(&self) -> &'static str {
        "object"
    }

    fn query_state(&self, state: liquid::model::State) -> bool {
        match state {
            State::Truthy => true,
            State::DefaultValue | State::Empty | State::Blank => false,
        }
    }

    fn to_kstr(&self) -> liquid::model::KStringCow<'_> {
        let s = ObjectRender::new(self).to_string();
        KStringCow::from_string(s)
    }

    fn to_value(&self) -> LiquidValue {
        LiquidValue::Object(
            self.iter().map(|(k, x)| (k.to_string().into(), x.to_value())).collect(),
        )
    }

    fn as_object(&self) -> Option<&dyn ObjectView> {
        Some(self)
    }
}
37 crates/milli/src/proximity.rs Normal file
@@ -0,0 +1,37 @@
use std::cmp;

use serde::{Deserialize, Serialize};

use crate::{relative_from_absolute_position, Position};

pub const MAX_DISTANCE: u32 = 4;

pub fn index_proximity(lhs: u32, rhs: u32) -> u32 {
    if lhs <= rhs {
        cmp::min(rhs - lhs, MAX_DISTANCE)
    } else {
        cmp::min((lhs - rhs) + 1, MAX_DISTANCE)
    }
}

pub fn positions_proximity(lhs: Position, rhs: Position) -> u32 {
    let (lhs_attr, lhs_index) = relative_from_absolute_position(lhs);
    let (rhs_attr, rhs_index) = relative_from_absolute_position(rhs);
    if lhs_attr != rhs_attr {
        MAX_DISTANCE
    } else {
        index_proximity(lhs_index as u32, rhs_index as u32)
    }
}

pub fn path_proximity(path: &[Position]) -> u32 {
    path.windows(2).map(|w| positions_proximity(w[0], w[1])).sum::<u32>()
}
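
// Editor's sketch (not in the original source): proximity is asymmetric
// (right-to-left order costs one extra step) and saturates at `MAX_DISTANCE`.
#[test]
fn index_proximity_examples() {
    assert_eq!(index_proximity(3, 5), 2); // left-to-right: plain distance
    assert_eq!(index_proximity(5, 3), 3); // right-to-left: distance + 1
    assert_eq!(index_proximity(0, 40), MAX_DISTANCE); // clamped to 4
}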

#[derive(Debug, Clone, Copy, Serialize, Deserialize, PartialEq, Eq, Default)]
#[serde(rename_all = "camelCase")]
pub enum ProximityPrecision {
    #[default]
    ByWord,
    ByAttribute,
}
495 crates/milli/src/score_details.rs Normal file
@@ -0,0 +1,495 @@
use std::cmp::Ordering;

use itertools::Itertools;
use serde::Serialize;

use crate::distance_between_two_points;

#[derive(Debug, Clone, PartialEq)]
pub enum ScoreDetails {
    Words(Words),
    Typo(Typo),
    Proximity(Rank),
    Fid(Rank),
    Position(Rank),
    ExactAttribute(ExactAttribute),
    ExactWords(ExactWords),
    Sort(Sort),
    Vector(Vector),
    GeoSort(GeoSort),

    /// Returned when we don't have the time to finish applying all the subsequent ranking-rules
    Skipped,
}

#[derive(Clone, Copy)]
pub enum ScoreValue<'a> {
    Score(f64),
    Sort(&'a Sort),
    GeoSort(&'a GeoSort),
}

enum RankOrValue<'a> {
    Rank(Rank),
    Sort(&'a Sort),
    GeoSort(&'a GeoSort),
    Score(f64),
}

impl ScoreDetails {
    pub fn local_score(&self) -> Option<f64> {
        self.rank().map(Rank::local_score)
    }

    pub fn rank(&self) -> Option<Rank> {
        match self {
            ScoreDetails::Words(details) => Some(details.rank()),
            ScoreDetails::Typo(details) => Some(details.rank()),
            ScoreDetails::Proximity(details) => Some(*details),
            ScoreDetails::Fid(details) => Some(*details),
            ScoreDetails::Position(details) => Some(*details),
            ScoreDetails::ExactAttribute(details) => Some(details.rank()),
            ScoreDetails::ExactWords(details) => Some(details.rank()),
            ScoreDetails::Sort(_) => None,
            ScoreDetails::GeoSort(_) => None,
            ScoreDetails::Vector(_) => None,
            ScoreDetails::Skipped => Some(Rank { rank: 0, max_rank: 1 }),
        }
    }

    pub fn global_score<'a>(details: impl Iterator<Item = &'a Self> + 'a) -> f64 {
        Self::score_values(details)
            .find_map(|x| {
                let ScoreValue::Score(score) = x else {
                    return None;
                };
                Some(score)
            })
            .unwrap_or(1.0f64)
    }

    pub fn score_values<'a>(
        details: impl Iterator<Item = &'a Self> + 'a,
    ) -> impl Iterator<Item = ScoreValue<'a>> + 'a {
        details
            .map(ScoreDetails::rank_or_value)
            .coalesce(|left, right| match (left, right) {
                (RankOrValue::Rank(left), RankOrValue::Rank(right)) => {
                    Ok(RankOrValue::Rank(Rank::merge(left, right)))
                }
                (left, right) => Err((left, right)),
            })
            .map(|rank_or_value| match rank_or_value {
                RankOrValue::Rank(r) => ScoreValue::Score(r.local_score()),
                RankOrValue::Sort(s) => ScoreValue::Sort(s),
                RankOrValue::GeoSort(g) => ScoreValue::GeoSort(g),
                RankOrValue::Score(s) => ScoreValue::Score(s),
            })
    }

    fn rank_or_value(&self) -> RankOrValue<'_> {
        match self {
            ScoreDetails::Words(w) => RankOrValue::Rank(w.rank()),
            ScoreDetails::Typo(t) => RankOrValue::Rank(t.rank()),
            ScoreDetails::Proximity(p) => RankOrValue::Rank(*p),
            ScoreDetails::Fid(f) => RankOrValue::Rank(*f),
            ScoreDetails::Position(p) => RankOrValue::Rank(*p),
            ScoreDetails::ExactAttribute(e) => RankOrValue::Rank(e.rank()),
            ScoreDetails::ExactWords(e) => RankOrValue::Rank(e.rank()),
            ScoreDetails::Sort(sort) => RankOrValue::Sort(sort),
            ScoreDetails::GeoSort(geosort) => RankOrValue::GeoSort(geosort),
            ScoreDetails::Vector(vector) => {
                RankOrValue::Score(vector.similarity.as_ref().map(|s| *s as f64).unwrap_or(0.0f64))
            }
            ScoreDetails::Skipped => RankOrValue::Rank(Rank { rank: 0, max_rank: 1 }),
        }
    }

    /// Panics
    ///
    /// - If Position is not preceded by Fid
    /// - If Exactness is not preceded by ExactAttribute
    pub fn to_json_map<'a>(
        details: impl Iterator<Item = &'a Self>,
    ) -> serde_json::Map<String, serde_json::Value> {
        let mut order = 0;
        let mut fid_details = None;
        let mut details_map = serde_json::Map::default();
        for details in details {
            match details {
                ScoreDetails::Words(words) => {
                    let words_details = serde_json::json!({
                        "order": order,
                        "matchingWords": words.matching_words,
                        "maxMatchingWords": words.max_matching_words,
                        "score": words.rank().local_score(),
                    });
                    details_map.insert("words".into(), words_details);
                    order += 1;
                }
                ScoreDetails::Typo(typo) => {
                    let typo_details = serde_json::json!({
                        "order": order,
                        "typoCount": typo.typo_count,
                        "maxTypoCount": typo.max_typo_count,
                        "score": typo.rank().local_score(),
                    });
                    details_map.insert("typo".into(), typo_details);
                    order += 1;
                }
                ScoreDetails::Proximity(proximity) => {
                    let proximity_details = serde_json::json!({
                        "order": order,
                        "score": proximity.local_score(),
                    });
                    details_map.insert("proximity".into(), proximity_details);
                    order += 1;
                }
                ScoreDetails::Fid(fid) => {
                    // copy the rank for future use in Position.
                    fid_details = Some(*fid);
                    // For now, fid is a virtual rule always followed by the "position" rule
                    let fid_details = serde_json::json!({
                        "order": order,
                        "attributeRankingOrderScore": fid.local_score(),
                    });
                    details_map.insert("attribute".into(), fid_details);
                    order += 1;
                }
                ScoreDetails::Position(position) => {
                    // For now, position is a virtual rule always preceded by the "fid" rule
                    let attribute_details = details_map
                        .get_mut("attribute")
                        .expect("position not preceded by attribute");
                    let attribute_details = attribute_details
                        .as_object_mut()
                        .expect("attribute details was not an object");
                    let Some(fid_details) = fid_details else {
                        unimplemented!("position not preceded by attribute");
                    };

                    attribute_details
                        .insert("queryWordDistanceScore".into(), position.local_score().into());
                    let score = Rank::global_score([fid_details, *position].iter().copied());
                    attribute_details.insert("score".into(), score.into());

                    // do not update the order since this was already done by fid
                }
                ScoreDetails::ExactAttribute(exact_attribute) => {
                    let exactness_details = serde_json::json!({
                        "order": order,
                        "matchType": exact_attribute,
                        "score": exact_attribute.rank().local_score(),
                    });
                    details_map.insert("exactness".into(), exactness_details);
                    order += 1;
                }
                ScoreDetails::ExactWords(details) => {
                    // For now, exactness is a virtual rule always preceded by the "ExactAttribute" rule
                    let exactness_details = details_map
                        .get_mut("exactness")
                        .expect("Exactness not preceded by exactAttribute");
                    let exactness_details = exactness_details
                        .as_object_mut()
                        .expect("exactness details was not an object");
                    if exactness_details.get("matchType").expect("missing 'matchType'")
                        == &serde_json::json!(ExactAttribute::NoExactMatch)
                    {
                        let score = Rank::global_score(
                            [ExactAttribute::NoExactMatch.rank(), details.rank()].iter().copied(),
                        );
                        // tiny detail, but we want the score to be the last displayed field,
                        // so we're removing it here, adding the other fields, then adding the new score
                        exactness_details.remove("score");
                        exactness_details
                            .insert("matchingWords".into(), details.matching_words.into());
                        exactness_details
                            .insert("maxMatchingWords".into(), details.max_matching_words.into());
                        exactness_details.insert("score".into(), score.into());
                    }
                    // do not update the order since this was already done by exactAttribute
                }
                ScoreDetails::Sort(details) => {
                    let sort = if details.redacted {
                        format!("<hidden-rule-{order}>")
                    } else {
                        format!(
                            "{}:{}",
                            details.field_name,
                            if details.ascending { "asc" } else { "desc" }
                        )
                    };
                    let value =
                        if details.redacted { "<hidden>".into() } else { details.value.clone() };
                    let sort_details = serde_json::json!({
                        "order": order,
                        "value": value,
                    });
                    details_map.insert(sort, sort_details);
                    order += 1;
                }
                ScoreDetails::GeoSort(details) => {
                    let sort = format!(
                        "_geoPoint({}, {}):{}",
                        details.target_point[0],
                        details.target_point[1],
                        if details.ascending { "asc" } else { "desc" }
                    );
                    let point = if let Some(value) = details.value {
                        serde_json::json!({ "lat": value[0], "lng": value[1]})
                    } else {
                        serde_json::Value::Null
                    };
                    let sort_details = serde_json::json!({
                        "order": order,
                        "value": point,
                        "distance": details.distance(),
                    });
                    details_map.insert(sort, sort_details);
                    order += 1;
                }
                ScoreDetails::Vector(s) => {
                    let similarity = s.similarity.as_ref();

                    let details = serde_json::json!({
                        "order": order,
                        "similarity": similarity,
                    });
                    details_map.insert("vectorSort".into(), details);
                    order += 1;
                }
                ScoreDetails::Skipped => {
                    details_map
                        .insert("skipped".to_string(), serde_json::json!({ "order": order }));
                    order += 1;
                }
            }
        }
        details_map
    }
}
|
||||
|
||||
/// The strategy to compute scores.
|
||||
///
|
||||
/// It makes sense to pass down this strategy to the internals of the search, because
|
||||
/// some optimizations (today, mainly skipping ranking rules for universes of a single document)
|
||||
/// are not correct to do when computing the scores.
|
||||
///
|
||||
/// This strategy could feasibly be extended to differentiate between the normalized score and the
|
||||
/// detailed scores, but it is not useful today as the normalized score is *derived from* the
|
||||
/// detailed scores.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
|
||||
pub enum ScoringStrategy {
|
||||
/// Don't compute scores
|
||||
#[default]
|
||||
Skip,
|
||||
/// Compute detailed scores
|
||||
Detailed,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
|
||||
pub struct Words {
|
||||
pub matching_words: u32,
|
||||
pub max_matching_words: u32,
|
||||
}
|
||||
|
||||
impl Words {
|
||||
pub fn rank(&self) -> Rank {
|
||||
Rank { rank: self.matching_words, max_rank: self.max_matching_words }
|
||||
}
|
||||
|
||||
pub(crate) fn from_rank(rank: Rank) -> Self {
|
||||
Self { matching_words: rank.rank, max_matching_words: rank.max_rank }
|
||||
}
|
||||
}
|
||||
|
||||
/// Structure that is super similar to [`Words`], but whose semantics are a bit different.
///
/// In exactness, the number of matching words can actually be 0 with a non-zero score,
/// if no words from the query appear exactly in the document.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct ExactWords {
    pub matching_words: u32,
    pub max_matching_words: u32,
}

impl ExactWords {
    pub fn rank(&self) -> Rank {
        // 0 matching words means last rank (1)
        Rank { rank: self.matching_words + 1, max_rank: self.max_matching_words + 1 }
    }

    pub(crate) fn from_rank(rank: Rank) -> Self {
        // The last rank (1) means that 0 words from the query appear exactly in the document.
        // The first rank (max_rank) means that (max_rank - 1) words from the query appear exactly in the document.
        Self {
            matching_words: rank.rank.saturating_sub(1),
            max_matching_words: rank.max_rank.saturating_sub(1),
        }
    }
}

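// Illustrative sketch (an addition, not part of the original diff): the
// `ExactWords` rank mapping checked on concrete numbers. With 0 exact
// matches out of 3 query words, `rank()` yields the last rank (1 of 4),
// and `from_rank` inverts it.
#[cfg(test)]
mod exact_words_example {
    use super::{ExactWords, Rank};

    #[test]
    fn rank_round_trip() {
        let e = ExactWords { matching_words: 0, max_matching_words: 3 };
        assert_eq!(e.rank(), Rank { rank: 1, max_rank: 4 });
        assert_eq!(ExactWords::from_rank(e.rank()), e);
    }
}
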
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Typo {
    pub typo_count: u32,
    pub max_typo_count: u32,
}

impl Typo {
    pub fn rank(&self) -> Rank {
        Rank {
            rank: (self.max_typo_count + 1).saturating_sub(self.typo_count),
            max_rank: (self.max_typo_count + 1),
        }
    }

    // max_rank = max_typo + 1
    // max_typo = max_rank - 1
    //
    // rank = max_typo - typo + 1
    // rank = max_rank - 1 - typo + 1
    // rank + typo = max_rank
    // typo = max_rank - rank
    pub fn from_rank(rank: Rank) -> Typo {
        Typo {
            typo_count: rank.max_rank.saturating_sub(rank.rank),
            max_typo_count: rank.max_rank.saturating_sub(1),
        }
    }
}

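// Illustrative sketch (an addition, not part of the original diff): the
// rank/typo arithmetic above, checked on concrete numbers. One typo out
// of at most two maps to rank 2 of 3, since fewer typos rank higher.
#[cfg(test)]
mod typo_rank_example {
    use super::{Rank, Typo};

    #[test]
    fn rank_round_trip() {
        let t = Typo { typo_count: 1, max_typo_count: 2 };
        assert_eq!(t.rank(), Rank { rank: 2, max_rank: 3 });
        assert_eq!(Typo::from_rank(t.rank()), t);
    }
}
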
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct Rank {
    /// The ordinal rank, such that `max_rank` is the first rank, and 0 is the last rank.
    ///
    /// The higher the better. Documents with a rank of 0 have a score of 0 and are typically never returned
    /// (they don't match the query).
    pub rank: u32,
    /// The maximum possible rank. Documents with this rank have a score of 1.
    ///
    /// The max rank should not be 0.
    pub max_rank: u32,
}

impl Rank {
    pub fn local_score(self) -> f64 {
        self.rank as f64 / self.max_rank as f64
    }

    pub fn global_score(details: impl Iterator<Item = Self>) -> f64 {
        let mut rank = Rank { rank: 1, max_rank: 1 };
        for inner_rank in details {
            rank = Rank::merge(rank, inner_rank);
        }
        rank.local_score()
    }

    pub fn merge(mut outer: Rank, inner: Rank) -> Rank {
        outer.rank = outer.rank.saturating_sub(1);

        outer.rank *= inner.max_rank;
        outer.max_rank *= inner.max_rank;

        outer.rank += inner.rank;

        outer
    }
}

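// Illustrative sketch (an addition, not part of the original diff): how
// per-rule ranks compose into a global score. A 2/3 rank followed by a
// perfect 3/3 rank merges into 6/9, i.e. a global score of 2/3.
#[cfg(test)]
mod rank_merge_example {
    use super::Rank;

    #[test]
    fn global_score_of_two_rules() {
        let first = Rank { rank: 2, max_rank: 3 };
        let second = Rank { rank: 3, max_rank: 3 };
        let score = Rank::global_score([first, second].into_iter());
        assert!((score - 6.0 / 9.0).abs() < f64::EPSILON);
    }
}
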
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Serialize)]
#[serde(rename_all = "camelCase")]
pub enum ExactAttribute {
    ExactMatch,
    MatchesStart,
    NoExactMatch,
}

impl ExactAttribute {
    pub fn rank(&self) -> Rank {
        let rank = match self {
            ExactAttribute::ExactMatch => 3,
            ExactAttribute::MatchesStart => 2,
            ExactAttribute::NoExactMatch => 1,
        };
        Rank { rank, max_rank: 3 }
    }
}

#[derive(Debug, Clone, PartialEq)]
pub struct Sort {
    pub field_name: String,
    pub ascending: bool,
    pub redacted: bool,
    pub value: serde_json::Value,
}

impl PartialOrd for Sort {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        if self.ascending != other.ascending {
            return None;
        }
        match (&self.value, &other.value) {
            (serde_json::Value::Null, serde_json::Value::Null) => Some(Ordering::Equal),
            (serde_json::Value::Null, _) => Some(Ordering::Less),
            (_, serde_json::Value::Null) => Some(Ordering::Greater),
            // numbers are always before strings
            (serde_json::Value::Number(_), serde_json::Value::String(_)) => Some(Ordering::Greater),
            (serde_json::Value::String(_), serde_json::Value::Number(_)) => Some(Ordering::Less),
            (serde_json::Value::Number(left), serde_json::Value::Number(right)) => {
                // FIXME: unwrap permitted here?
                let order = left.as_f64().unwrap().partial_cmp(&right.as_f64().unwrap())?;
                // 12 < 42, and when ascending, we want to see 12 first, so the smallest.
                // Hence, when ascending, smaller is better.
                Some(if self.ascending { order.reverse() } else { order })
            }
            (serde_json::Value::String(left), serde_json::Value::String(right)) => {
                let order = left.cmp(right);
                // Taking e.g. "a" and "z":
                // "a" < "z", and when ascending, we want to see "a" first, so the smallest.
                // Hence, when ascending, smaller is better.
                Some(if self.ascending { order.reverse() } else { order })
            }
            _ => None,
        }
    }
}

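// Illustrative sketch (an addition, not part of the original diff): with
// `ascending: true`, the smaller value compares as the better (`Greater`)
// one, so a price of 12 beats a price of 42.
#[cfg(test)]
mod sort_order_example {
    use std::cmp::Ordering;

    use super::Sort;

    #[test]
    fn ascending_prefers_smaller_values() {
        let cheap = Sort {
            field_name: "price".to_string(),
            ascending: true,
            redacted: false,
            value: serde_json::json!(12),
        };
        let pricey = Sort { value: serde_json::json!(42), ..cheap.clone() };
        assert_eq!(cheap.partial_cmp(&pricey), Some(Ordering::Greater));
    }
}
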
#[derive(Debug, Clone, Copy, PartialEq)]
pub struct GeoSort {
    pub target_point: [f64; 2],
    pub ascending: bool,
    pub value: Option<[f64; 2]>,
}

impl PartialOrd for GeoSort {
    fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
        if self.ascending != other.ascending {
            return None;
        }
        Some(match (self.distance(), other.distance()) {
            (None, None) => Ordering::Equal,
            (None, Some(_)) => Ordering::Less,
            (Some(_), None) => Ordering::Greater,
            (Some(left), Some(right)) => {
                let order = left.partial_cmp(&right)?;
                if self.ascending {
                    // when ascending, the one with the smallest distance has the best score
                    order.reverse()
                } else {
                    order
                }
            }
        })
    }
}

#[derive(Debug, Clone, PartialEq, PartialOrd)]
pub struct Vector {
    pub similarity: Option<f32>,
}

impl GeoSort {
    pub fn distance(&self) -> Option<f64> {
        self.value.map(|value| distance_between_two_points(&self.target_point, &value))
    }
}

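// Illustrative sketch (an addition, not part of the original diff): for an
// ascending geo sort, the document closer to the target point compares as
// the better (`Greater`) one.
#[cfg(test)]
mod geo_sort_example {
    use std::cmp::Ordering;

    use super::GeoSort;

    #[test]
    fn ascending_prefers_closer_points() {
        let target_point = [48.86, 2.35];
        let near = GeoSort { target_point, ascending: true, value: Some([48.87, 2.36]) };
        let far = GeoSort { target_point, ascending: true, value: Some([40.71, -74.00]) };
        assert!(near.distance() < far.distance());
        assert_eq!(near.partial_cmp(&far), Some(Ordering::Greater));
    }
}
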
846
crates/milli/src/search/facet/facet_distribution.rs
Normal file

@@ -0,0 +1,846 @@
use std::collections::{BTreeMap, HashMap, HashSet};
use std::fmt::Display;
use std::ops::ControlFlow;
use std::{fmt, mem};

use heed::types::Bytes;
use heed::BytesDecode;
use indexmap::IndexMap;
use roaring::RoaringBitmap;
use serde::{Deserialize, Serialize};

use crate::error::UserError;
use crate::facet::FacetType;
use crate::heed_codec::facet::{
    FacetGroupKeyCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, OrderedF64Codec,
};
use crate::heed_codec::{BytesRefCodec, StrRefCodec};
use crate::search::facet::facet_distribution_iter::{
    count_iterate_over_facet_distribution, lexicographically_iterate_over_facet_distribution,
};
use crate::{FieldId, Index, Result};

/// The default number of values per facet that will
/// be fetched from the key-value store.
pub const DEFAULT_VALUES_PER_FACET: usize = 100;

/// Threshold on the number of candidates that makes
/// the system choose between one algorithm or another.
const CANDIDATES_THRESHOLD: u64 = 3000;

/// How should we fetch the facets?
#[derive(Debug, Default, Clone, Copy, PartialEq, Eq, Serialize, Deserialize)]
pub enum OrderBy {
    /// By lexicographic order...
    #[default]
    Lexicographic,
    /// Or by number of docids in common?
    Count,
}

impl Display for OrderBy {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            OrderBy::Lexicographic => f.write_str("alphabetically"),
            OrderBy::Count => f.write_str("by count"),
        }
    }
}

pub struct FacetDistribution<'a> {
    facets: Option<HashMap<String, OrderBy>>,
    candidates: Option<RoaringBitmap>,
    max_values_per_facet: usize,
    default_order_by: OrderBy,
    rtxn: &'a heed::RoTxn<'a>,
    index: &'a Index,
}

impl<'a> FacetDistribution<'a> {
    pub fn new(rtxn: &'a heed::RoTxn<'a>, index: &'a Index) -> FacetDistribution<'a> {
        FacetDistribution {
            facets: None,
            candidates: None,
            max_values_per_facet: DEFAULT_VALUES_PER_FACET,
            default_order_by: OrderBy::default(),
            rtxn,
            index,
        }
    }

    pub fn facets<I: IntoIterator<Item = (A, OrderBy)>, A: AsRef<str>>(
        &mut self,
        names_ordered_by: I,
    ) -> &mut Self {
        self.facets = Some(
            names_ordered_by
                .into_iter()
                .map(|(name, order_by)| (name.as_ref().to_string(), order_by))
                .collect(),
        );
        self
    }

    pub fn max_values_per_facet(&mut self, max: usize) -> &mut Self {
        self.max_values_per_facet = max;
        self
    }

    pub fn default_order_by(&mut self, order_by: OrderBy) -> &mut Self {
        self.default_order_by = order_by;
        self
    }

    pub fn candidates(&mut self, candidates: RoaringBitmap) -> &mut Self {
        self.candidates = Some(candidates);
        self
    }

    /// There is a small number of candidates OR we ask for facet string values, so we
    /// decide to iterate over the facet values of each one of them, one by one.
    fn facet_distribution_from_documents(
        &self,
        field_id: FieldId,
        facet_type: FacetType,
        candidates: &RoaringBitmap,
        distribution: &mut IndexMap<String, u64>,
    ) -> heed::Result<()> {
        match facet_type {
            FacetType::Number => {
                let mut lexicographic_distribution = BTreeMap::new();
                let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec();

                let db = self.index.field_id_docid_facet_f64s;
                for docid in candidates {
                    key_buffer.truncate(mem::size_of::<FieldId>());
                    key_buffer.extend_from_slice(&docid.to_be_bytes());
                    let iter = db
                        .remap_key_type::<Bytes>()
                        .prefix_iter(self.rtxn, &key_buffer)?
                        .remap_key_type::<FieldDocIdFacetF64Codec>();

                    for result in iter {
                        let ((_, _, value), ()) = result?;
                        *lexicographic_distribution.entry(value.to_string()).or_insert(0) += 1;
                    }
                }

                distribution.extend(
                    lexicographic_distribution
                        .into_iter()
                        .take(self.max_values_per_facet.saturating_sub(distribution.len())),
                );
            }
            FacetType::String => {
                let mut normalized_distribution = BTreeMap::new();
                let mut key_buffer: Vec<_> = field_id.to_be_bytes().to_vec();

                let db = self.index.field_id_docid_facet_strings;
                for docid in candidates {
                    key_buffer.truncate(mem::size_of::<FieldId>());
                    key_buffer.extend_from_slice(&docid.to_be_bytes());
                    let iter = db
                        .remap_key_type::<Bytes>()
                        .prefix_iter(self.rtxn, &key_buffer)?
                        .remap_key_type::<FieldDocIdFacetStringCodec>();

                    for result in iter {
                        let ((_, _, normalized_value), original_value) = result?;
                        let (_, count) = normalized_distribution
                            .entry(normalized_value)
                            .or_insert_with(|| (original_value, 0));
                        *count += 1;

                        // We'd like to break here if we have enough facet values, but we are
                        // collecting them by increasing docid, so higher ranked facets could
                        // be in later docids.
                    }
                }

                let iter = normalized_distribution
                    .into_iter()
                    .take(self.max_values_per_facet.saturating_sub(distribution.len()))
                    .map(|(_normalized, (original, count))| (original.to_string(), count));
                distribution.extend(iter);
            }
        }

        Ok(())
    }

    /// There are too many documents, so we use the facet levels to move through
    /// the facet values and find the associated candidates and values.
    fn facet_numbers_distribution_from_facet_levels(
        &self,
        field_id: FieldId,
        candidates: &RoaringBitmap,
        order_by: OrderBy,
        distribution: &mut IndexMap<String, u64>,
    ) -> heed::Result<()> {
        let search_function = match order_by {
            OrderBy::Lexicographic => lexicographically_iterate_over_facet_distribution,
            OrderBy::Count => count_iterate_over_facet_distribution,
        };

        search_function(
            self.rtxn,
            self.index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
            field_id,
            candidates,
            |facet_key, nbr_docids, _| {
                let facet_key = OrderedF64Codec::bytes_decode(facet_key).unwrap();
                distribution.insert(facet_key.to_string(), nbr_docids);
                if distribution.len() == self.max_values_per_facet {
                    Ok(ControlFlow::Break(()))
                } else {
                    Ok(ControlFlow::Continue(()))
                }
            },
        )
    }

    fn facet_strings_distribution_from_facet_levels(
        &self,
        field_id: FieldId,
        candidates: &RoaringBitmap,
        order_by: OrderBy,
        distribution: &mut IndexMap<String, u64>,
    ) -> heed::Result<()> {
        let search_function = match order_by {
            OrderBy::Lexicographic => lexicographically_iterate_over_facet_distribution,
            OrderBy::Count => count_iterate_over_facet_distribution,
        };

        search_function(
            self.rtxn,
            self.index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>(),
            field_id,
            candidates,
            |facet_key, nbr_docids, any_docid| {
                let facet_key = StrRefCodec::bytes_decode(facet_key).unwrap();

                let key: (FieldId, _, &str) = (field_id, any_docid, facet_key);
                let original_string = self
                    .index
                    .field_id_docid_facet_strings
                    .get(self.rtxn, &key)?
                    .unwrap()
                    .to_owned();

                distribution.insert(original_string, nbr_docids);
                if distribution.len() == self.max_values_per_facet {
                    Ok(ControlFlow::Break(()))
                } else {
                    Ok(ControlFlow::Continue(()))
                }
            },
        )
    }

    fn facet_values(
        &self,
        field_id: FieldId,
        order_by: OrderBy,
    ) -> heed::Result<IndexMap<String, u64>> {
        use FacetType::{Number, String};

        let mut distribution = IndexMap::new();
        match (order_by, &self.candidates) {
            (OrderBy::Lexicographic, Some(cnd)) if cnd.len() <= CANDIDATES_THRESHOLD => {
                // Classic search, candidates were specified, we must return facet values only related
                // to those candidates. We also enter here for facet strings for performance reasons.
                self.facet_distribution_from_documents(field_id, Number, cnd, &mut distribution)?;
                self.facet_distribution_from_documents(field_id, String, cnd, &mut distribution)?;
            }
            _ => {
                let universe;
                let candidates = match &self.candidates {
                    Some(cnd) => cnd,
                    None => {
                        universe = self.index.documents_ids(self.rtxn)?;
                        &universe
                    }
                };

                self.facet_numbers_distribution_from_facet_levels(
                    field_id,
                    candidates,
                    order_by,
                    &mut distribution,
                )?;
                self.facet_strings_distribution_from_facet_levels(
                    field_id,
                    candidates,
                    order_by,
                    &mut distribution,
                )?;
            }
        };

        Ok(distribution)
    }

    pub fn compute_stats(&self) -> Result<BTreeMap<String, (f64, f64)>> {
        let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
        let filterable_fields = self.index.filterable_fields(self.rtxn)?;
        let candidates = if let Some(candidates) = self.candidates.clone() {
            candidates
        } else {
            return Ok(Default::default());
        };

        let fields = match &self.facets {
            Some(facets) => {
                let invalid_fields: HashSet<_> = facets
                    .iter()
                    .map(|(name, _)| name)
                    .filter(|facet| !crate::is_faceted(facet, &filterable_fields))
                    .collect();
                if !invalid_fields.is_empty() {
                    return Err(UserError::InvalidFacetsDistribution {
                        invalid_facets_name: invalid_fields.into_iter().cloned().collect(),
                        valid_facets_name: filterable_fields.into_iter().collect(),
                    }
                    .into());
                } else {
                    facets.iter().map(|(name, _)| name).cloned().collect()
                }
            }
            None => filterable_fields,
        };

        let mut distribution = BTreeMap::new();
        for (fid, name) in fields_ids_map.iter() {
            if crate::is_faceted(name, &fields) {
                let min_value = if let Some(min_value) = crate::search::facet::facet_min_value(
                    self.index,
                    self.rtxn,
                    fid,
                    candidates.clone(),
                )? {
                    min_value
                } else {
                    continue;
                };
                let max_value = if let Some(max_value) = crate::search::facet::facet_max_value(
                    self.index,
                    self.rtxn,
                    fid,
                    candidates.clone(),
                )? {
                    max_value
                } else {
                    continue;
                };

                distribution.insert(name.to_string(), (min_value, max_value));
            }
        }

        Ok(distribution)
    }

    pub fn execute(&self) -> Result<BTreeMap<String, IndexMap<String, u64>>> {
        let fields_ids_map = self.index.fields_ids_map(self.rtxn)?;
        let filterable_fields = self.index.filterable_fields(self.rtxn)?;

        let fields = match self.facets {
            Some(ref facets) => {
                let invalid_fields: HashSet<_> = facets
                    .iter()
                    .map(|(name, _)| name)
                    .filter(|facet| !crate::is_faceted(facet, &filterable_fields))
                    .collect();
                if !invalid_fields.is_empty() {
                    return Err(UserError::InvalidFacetsDistribution {
                        invalid_facets_name: invalid_fields.into_iter().cloned().collect(),
                        valid_facets_name: filterable_fields.into_iter().collect(),
                    }
                    .into());
                } else {
                    facets.iter().map(|(name, _)| name).cloned().collect()
                }
            }
            None => filterable_fields,
        };

        let mut distribution = BTreeMap::new();
        for (fid, name) in fields_ids_map.iter() {
            if crate::is_faceted(name, &fields) {
                let order_by = self
                    .facets
                    .as_ref()
                    .and_then(|facets| facets.get(name).copied())
                    .unwrap_or(self.default_order_by);
                let values = self.facet_values(fid, order_by)?;
                distribution.insert(name.to_string(), values);
            }
        }

        Ok(distribution)
    }
}

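// Illustrative usage sketch (an addition, not part of the original diff; it
// mirrors the tests below): the builder is chained, then finished with
// either `execute()` for the value counts or `compute_stats()` for the
// numeric (min, max) per facet. On a fresh index both results are empty.
#[cfg(test)]
mod builder_usage_example {
    use super::FacetDistribution;
    use crate::index::tests::TempIndex;

    #[test]
    fn builder_chain() {
        let index = TempIndex::new();
        let rtxn = index.read_txn().unwrap();
        let distribution = FacetDistribution::new(&rtxn, &index)
            .max_values_per_facet(10)
            .candidates((0..100).collect())
            .execute()
            .unwrap();
        assert!(distribution.is_empty());
        let stats = FacetDistribution::new(&rtxn, &index)
            .candidates((0..100).collect())
            .compute_stats()
            .unwrap();
        assert!(stats.is_empty());
    }
}
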
impl fmt::Debug for FacetDistribution<'_> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let FacetDistribution {
            facets,
            candidates,
            max_values_per_facet,
            default_order_by,
            rtxn: _,
            index: _,
        } = self;

        f.debug_struct("FacetDistribution")
            .field("facets", facets)
            .field("candidates", candidates)
            .field("max_values_per_facet", max_values_per_facet)
            .field("default_order_by", default_order_by)
            .finish()
    }
}

#[cfg(test)]
mod tests {
    use std::iter;

    use big_s::S;
    use maplit::hashset;

    use crate::documents::documents_batch_reader_from_objects;
    use crate::index::tests::TempIndex;
    use crate::{milli_snap, FacetDistribution, OrderBy};

    #[test]
    fn few_candidates_few_facet_values() {
        // All the tests here avoid using the code in `facet_distribution_iter` because there aren't
        // enough candidates.

        let mut index = TempIndex::new();
        index.index_documents_config.autogenerate_docids = true;

        index
            .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
            .unwrap();

        let documents = documents!([
            { "colour": "Blue" },
            { "colour": " blue" },
            { "colour": "RED" }
        ]);

        index.add_documents(documents).unwrap();

        let txn = index.read_txn().unwrap();

        let map = FacetDistribution::new(&txn, &index)
            .facets(iter::once(("colour", OrderBy::default())))
            .execute()
            .unwrap();

        milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2, "RED": 1}}"###);

        let map = FacetDistribution::new(&txn, &index)
            .facets(iter::once(("colour", OrderBy::default())))
            .candidates([0, 1, 2].iter().copied().collect())
            .execute()
            .unwrap();

        milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2, "RED": 1}}"###);

        let map = FacetDistribution::new(&txn, &index)
            .facets(iter::once(("colour", OrderBy::default())))
            .candidates([1, 2].iter().copied().collect())
            .execute()
            .unwrap();

        // I think it would be fine if " blue" was "Blue" instead.
        // We just need to get any non-normalised string I think, even if it's not in
        // the candidates.
        milli_snap!(format!("{map:?}"), @r###"{"colour": {" blue": 1, "RED": 1}}"###);

        let map = FacetDistribution::new(&txn, &index)
            .facets(iter::once(("colour", OrderBy::default())))
            .candidates([2].iter().copied().collect())
            .execute()
            .unwrap();

        milli_snap!(format!("{map:?}"), @r###"{"colour": {"RED": 1}}"###);

        let map = FacetDistribution::new(&txn, &index)
            .facets(iter::once(("colour", OrderBy::default())))
            .candidates([0, 1, 2].iter().copied().collect())
            .max_values_per_facet(1)
            .execute()
            .unwrap();

        milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2}}"###);

        let map = FacetDistribution::new(&txn, &index)
            .facets(iter::once(("colour", OrderBy::Count)))
            .candidates([0, 1, 2].iter().copied().collect())
            .max_values_per_facet(1)
            .execute()
            .unwrap();

        milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2}}"###);
    }

    #[test]
    fn many_candidates_few_facet_values() {
        let mut index = TempIndex::new_with_map_size(4096 * 10_000);
        index.index_documents_config.autogenerate_docids = true;

        index
            .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
            .unwrap();

        let facet_values = ["Red", "RED", " red ", "Blue", "BLUE"];

        let mut documents = vec![];
        for i in 0..10_000 {
            let document = serde_json::json!({
                "colour": facet_values[i % 5],
            })
            .as_object()
            .unwrap()
            .clone();
            documents.push(document);
        }

        let documents = documents_batch_reader_from_objects(documents);

        index.add_documents(documents).unwrap();

        let txn = index.read_txn().unwrap();

        let map = FacetDistribution::new(&txn, &index)
            .facets(iter::once(("colour", OrderBy::default())))
            .execute()
            .unwrap();

        milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 4000, "Red": 6000}}"###);

        let map = FacetDistribution::new(&txn, &index)
            .facets(iter::once(("colour", OrderBy::default())))
            .max_values_per_facet(1)
            .execute()
            .unwrap();

        milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 4000}}"###);

        let map = FacetDistribution::new(&txn, &index)
            .facets(iter::once(("colour", OrderBy::default())))
            .candidates((0..10_000).collect())
            .execute()
            .unwrap();

        milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 4000, "Red": 6000}}"###);

        let map = FacetDistribution::new(&txn, &index)
            .facets(iter::once(("colour", OrderBy::default())))
            .candidates((0..5_000).collect())
            .execute()
            .unwrap();

        milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2000, "Red": 3000}}"###);

        let map = FacetDistribution::new(&txn, &index)
            .facets(iter::once(("colour", OrderBy::default())))
            .candidates((0..5_000).collect())
            .execute()
            .unwrap();

        milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2000, "Red": 3000}}"###);

        let map = FacetDistribution::new(&txn, &index)
            .facets(iter::once(("colour", OrderBy::default())))
            .candidates((0..5_000).collect())
            .max_values_per_facet(1)
            .execute()
            .unwrap();

        milli_snap!(format!("{map:?}"), @r###"{"colour": {"Blue": 2000}}"###);

        let map = FacetDistribution::new(&txn, &index)
            .facets(iter::once(("colour", OrderBy::Count)))
            .candidates((0..5_000).collect())
            .max_values_per_facet(1)
            .execute()
            .unwrap();

        milli_snap!(format!("{map:?}"), @r###"{"colour": {"Red": 3000}}"###);
    }

    #[test]
    fn many_candidates_many_facet_values() {
        let mut index = TempIndex::new_with_map_size(4096 * 10_000);
        index.index_documents_config.autogenerate_docids = true;

        index
            .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
            .unwrap();

        let facet_values = (0..1000).map(|x| format!("{x:x}")).collect::<Vec<_>>();

        let mut documents = vec![];
        for i in 0..10_000 {
            let document = serde_json::json!({
                "colour": facet_values[i % 1000],
            })
            .as_object()
            .unwrap()
            .clone();
            documents.push(document);
        }

        let documents = documents_batch_reader_from_objects(documents);

        index.add_documents(documents).unwrap();

        let txn = index.read_txn().unwrap();

        let map = FacetDistribution::new(&txn, &index)
            .facets(iter::once(("colour", OrderBy::default())))
            .execute()
            .unwrap();

        milli_snap!(format!("{map:?}"), "no_candidates", @"ac9229ed5964d893af96a7076e2f8af5");

        let map = FacetDistribution::new(&txn, &index)
            .facets(iter::once(("colour", OrderBy::default())))
            .max_values_per_facet(2)
            .execute()
            .unwrap();

        milli_snap!(format!("{map:?}"), "no_candidates_with_max_2", @r###"{"colour": {"0": 10, "1": 10}}"###);

        let map = FacetDistribution::new(&txn, &index)
            .facets(iter::once(("colour", OrderBy::default())))
            .candidates((0..10_000).collect())
            .execute()
            .unwrap();

        milli_snap!(format!("{map:?}"), "candidates_0_10_000", @"ac9229ed5964d893af96a7076e2f8af5");

        let map = FacetDistribution::new(&txn, &index)
            .facets(iter::once(("colour", OrderBy::default())))
            .candidates((0..5_000).collect())
            .execute()
            .unwrap();

        milli_snap!(format!("{map:?}"), "candidates_0_5_000", @"825f23a4090d05756f46176987b7d992");
    }

    #[test]
    fn facet_stats() {
        let mut index = TempIndex::new_with_map_size(4096 * 10_000);
        index.index_documents_config.autogenerate_docids = true;

        index
            .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
            .unwrap();

        let facet_values = (0..1000).collect::<Vec<_>>();

        let mut documents = vec![];
        for i in 0..1000 {
            let document = serde_json::json!({
                "colour": facet_values[i % 1000],
            })
            .as_object()
            .unwrap()
            .clone();
            documents.push(document);
        }

        let documents = documents_batch_reader_from_objects(documents);

        index.add_documents(documents).unwrap();

        let txn = index.read_txn().unwrap();

        let map = FacetDistribution::new(&txn, &index)
            .facets(iter::once(("colour", OrderBy::default())))
            .compute_stats()
            .unwrap();

        milli_snap!(format!("{map:?}"), "no_candidates", @"{}");

        let map = FacetDistribution::new(&txn, &index)
            .facets(iter::once(("colour", OrderBy::default())))
            .candidates((0..1000).collect())
            .compute_stats()
            .unwrap();

        milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 999.0)}"###);

        let map = FacetDistribution::new(&txn, &index)
            .facets(iter::once(("colour", OrderBy::default())))
            .candidates((217..777).collect())
            .compute_stats()
            .unwrap();

        milli_snap!(format!("{map:?}"), "candidates_217_777", @r###"{"colour": (217.0, 776.0)}"###);
    }

    #[test]
    fn facet_stats_array() {
        let mut index = TempIndex::new_with_map_size(4096 * 10_000);
        index.index_documents_config.autogenerate_docids = true;

        index
            .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
            .unwrap();

        let facet_values = (0..1000).collect::<Vec<_>>();

        let mut documents = vec![];
        for i in 0..1000 {
            let document = serde_json::json!({
                "colour": [facet_values[i % 1000], facet_values[i % 1000] + 1000],
            })
            .as_object()
            .unwrap()
            .clone();
            documents.push(document);
        }

        let documents = documents_batch_reader_from_objects(documents);

        index.add_documents(documents).unwrap();

        let txn = index.read_txn().unwrap();

        let map = FacetDistribution::new(&txn, &index)
            .facets(iter::once(("colour", OrderBy::default())))
            .compute_stats()
            .unwrap();

        milli_snap!(format!("{map:?}"), "no_candidates", @"{}");

        let map = FacetDistribution::new(&txn, &index)
            .facets(iter::once(("colour", OrderBy::default())))
            .candidates((0..1000).collect())
            .compute_stats()
            .unwrap();

        milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 1999.0)}"###);

        let map = FacetDistribution::new(&txn, &index)
            .facets(iter::once(("colour", OrderBy::default())))
            .candidates((217..777).collect())
            .compute_stats()
            .unwrap();

        milli_snap!(format!("{map:?}"), "candidates_217_777", @r###"{"colour": (217.0, 1776.0)}"###);
    }

    #[test]
    fn facet_stats_mixed_array() {
        let mut index = TempIndex::new_with_map_size(4096 * 10_000);
        index.index_documents_config.autogenerate_docids = true;

        index
            .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
            .unwrap();

        let facet_values = (0..1000).collect::<Vec<_>>();

        let mut documents = vec![];
        for i in 0..1000 {
            let document = serde_json::json!({
                "colour": [facet_values[i % 1000], format!("{}", facet_values[i % 1000] + 1000)],
            })
            .as_object()
            .unwrap()
            .clone();
            documents.push(document);
        }

        let documents = documents_batch_reader_from_objects(documents);

        index.add_documents(documents).unwrap();

        let txn = index.read_txn().unwrap();

        let map = FacetDistribution::new(&txn, &index)
            .facets(iter::once(("colour", OrderBy::default())))
            .compute_stats()
            .unwrap();

        milli_snap!(format!("{map:?}"), "no_candidates", @"{}");

        let map = FacetDistribution::new(&txn, &index)
            .facets(iter::once(("colour", OrderBy::default())))
            .candidates((0..1000).collect())
            .compute_stats()
            .unwrap();

        milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 999.0)}"###);

        let map = FacetDistribution::new(&txn, &index)
            .facets(iter::once(("colour", OrderBy::default())))
            .candidates((217..777).collect())
            .compute_stats()
            .unwrap();

        milli_snap!(format!("{map:?}"), "candidates_217_777", @r###"{"colour": (217.0, 776.0)}"###);
    }

    #[test]
    fn facet_mixed_values() {
        let mut index = TempIndex::new_with_map_size(4096 * 10_000);
        index.index_documents_config.autogenerate_docids = true;

        index
            .update_settings(|settings| settings.set_filterable_fields(hashset! { S("colour") }))
            .unwrap();

        let facet_values = (0..1000).collect::<Vec<_>>();

        let mut documents = vec![];
        for i in 0..1000 {
            let document = if i % 2 == 0 {
                serde_json::json!({
                    "colour": [facet_values[i % 1000], facet_values[i % 1000] + 1000],
                })
            } else {
                serde_json::json!({
                    "colour": format!("{}", facet_values[i % 1000] + 10000),
                })
            };
            let document = document.as_object().unwrap().clone();
            documents.push(document);
        }

        let documents = documents_batch_reader_from_objects(documents);

        index.add_documents(documents).unwrap();

        let txn = index.read_txn().unwrap();

        let map = FacetDistribution::new(&txn, &index)
            .facets(iter::once(("colour", OrderBy::default())))
            .compute_stats()
            .unwrap();

        milli_snap!(format!("{map:?}"), "no_candidates", @"{}");

        let map = FacetDistribution::new(&txn, &index)
            .facets(iter::once(("colour", OrderBy::default())))
            .candidates((0..1000).collect())
            .compute_stats()
            .unwrap();

        milli_snap!(format!("{map:?}"), "candidates_0_1000", @r###"{"colour": (0.0, 1998.0)}"###);

        let map = FacetDistribution::new(&txn, &index)
            .facets(iter::once(("colour", OrderBy::default())))
            .candidates((217..777).collect())
            .compute_stats()
            .unwrap();

        milli_snap!(format!("{map:?}"), "candidates_217_777", @r###"{"colour": (218.0, 1776.0)}"###);
    }
}

301
crates/milli/src/search/facet/facet_distribution_iter.rs
Normal file

@@ -0,0 +1,301 @@
use std::cmp::Reverse;
use std::collections::BinaryHeap;
use std::ops::ControlFlow;

use heed::Result;
use roaring::RoaringBitmap;

use super::{get_first_facet_value, get_highest_level};
use crate::heed_codec::facet::{
    FacetGroupKey, FacetGroupKeyCodec, FacetGroupLazyValueCodec, FacetGroupValueCodec,
};
use crate::heed_codec::BytesRefCodec;
use crate::{CboRoaringBitmapCodec, DocumentId};

/// Call the given closure on the facet distribution of the candidate documents.
///
/// The arguments to the closure are:
/// - the facet value, as a byte slice
/// - the number of documents among the candidates that contain this facet value
/// - the id of a document which contains the facet value. Note that this document
///   is not necessarily from the list of candidates, it is simply *any* document which
///   contains this facet value.
///
/// The return value of the closure is a `ControlFlow<()>` which indicates whether we should
/// keep iterating over the different facet values or stop.
pub fn lexicographically_iterate_over_facet_distribution<'t, CB>(
    rtxn: &'t heed::RoTxn<'t>,
    db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
    field_id: u16,
    candidates: &RoaringBitmap,
    callback: CB,
) -> Result<()>
where
    CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
{
    let db = db.remap_data_type::<FacetGroupLazyValueCodec>();
    let mut fd = LexicographicFacetDistribution { rtxn, db, field_id, callback };
    let highest_level = get_highest_level(rtxn, db, field_id)?;

    if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
        fd.iterate(candidates, highest_level, first_bound, usize::MAX)?;
        Ok(())
    } else {
        Ok(())
    }
}

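// Illustrative usage sketch (an addition, not part of the original diff; it
// mirrors the tests at the bottom of this file): collect every `f64` facet
// value of field 0 with its document count, in lexicographic order.
#[cfg(test)]
mod lexicographic_usage_example {
    use std::ops::ControlFlow;

    use heed::BytesDecode;
    use roaring::RoaringBitmap;

    use super::lexicographically_iterate_over_facet_distribution;
    use crate::heed_codec::facet::OrderedF64Codec;
    use crate::search::facet::tests::get_simple_index;

    #[test]
    fn collect_all_values() {
        let index = get_simple_index();
        let txn = index.env.read_txn().unwrap();
        let candidates = (0..=255).collect::<RoaringBitmap>();
        let mut pairs = Vec::new();
        lexicographically_iterate_over_facet_distribution(
            &txn,
            index.content,
            0,
            &candidates,
            |facet, count, _any_docid| {
                pairs.push((OrderedF64Codec::bytes_decode(facet).unwrap(), count));
                Ok(ControlFlow::Continue(()))
            },
        )
        .unwrap();
        assert!(!pairs.is_empty());
    }
}
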
pub fn count_iterate_over_facet_distribution<'t, CB>(
    rtxn: &'t heed::RoTxn<'t>,
    db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
    field_id: u16,
    candidates: &RoaringBitmap,
    mut callback: CB,
) -> Result<()>
where
    CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
{
    /// # Important
    /// The order of the fields determines the order in which the facet values will be returned.
    /// This struct is inserted in a BinaryHeap and popped later on.
    #[derive(Debug, PartialOrd, Ord, PartialEq, Eq)]
    struct LevelEntry<'t> {
        /// The number of candidates in this entry.
        count: u64,
        /// The key level of the entry.
        level: Reverse<u8>,
        /// The left bound key.
        left_bound: &'t [u8],
        /// The number of keys we must look for after `left_bound`.
        group_size: u8,
        /// Any docid in the set of matching documents. Used to find the original facet string.
        any_docid: u32,
    }

    // Represents the list of keys that we must explore.
    let mut heap = BinaryHeap::new();
    let db = db.remap_data_type::<FacetGroupLazyValueCodec>();
    let highest_level = get_highest_level(rtxn, db, field_id)?;

    if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
        // We first fill the heap with values from the highest level
        let starting_key =
            FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
        for el in db.range(rtxn, &(&starting_key..))?.take(usize::MAX) {
            let (key, value) = el?;
            // The range is unbounded on the right and the group size for the highest level is MAX,
            // so we need to check that we are not iterating over the next field id
            if key.field_id != field_id {
                break;
            }
            let intersection = CboRoaringBitmapCodec::intersection_with_serialized(
                value.bitmap_bytes,
                candidates,
            )?;
            let count = intersection.len();
            if count != 0 {
                heap.push(LevelEntry {
                    count,
                    level: Reverse(key.level),
                    left_bound: key.left_bound,
                    group_size: value.size,
                    any_docid: intersection.min().unwrap(),
                });
            }
        }

        while let Some(LevelEntry { count, level, left_bound, group_size, any_docid }) = heap.pop()
        {
            if let Reverse(0) = level {
                match (callback)(left_bound, count, any_docid)? {
                    ControlFlow::Continue(_) => (),
                    ControlFlow::Break(_) => return Ok(()),
                }
            } else {
                let starting_key = FacetGroupKey { field_id, level: level.0 - 1, left_bound };
                for el in db.range(rtxn, &(&starting_key..))?.take(group_size as usize) {
                    let (key, value) = el?;
                    // The range is unbounded on the right and the group size for the highest level is MAX,
                    // so we need to check that we are not iterating over the next field id
                    if key.field_id != field_id {
                        break;
                    }
                    let intersection = CboRoaringBitmapCodec::intersection_with_serialized(
                        value.bitmap_bytes,
                        candidates,
                    )?;
                    let count = intersection.len();
                    if count != 0 {
                        heap.push(LevelEntry {
                            count,
                            level: Reverse(key.level),
                            left_bound: key.left_bound,
                            group_size: value.size,
                            any_docid: intersection.min().unwrap(),
                        });
                    }
                }
            }
        }
    }

    Ok(())
}

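// Illustrative sketch (an addition, not part of the original diff): why the
// field order of `LevelEntry` matters. `BinaryHeap::pop` returns the
// `Ord`-greatest entry, and a derived `Ord` compares fields in declaration
// order, so a higher `count` always wins and, on equal counts,
// `Reverse(level)` makes the deepest (smallest) level pop first. Tuples
// compare the same way:
#[cfg(test)]
mod heap_order_example {
    use std::cmp::Reverse;

    #[test]
    fn count_dominates_then_deepest_level() {
        // A higher count beats a lower one, whatever the level.
        assert!((10_u64, Reverse(3_u8)) > (9_u64, Reverse(0_u8)));
        // On equal counts, the lower level (closer to the leaves) wins.
        assert!((10_u64, Reverse(0_u8)) > (10_u64, Reverse(3_u8)));
    }
}
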
/// Iterate over the facet values in lexicographic order.
struct LexicographicFacetDistribution<'t, CB>
where
    CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
{
    rtxn: &'t heed::RoTxn<'t>,
    db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupLazyValueCodec>,
    field_id: u16,
    callback: CB,
}

impl<'t, CB> LexicographicFacetDistribution<'t, CB>
where
    CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
{
    fn iterate_level_0(
        &mut self,
        candidates: &RoaringBitmap,
        starting_bound: &'t [u8],
        group_size: usize,
    ) -> Result<ControlFlow<()>> {
        let starting_key =
            FacetGroupKey { field_id: self.field_id, level: 0, left_bound: starting_bound };
        let iter = self.db.range(self.rtxn, &(starting_key..))?.take(group_size);
        for el in iter {
            let (key, value) = el?;
            // The range is unbounded on the right and the group size for the highest level is MAX,
            // so we need to check that we are not iterating over the next field id
            if key.field_id != self.field_id {
                return Ok(ControlFlow::Break(()));
            }
            let docids_in_common = CboRoaringBitmapCodec::intersection_with_serialized(
                value.bitmap_bytes,
                candidates,
            )?;
            if !docids_in_common.is_empty() {
                let any_docid_in_common = docids_in_common.min().unwrap();
                match (self.callback)(key.left_bound, docids_in_common.len(), any_docid_in_common)?
                {
                    ControlFlow::Continue(_) => (),
                    ControlFlow::Break(_) => return Ok(ControlFlow::Break(())),
                }
            }
        }
        Ok(ControlFlow::Continue(()))
    }

    fn iterate(
        &mut self,
        candidates: &RoaringBitmap,
        level: u8,
        starting_bound: &'t [u8],
        group_size: usize,
    ) -> Result<ControlFlow<()>> {
        if level == 0 {
            return self.iterate_level_0(candidates, starting_bound, group_size);
        }
        let starting_key =
            FacetGroupKey { field_id: self.field_id, level, left_bound: starting_bound };
        let iter = self.db.range(self.rtxn, &(&starting_key..))?.take(group_size);

        for el in iter {
            let (key, value) = el?;
            // The range is unbounded on the right and the group size for the highest level is MAX,
            // so we need to check that we are not iterating over the next field id
            if key.field_id != self.field_id {
                return Ok(ControlFlow::Break(()));
            }
            let docids_in_common = CboRoaringBitmapCodec::intersection_with_serialized(
                value.bitmap_bytes,
                candidates,
            )?;
            if !docids_in_common.is_empty() {
                let cf = self.iterate(
                    &docids_in_common,
                    level - 1,
                    key.left_bound,
                    value.size as usize,
                )?;
                match cf {
                    ControlFlow::Continue(_) => (),
                    ControlFlow::Break(_) => return Ok(ControlFlow::Break(())),
                }
            }
        }
        Ok(ControlFlow::Continue(()))
    }
}

#[cfg(test)]
mod tests {
    use std::ops::ControlFlow;

    use heed::BytesDecode;
    use roaring::RoaringBitmap;

    use super::lexicographically_iterate_over_facet_distribution;
    use crate::heed_codec::facet::OrderedF64Codec;
    use crate::milli_snap;
    use crate::search::facet::tests::{get_random_looking_index, get_simple_index};

    #[test]
    fn filter_distribution_all() {
        let indexes = [get_simple_index(), get_random_looking_index()];
        for (i, index) in indexes.iter().enumerate() {
            let txn = index.env.read_txn().unwrap();
            let candidates = (0..=255).collect::<RoaringBitmap>();
            let mut results = String::new();
            lexicographically_iterate_over_facet_distribution(
                &txn,
                index.content,
                0,
                &candidates,
                |facet, count, _| {
                    let facet = OrderedF64Codec::bytes_decode(facet).unwrap();
                    results.push_str(&format!("{facet}: {count}\n"));
                    Ok(ControlFlow::Continue(()))
                },
            )
            .unwrap();
            milli_snap!(results, i);

            txn.commit().unwrap();
        }
    }

    #[test]
    fn filter_distribution_all_stop_early() {
        let indexes = [get_simple_index(), get_random_looking_index()];
        for (i, index) in indexes.iter().enumerate() {
            let txn = index.env.read_txn().unwrap();
            let candidates = (0..=255).collect::<RoaringBitmap>();
            let mut results = String::new();
            let mut nbr_facets = 0;
            lexicographically_iterate_over_facet_distribution(
                &txn,
                index.content,
                0,
                &candidates,
                |facet, count, _| {
                    let facet = OrderedF64Codec::bytes_decode(facet).unwrap();
                    if nbr_facets == 100 {
                        Ok(ControlFlow::Break(()))
                    } else {
                        nbr_facets += 1;
                        results.push_str(&format!("{facet}: {count}\n"));
                        Ok(ControlFlow::Continue(()))
                    }
                },
            )
            .unwrap();
            milli_snap!(results, i);

            txn.commit().unwrap();
        }
    }
}

688
crates/milli/src/search/facet/facet_range_search.rs
Normal file

@@ -0,0 +1,688 @@
use std::ops::{Bound, RangeBounds};

use heed::BytesEncode;
use roaring::RoaringBitmap;

use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
use crate::heed_codec::facet::{
    FacetGroupKey, FacetGroupKeyCodec, FacetGroupLazyValueCodec, FacetGroupValueCodec,
};
use crate::heed_codec::BytesRefCodec;
use crate::{CboRoaringBitmapCodec, Result};

/// Find all the document ids for which the given field contains a value contained within
/// the two bounds.
pub fn find_docids_of_facet_within_bounds<'t, BoundCodec>(
    rtxn: &'t heed::RoTxn<'t>,
    db: heed::Database<FacetGroupKeyCodec<BoundCodec>, FacetGroupValueCodec>,
    field_id: u16,
    left: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
    right: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
    universe: Option<&RoaringBitmap>,
    docids: &mut RoaringBitmap,
) -> Result<()>
where
    BoundCodec: for<'a> BytesEncode<'a>,
    for<'a> <BoundCodec as BytesEncode<'a>>::EItem: Sized,
{
    let inner;
    let left = match left {
        Bound::Included(left) => {
            inner = BoundCodec::bytes_encode(left).map_err(heed::Error::Encoding)?;
            Bound::Included(inner.as_ref())
        }
        Bound::Excluded(left) => {
            inner = BoundCodec::bytes_encode(left).map_err(heed::Error::Encoding)?;
            Bound::Excluded(inner.as_ref())
        }
        Bound::Unbounded => Bound::Unbounded,
    };
    let inner;
    let right = match right {
        Bound::Included(right) => {
            inner = BoundCodec::bytes_encode(right).map_err(heed::Error::Encoding)?;
            Bound::Included(inner.as_ref())
        }
        Bound::Excluded(right) => {
            inner = BoundCodec::bytes_encode(right).map_err(heed::Error::Encoding)?;
            Bound::Excluded(inner.as_ref())
        }
        Bound::Unbounded => Bound::Unbounded,
    };
    let db = db.remap_types::<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupLazyValueCodec>();
    let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, universe, docids };
    let highest_level = get_highest_level(rtxn, db, field_id)?;

    if let Some(starting_left_bound) =
        get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)?
    {
        let rightmost_bound =
            Bound::Included(get_last_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)?.unwrap()); // will not fail because get_first_facet_value succeeded
        let group_size = usize::MAX;
        f.run(highest_level, starting_left_bound, rightmost_bound, group_size)?;
        Ok(())
    } else {
        Ok(())
    }
}

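// Illustrative usage sketch (an addition, not part of the original diff; it
// mirrors the tests at the bottom of this file): collect the docids of
// field 0 whose facet value lies within the inclusive range [0.0, 42.0].
#[cfg(test)]
mod range_usage_example {
    use std::ops::Bound;

    use roaring::RoaringBitmap;

    use super::find_docids_of_facet_within_bounds;
    use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
    use crate::search::facet::tests::get_simple_index;

    #[test]
    fn inclusive_range() {
        let index = get_simple_index();
        let txn = index.env.read_txn().unwrap();
        let mut docids = RoaringBitmap::new();
        find_docids_of_facet_within_bounds::<OrderedF64Codec>(
            &txn,
            index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
            0,
            &Bound::Included(0.0),
            &Bound::Included(42.0),
            None,
            &mut docids,
        )
        .unwrap();
        assert!(!docids.is_empty());
    }
}
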
/// Fetch the document ids that have a facet with a value between the two given bounds.
struct FacetRangeSearch<'t, 'b, 'bitmap> {
    rtxn: &'t heed::RoTxn<'t>,
    db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupLazyValueCodec>,
    field_id: u16,
    left: Bound<&'b [u8]>,
    right: Bound<&'b [u8]>,
    /// The subset of document ids that are useful for this search.
    /// Great performance optimizations can be achieved by only fetching values matching this subset.
    universe: Option<&'bitmap RoaringBitmap>,
    docids: &'bitmap mut RoaringBitmap,
}

impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
    fn run_level_0(&mut self, starting_left_bound: &'t [u8], group_size: usize) -> Result<()> {
        let left_key =
            FacetGroupKey { field_id: self.field_id, level: 0, left_bound: starting_left_bound };
        let iter = self.db.range(self.rtxn, &(left_key..))?.take(group_size);
        for el in iter {
            let (key, value) = el?;
            // the right side of the iter range is unbounded, so we need to make sure that we are
            // not iterating on the next field id
            if key.field_id != self.field_id {
                return Ok(());
            }
            let should_skip = {
                match self.left {
                    Bound::Included(left) => left > key.left_bound,
                    Bound::Excluded(left) => left >= key.left_bound,
                    Bound::Unbounded => false,
                }
            };
            if should_skip {
                continue;
            }
            let should_stop = {
                match self.right {
                    Bound::Included(right) => right < key.left_bound,
                    Bound::Excluded(right) => right <= key.left_bound,
                    Bound::Unbounded => false,
                }
            };
            if should_stop {
                break;
            }

            if RangeBounds::<&[u8]>::contains(&(self.left, self.right), &key.left_bound) {
                *self.docids |= match self.universe {
                    Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
                        value.bitmap_bytes,
                        universe,
                    )?,
                    None => CboRoaringBitmapCodec::deserialize_from(value.bitmap_bytes)?,
                };
            }
        }
        Ok(())
    }

    /// Recursive part of the algorithm for level > 0.
    ///
    /// It works by visiting a slice of a level and checking whether the range associated
    /// with each visited element is contained within the bounds.
    ///
    /// 1. So long as the element's range is less than the left bound, we do nothing and keep iterating
    /// 2. If the element's range is fully contained by the bounds, then all of its docids are added to
    ///    the roaring bitmap.
    /// 3. If the element's range merely intersects the bounds, then we call the algorithm recursively
    ///    on the children of the element from the level below.
    /// 4. If the element's range is greater than the right bound, we do nothing and stop iterating.
    ///    Note that the right bound is found through either the `left_bound` of the *next* element,
    ///    or from the `rightmost_bound` argument.
    ///
    /// ## Arguments
    /// - `level`: the level being visited
    /// - `starting_left_bound`: the left_bound of the first element to visit
    /// - `rightmost_bound`: the right bound of the last element that should be visited
    /// - `group_size`: the number of elements that should be visited
fn run(
|
||||
&mut self,
|
||||
level: u8,
|
||||
starting_left_bound: &'t [u8],
|
||||
rightmost_bound: Bound<&'t [u8]>,
|
||||
group_size: usize,
|
||||
) -> Result<()> {
|
||||
if level == 0 {
|
||||
return self.run_level_0(starting_left_bound, group_size);
|
||||
}
|
||||
|
||||
let left_key =
|
||||
FacetGroupKey { field_id: self.field_id, level, left_bound: starting_left_bound };
|
||||
let mut iter = self.db.range(self.rtxn, &(left_key..))?.take(group_size);
|
||||
|
||||
// We iterate over the range while keeping in memory the previous value
|
||||
let (mut previous_key, mut previous_value) = iter.next().unwrap()?;
|
||||
for el in iter {
|
||||
let (next_key, next_value) = el?;
|
||||
// the right of the iter range is potentially unbounded (e.g. if `group_size` is usize::MAX),
|
||||
// so we need to make sure that we are not iterating on the next field id
|
||||
if next_key.field_id != self.field_id {
|
||||
break;
|
||||
}
|
||||
// now, do we skip, stop, or visit?
|
||||
let should_skip = {
|
||||
match self.left {
|
||||
Bound::Included(left) => left >= next_key.left_bound,
|
||||
Bound::Excluded(left) => left >= next_key.left_bound,
|
||||
Bound::Unbounded => false,
|
||||
}
|
||||
};
|
||||
if should_skip {
|
||||
previous_key = next_key;
|
||||
previous_value = next_value;
|
||||
continue;
|
||||
}
|
||||
|
||||
// should we stop?
|
||||
// We should if the search range doesn't include any
|
||||
// element from the previous key or its successors
|
||||
let should_stop = {
|
||||
match self.right {
|
||||
Bound::Included(right) => right < previous_key.left_bound,
|
||||
Bound::Excluded(right) => right <= previous_key.left_bound,
|
||||
Bound::Unbounded => false,
|
||||
}
|
||||
};
|
||||
if should_stop {
|
||||
return Ok(());
|
||||
}
|
||||
// should we take the whole thing, without recursing down?
|
||||
let should_take_whole_group = {
|
||||
let left_condition = match self.left {
|
||||
Bound::Included(left) => previous_key.left_bound >= left,
|
||||
                    Bound::Excluded(left) => previous_key.left_bound > left,
                    Bound::Unbounded => true,
                };
                let right_condition = match self.right {
                    Bound::Included(right) => next_key.left_bound <= right,
                    Bound::Excluded(right) => next_key.left_bound <= right,
                    Bound::Unbounded => true,
                };
                left_condition && right_condition
            };
            if should_take_whole_group {
                *self.docids |= match self.universe {
                    Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
                        previous_value.bitmap_bytes,
                        universe,
                    )?,
                    None => CboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?,
                };
                previous_key = next_key;
                previous_value = next_value;
                continue;
            }
            // from here, we should visit the children of the previous element and
            // call the function recursively

            let level = level - 1;
            let starting_left_bound = previous_key.left_bound;
            let rightmost_bound = Bound::Excluded(next_key.left_bound);
            let group_size = previous_value.size as usize;

            self.run(level, starting_left_bound, rightmost_bound, group_size)?;

            previous_key = next_key;
            previous_value = next_value;
        }
        // previous_key/previous_value are the last element's key/value

        // now, do we skip, stop, or visit?
        let should_skip = {
            match (self.left, rightmost_bound) {
                (Bound::Included(left), Bound::Included(right)) => left > right,
                (Bound::Included(left), Bound::Excluded(right)) => left >= right,
                (Bound::Excluded(left), Bound::Included(right) | Bound::Excluded(right)) => {
                    left >= right
                }
                (Bound::Unbounded, _) => false,
                (_, Bound::Unbounded) => false, // should never run?
            }
        };
        if should_skip {
            return Ok(());
        }

        // should we stop?
        // We should if the search range doesn't include any
        // element from the previous key or its successors
        let should_stop = {
            match self.right {
                Bound::Included(right) => right < previous_key.left_bound,
                Bound::Excluded(right) => right <= previous_key.left_bound,
                Bound::Unbounded => false,
            }
        };
        if should_stop {
            return Ok(());
        }
        // should we take the whole thing, without recursing down?
        let should_take_whole_group = {
            let left_condition = match self.left {
                Bound::Included(left) => previous_key.left_bound >= left,
                Bound::Excluded(left) => previous_key.left_bound > left,
                Bound::Unbounded => true,
            };
            let right_condition = match (self.right, rightmost_bound) {
                (Bound::Included(right), Bound::Included(rightmost)) => {
                    // we need to stay within the bound ..=right
                    // the element's range goes to ..=rightmost
                    // so the element fits entirely within the bound if rightmost <= right
                    rightmost <= right
                }
                (Bound::Included(right), Bound::Excluded(rightmost)) => {
                    // we need to stay within the bound ..=right
                    // the element's range goes to ..rightmost
                    // so the element fits entirely within the bound if rightmost <= right
                    rightmost <= right
                }
                (Bound::Excluded(right), Bound::Included(rightmost)) => {
                    // we need to stay within the bound ..right
                    // the element's range goes to ..=rightmost
                    // so the element fits entirely within the bound if rightmost < right
                    rightmost < right
                }
                (Bound::Excluded(right), Bound::Excluded(rightmost)) => {
                    // we need to stay within the bound ..right
                    // the element's range goes to ..rightmost
                    // so the element fits entirely within the bound if rightmost <= right
                    rightmost <= right
                }
                (Bound::Unbounded, _) => {
                    // we need to stay within the bound ..inf
                    // so the element always fits entirely within the bound
                    true
                }
                (_, Bound::Unbounded) => {
                    // we need to stay within a finite bound
                    // but the element's range goes to ..inf
                    // so the element never fits entirely within the bound
                    false
                }
            };
            left_condition && right_condition
        };
        if should_take_whole_group {
            *self.docids |= match self.universe {
                Some(universe) => CboRoaringBitmapCodec::intersection_with_serialized(
                    previous_value.bitmap_bytes,
                    universe,
                )?,
                None => CboRoaringBitmapCodec::deserialize_from(previous_value.bitmap_bytes)?,
            };
        } else {
            let level = level - 1;
            let starting_left_bound = previous_key.left_bound;
            let group_size = previous_value.size as usize;

            self.run(level, starting_left_bound, rightmost_bound, group_size)?;
        }

        Ok(())
    }
}
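// Illustrative sketch (not part of milli): the containment predicate used by
// `should_take_whole_group` above, extracted as a standalone function over
// `u32` bounds. A group starting at `group_left` and extending to
// `group_rightmost` fits entirely within the query range when both conditions
// hold. (`Bound` is already imported in this module.)
#[allow(dead_code)]
fn group_fits_within(
    query_left: Bound<u32>,
    query_right: Bound<u32>,
    group_left: u32,
    group_rightmost: Bound<u32>,
) -> bool {
    let left_condition = match query_left {
        Bound::Included(left) => group_left >= left,
        Bound::Excluded(left) => group_left > left,
        Bound::Unbounded => true,
    };
    let right_condition = match (query_right, group_rightmost) {
        (Bound::Included(right), Bound::Included(rightmost)) => rightmost <= right,
        (Bound::Included(right), Bound::Excluded(rightmost)) => rightmost <= right,
        (Bound::Excluded(right), Bound::Included(rightmost)) => rightmost < right,
        (Bound::Excluded(right), Bound::Excluded(rightmost)) => rightmost <= right,
        (Bound::Unbounded, _) => true,
        (_, Bound::Unbounded) => false,
    };
    left_condition && right_condition
}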
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use std::ops::Bound;
|
||||
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::find_docids_of_facet_within_bounds;
|
||||
use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
|
||||
use crate::milli_snap;
|
||||
use crate::search::facet::tests::{
|
||||
get_random_looking_index, get_random_looking_index_with_multiple_field_ids,
|
||||
get_simple_index, get_simple_index_with_multiple_field_ids,
|
||||
};
|
||||
use crate::snapshot_tests::display_bitmap;
|
||||
|
||||
#[test]
|
||||
fn random_looking_index_snap() {
|
||||
let index = get_random_looking_index();
|
||||
milli_snap!(format!("{index}"), @"3256c76a7c1b768a013e78d5fa6e9ff9");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn random_looking_index_with_multiple_field_ids_snap() {
|
||||
let index = get_random_looking_index_with_multiple_field_ids();
|
||||
milli_snap!(format!("{index}"), @"c3e5fe06a8f1c404ed4935b32c90a89b");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn simple_index_snap() {
|
||||
let index = get_simple_index();
|
||||
milli_snap!(format!("{index}"), @"5dbfa134cc44abeb3ab6242fc182e48e");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn simple_index_with_multiple_field_ids_snap() {
|
||||
let index = get_simple_index_with_multiple_field_ids();
|
||||
milli_snap!(format!("{index}"), @"a4893298218f682bc76357f46777448c");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn filter_range_increasing() {
|
||||
let indexes = [
|
||||
get_simple_index(),
|
||||
get_random_looking_index(),
|
||||
get_simple_index_with_multiple_field_ids(),
|
||||
get_random_looking_index_with_multiple_field_ids(),
|
||||
];
|
||||
for (i, index) in indexes.iter().enumerate() {
|
||||
let txn = index.env.read_txn().unwrap();
|
||||
let mut results = String::new();
|
||||
for i in 0..=255 {
|
||||
let i = i as f64;
|
||||
let start = Bound::Included(0.);
|
||||
let end = Bound::Included(i);
|
||||
let mut docids = RoaringBitmap::new();
|
||||
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
&txn,
|
||||
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
#[allow(clippy::format_push_string)]
|
||||
results.push_str(&format!("0 <= . <= {i} : {}\n", display_bitmap(&docids)));
|
||||
}
|
||||
milli_snap!(results, format!("included_{i}"));
|
||||
let mut results = String::new();
|
||||
for i in 0..=255 {
|
||||
let i = i as f64;
|
||||
let start = Bound::Excluded(0.);
|
||||
let end = Bound::Excluded(i);
|
||||
let mut docids = RoaringBitmap::new();
|
||||
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
&txn,
|
||||
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
#[allow(clippy::format_push_string)]
|
||||
results.push_str(&format!("0 < . < {i} : {}\n", display_bitmap(&docids)));
|
||||
}
|
||||
milli_snap!(results, format!("excluded_{i}"));
|
||||
txn.commit().unwrap();
|
||||
}
|
||||
}
|
||||
#[test]
|
||||
fn filter_range_decreasing() {
|
||||
let indexes = [
|
||||
get_simple_index(),
|
||||
get_random_looking_index(),
|
||||
get_simple_index_with_multiple_field_ids(),
|
||||
get_random_looking_index_with_multiple_field_ids(),
|
||||
];
|
||||
for (i, index) in indexes.iter().enumerate() {
|
||||
let txn = index.env.read_txn().unwrap();
|
||||
|
||||
let mut results = String::new();
|
||||
|
||||
for i in (0..=255).rev() {
|
||||
let i = i as f64;
|
||||
let start = Bound::Included(i);
|
||||
let end = Bound::Included(255.);
|
||||
let mut docids = RoaringBitmap::new();
|
||||
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
&txn,
|
||||
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
results.push_str(&format!("{i} <= . <= 255 : {}\n", display_bitmap(&docids)));
|
||||
}
|
||||
|
||||
milli_snap!(results, format!("included_{i}"));
|
||||
|
||||
let mut results = String::new();
|
||||
|
||||
for i in (0..=255).rev() {
|
||||
let i = i as f64;
|
||||
let start = Bound::Excluded(i);
|
||||
let end = Bound::Excluded(255.);
|
||||
let mut docids = RoaringBitmap::new();
|
||||
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
&txn,
|
||||
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
results.push_str(&format!("{i} < . < 255 : {}\n", display_bitmap(&docids)));
|
||||
}
|
||||
|
||||
milli_snap!(results, format!("excluded_{i}"));
|
||||
|
||||
txn.commit().unwrap();
|
||||
}
|
||||
}
|
||||
#[test]
|
||||
fn filter_range_pinch() {
|
||||
let indexes = [
|
||||
get_simple_index(),
|
||||
get_random_looking_index(),
|
||||
get_simple_index_with_multiple_field_ids(),
|
||||
get_random_looking_index_with_multiple_field_ids(),
|
||||
];
|
||||
for (i, index) in indexes.iter().enumerate() {
|
||||
let txn = index.env.read_txn().unwrap();
|
||||
|
||||
let mut results = String::new();
|
||||
|
||||
for i in (0..=128).rev() {
|
||||
let i = i as f64;
|
||||
let start = Bound::Included(i);
|
||||
let end = Bound::Included(255. - i);
|
||||
let mut docids = RoaringBitmap::new();
|
||||
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
&txn,
|
||||
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
results.push_str(&format!(
|
||||
"{i} <= . <= {r} : {docids}\n",
|
||||
r = 255. - i,
|
||||
docids = display_bitmap(&docids)
|
||||
));
|
||||
}
|
||||
|
||||
milli_snap!(results, format!("included_{i}"));
|
||||
|
||||
let mut results = String::new();
|
||||
|
||||
for i in (0..=128).rev() {
|
||||
let i = i as f64;
|
||||
let start = Bound::Excluded(i);
|
||||
let end = Bound::Excluded(255. - i);
|
||||
let mut docids = RoaringBitmap::new();
|
||||
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
&txn,
|
||||
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
results.push_str(&format!(
|
||||
"{i} < . < {r} {docids}\n",
|
||||
r = 255. - i,
|
||||
docids = display_bitmap(&docids)
|
||||
));
|
||||
}
|
||||
|
||||
milli_snap!(results, format!("excluded_{i}"));
|
||||
|
||||
txn.commit().unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn filter_range_unbounded() {
|
||||
let indexes = [
|
||||
get_simple_index(),
|
||||
get_random_looking_index(),
|
||||
get_simple_index_with_multiple_field_ids(),
|
||||
get_random_looking_index_with_multiple_field_ids(),
|
||||
];
|
||||
for (i, index) in indexes.iter().enumerate() {
|
||||
let txn = index.env.read_txn().unwrap();
|
||||
let mut results = String::new();
|
||||
for i in 0..=255 {
|
||||
let i = i as f64;
|
||||
let start = Bound::Included(i);
|
||||
let end = Bound::Unbounded;
|
||||
let mut docids = RoaringBitmap::new();
|
||||
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
&txn,
|
||||
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
#[allow(clippy::format_push_string)]
|
||||
results.push_str(&format!(">= {i}: {}\n", display_bitmap(&docids)));
|
||||
}
|
||||
milli_snap!(results, format!("start_from_included_{i}"));
|
||||
let mut results = String::new();
|
||||
for i in 0..=255 {
|
||||
let i = i as f64;
|
||||
let start = Bound::Unbounded;
|
||||
let end = Bound::Included(i);
|
||||
let mut docids = RoaringBitmap::new();
|
||||
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
&txn,
|
||||
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
#[allow(clippy::format_push_string)]
|
||||
results.push_str(&format!("<= {i}: {}\n", display_bitmap(&docids)));
|
||||
}
|
||||
milli_snap!(results, format!("end_at_included_{i}"));
|
||||
|
||||
let mut docids = RoaringBitmap::new();
|
||||
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
&txn,
|
||||
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
0,
|
||||
&Bound::Unbounded,
|
||||
&Bound::Unbounded,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
milli_snap!(
|
||||
&format!("all field_id 0: {}\n", display_bitmap(&docids)),
|
||||
format!("unbounded_field_id_0_{i}")
|
||||
);
|
||||
|
||||
let mut docids = RoaringBitmap::new();
|
||||
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
&txn,
|
||||
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
1,
|
||||
&Bound::Unbounded,
|
||||
&Bound::Unbounded,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
milli_snap!(
|
||||
&format!("all field_id 1: {}\n", display_bitmap(&docids)),
|
||||
format!("unbounded_field_id_1_{i}")
|
||||
);
|
||||
|
||||
drop(txn);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn filter_range_exact() {
|
||||
let indexes = [
|
||||
get_simple_index(),
|
||||
get_random_looking_index(),
|
||||
get_simple_index_with_multiple_field_ids(),
|
||||
get_random_looking_index_with_multiple_field_ids(),
|
||||
];
|
||||
for (i, index) in indexes.iter().enumerate() {
|
||||
let txn = index.env.read_txn().unwrap();
|
||||
let mut results_0 = String::new();
|
||||
let mut results_1 = String::new();
|
||||
for i in 0..=255 {
|
||||
let i = i as f64;
|
||||
let start = Bound::Included(i);
|
||||
let end = Bound::Included(i);
|
||||
let mut docids = RoaringBitmap::new();
|
||||
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
&txn,
|
||||
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
0,
|
||||
&start,
|
||||
&end,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
#[allow(clippy::format_push_string)]
|
||||
results_0.push_str(&format!("{i}: {}\n", display_bitmap(&docids)));
|
||||
|
||||
let mut docids = RoaringBitmap::new();
|
||||
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
|
||||
&txn,
|
||||
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
|
||||
1,
|
||||
&start,
|
||||
&end,
|
||||
None,
|
||||
&mut docids,
|
||||
)
|
||||
.unwrap();
|
||||
#[allow(clippy::format_push_string)]
|
||||
results_1.push_str(&format!("{i}: {}\n", display_bitmap(&docids)));
|
||||
}
|
||||
milli_snap!(results_0, format!("field_id_0_exact_{i}"));
|
||||
milli_snap!(results_1, format!("field_id_1_exact_{i}"));
|
||||
|
||||
drop(txn);
|
||||
}
|
||||
}
|
||||
}
|
230
crates/milli/src/search/facet/facet_sort_ascending.rs
Normal file
|
@ -0,0 +1,230 @@
|
|||
use heed::Result;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{get_first_facet_value, get_highest_level};
|
||||
use crate::heed_codec::facet::{
|
||||
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
||||
};
|
||||
use crate::heed_codec::BytesRefCodec;
|
||||
|
||||
/// Return an iterator which iterates over the given candidate documents in
/// ascending order of their facet value for the given field id.
///
/// The documents returned by the iterator are grouped by the facet values that
/// determined their rank. For example, given the documents:
///
/// ```text
/// 0: { "colour": ["blue", "green"] }
/// 1: { "colour": ["blue", "red"] }
/// 2: { "colour": ["orange", "red"] }
/// 3: { "colour": ["green", "red"] }
/// 4: { "colour": ["blue", "orange", "red"] }
/// ```
/// Then calling the function on the candidates `[0, 2, 3, 4]` will return an iterator
/// over the following elements:
/// ```text
/// [0, 4] // corresponds to all the documents within the candidates that have the facet value "blue"
/// [3]    // same for "green"
/// [2]    // same for "orange"
/// END
/// ```
/// Note that once a document id is returned by the iterator, it is never returned again.
|
||||
pub fn ascending_facet_sort<'t>(
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
candidates: RoaringBitmap,
|
||||
) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
|
||||
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
||||
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
|
||||
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
|
||||
let iter = db.range(rtxn, &(first_key..))?.take(usize::MAX);
|
||||
|
||||
Ok(itertools::Either::Left(AscendingFacetSort {
|
||||
rtxn,
|
||||
db,
|
||||
field_id,
|
||||
stack: vec![(candidates, iter)],
|
||||
}))
|
||||
} else {
|
||||
Ok(itertools::Either::Right(std::iter::empty()))
|
||||
}
|
||||
}
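// Usage sketch (hypothetical context: `rtxn`, `db`, and the candidates come
// from the caller; field id 0 is assumed to exist):
//
// let candidates = (0..=4).collect::<RoaringBitmap>();
// for result in ascending_facet_sort(&rtxn, db, 0, candidates)? {
//     let (docids, left_bound) = result?;
//     // `docids` is the subset of the remaining candidates that share the
//     // facet value encoded by `left_bound`; each docid is yielded once.
// }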
|
||||
|
||||
struct AscendingFacetSort<'t, 'e> {
|
||||
rtxn: &'t heed::RoTxn<'e>,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
#[allow(clippy::type_complexity)]
|
||||
stack: Vec<(
|
||||
RoaringBitmap,
|
||||
std::iter::Take<heed::RoRange<'t, FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>>,
|
||||
)>,
|
||||
}
|
||||
|
||||
impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> {
|
||||
type Item = Result<(RoaringBitmap, &'t [u8])>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
'outer: loop {
|
||||
let (documents_ids, deepest_iter) = self.stack.last_mut()?;
|
||||
for result in deepest_iter {
|
||||
let (
|
||||
FacetGroupKey { level, left_bound, field_id },
|
||||
FacetGroupValue { size: group_size, mut bitmap },
|
||||
) = result.unwrap();
|
||||
// The range is unbounded on the right and the group size for the highest level is MAX,
|
||||
// so we need to check that we are not iterating over the next field id
|
||||
if field_id != self.field_id {
|
||||
return None;
|
||||
}
|
||||
|
||||
// If the last iterator found an empty set of documents it means
// that we found all the documents in the sub level iterations already,
// we can pop this level iterator.
if documents_ids.is_empty() {
    // break out of the for loop into the end of the 'outer loop, which
    // pops the stack
    break;
}
|
||||
|
||||
bitmap &= &*documents_ids;
|
||||
if !bitmap.is_empty() {
|
||||
*documents_ids -= &bitmap;
|
||||
|
||||
if level == 0 {
|
||||
// Since the level is 0, the left_bound is the exact value.
|
||||
return Some(Ok((bitmap, left_bound)));
|
||||
}
|
||||
let starting_key_below =
|
||||
FacetGroupKey { field_id: self.field_id, level: level - 1, left_bound };
|
||||
let iter = match self.db.range(self.rtxn, &(starting_key_below..)) {
|
||||
Ok(iter) => iter,
|
||||
Err(e) => return Some(Err(e)),
|
||||
}
|
||||
.take(group_size as usize);
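// Descend one level: the child iterator starts at the same left bound
// and is capped at the parent's group size, which yields a depth-first
// traversal of the facet tree down to level 0.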
|
||||
|
||||
self.stack.push((bitmap, iter));
|
||||
continue 'outer;
|
||||
}
|
||||
}
|
||||
self.stack.pop();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::milli_snap;
|
||||
use crate::search::facet::facet_sort_ascending::ascending_facet_sort;
|
||||
use crate::search::facet::tests::{
|
||||
get_random_looking_index, get_random_looking_string_index_with_multiple_field_ids,
|
||||
get_simple_index, get_simple_string_index_with_multiple_field_ids,
|
||||
};
|
||||
use crate::snapshot_tests::display_bitmap;
|
||||
|
||||
#[test]
|
||||
fn filter_sort_ascending() {
|
||||
let indexes = [get_simple_index(), get_random_looking_index()];
|
||||
for (i, index) in indexes.iter().enumerate() {
|
||||
let txn = index.env.read_txn().unwrap();
|
||||
let candidates = (200..=300).collect::<RoaringBitmap>();
|
||||
let mut results = String::new();
|
||||
let iter = ascending_facet_sort(&txn, index.content, 0, candidates).unwrap();
|
||||
for el in iter {
|
||||
let (docids, _) = el.unwrap();
|
||||
results.push_str(&display_bitmap(&docids));
|
||||
results.push('\n');
|
||||
}
|
||||
milli_snap!(results, i);
|
||||
|
||||
txn.commit().unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn filter_sort_ascending_multiple_field_ids() {
|
||||
let indexes = [
|
||||
get_simple_string_index_with_multiple_field_ids(),
|
||||
get_random_looking_string_index_with_multiple_field_ids(),
|
||||
];
|
||||
for (i, index) in indexes.iter().enumerate() {
|
||||
let txn = index.env.read_txn().unwrap();
|
||||
let candidates = (200..=300).collect::<RoaringBitmap>();
|
||||
let mut results = String::new();
|
||||
let iter = ascending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap();
|
||||
for el in iter {
|
||||
let (docids, _) = el.unwrap();
|
||||
results.push_str(&display_bitmap(&docids));
|
||||
results.push('\n');
|
||||
}
|
||||
milli_snap!(results, format!("{i}-0"));
|
||||
|
||||
let mut results = String::new();
|
||||
let iter = ascending_facet_sort(&txn, index.content, 1, candidates).unwrap();
|
||||
for el in iter {
|
||||
let (docids, _) = el.unwrap();
|
||||
results.push_str(&display_bitmap(&docids));
|
||||
results.push('\n');
|
||||
}
|
||||
milli_snap!(results, format!("{i}-1"));
|
||||
|
||||
txn.commit().unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn filter_sort_ascending_with_no_candidates() {
|
||||
let indexes = [
|
||||
get_simple_string_index_with_multiple_field_ids(),
|
||||
get_random_looking_string_index_with_multiple_field_ids(),
|
||||
];
|
||||
for index in indexes {
|
||||
let txn = index.env.read_txn().unwrap();
|
||||
let candidates = RoaringBitmap::new();
|
||||
let mut results = String::new();
|
||||
let iter = ascending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap();
|
||||
for el in iter {
|
||||
let (docids, _) = el.unwrap();
|
||||
results.push_str(&display_bitmap(&docids));
|
||||
results.push('\n');
|
||||
}
|
||||
assert!(results.is_empty());
|
||||
|
||||
let mut results = String::new();
|
||||
let iter = ascending_facet_sort(&txn, index.content, 1, candidates).unwrap();
|
||||
for el in iter {
|
||||
let (docids, _) = el.unwrap();
|
||||
results.push_str(&display_bitmap(&docids));
|
||||
results.push('\n');
|
||||
}
|
||||
assert!(results.is_empty());
|
||||
|
||||
txn.commit().unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn filter_sort_ascending_with_inexisting_field_id() {
|
||||
let indexes = [
|
||||
get_simple_string_index_with_multiple_field_ids(),
|
||||
get_random_looking_string_index_with_multiple_field_ids(),
|
||||
];
|
||||
for index in indexes {
|
||||
let txn = index.env.read_txn().unwrap();
|
||||
let candidates = RoaringBitmap::new();
|
||||
let mut results = String::new();
|
||||
let iter = ascending_facet_sort(&txn, index.content, 3, candidates.clone()).unwrap();
|
||||
for el in iter {
|
||||
let (docids, _) = el.unwrap();
|
||||
results.push_str(&display_bitmap(&docids));
|
||||
results.push('\n');
|
||||
}
|
||||
assert!(results.is_empty());
|
||||
|
||||
txn.commit().unwrap();
|
||||
}
|
||||
}
|
||||
}
|
244
crates/milli/src/search/facet/facet_sort_descending.rs
Normal file
|
@ -0,0 +1,244 @@
|
|||
use std::ops::Bound;
|
||||
|
||||
use heed::Result;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
|
||||
use crate::heed_codec::facet::{
|
||||
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
||||
};
|
||||
use crate::heed_codec::BytesRefCodec;
|
||||
|
||||
/// See the documentation for [`ascending_facet_sort`](super::ascending_facet_sort).
///
/// This function does the same thing, but in the opposite order.
|
||||
pub fn descending_facet_sort<'t>(
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
candidates: RoaringBitmap,
|
||||
) -> Result<impl Iterator<Item = Result<(RoaringBitmap, &'t [u8])>> + 't> {
|
||||
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
||||
if let Some(first_bound) = get_first_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)? {
|
||||
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
|
||||
let last_bound = get_last_facet_value::<BytesRefCodec, _>(rtxn, db, field_id)?.unwrap();
|
||||
let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound };
|
||||
let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX);
|
||||
Ok(itertools::Either::Left(DescendingFacetSort {
|
||||
rtxn,
|
||||
db,
|
||||
field_id,
|
||||
stack: vec![(candidates, iter, Bound::Included(last_bound))],
|
||||
}))
|
||||
} else {
|
||||
Ok(itertools::Either::Right(std::iter::empty()))
|
||||
}
|
||||
}
|
||||
|
||||
struct DescendingFacetSort<'t> {
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
#[allow(clippy::type_complexity)]
|
||||
stack: Vec<(
|
||||
RoaringBitmap,
|
||||
std::iter::Take<
|
||||
heed::RoRevRange<'t, FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
|
||||
>,
|
||||
Bound<&'t [u8]>,
|
||||
)>,
|
||||
}
|
||||
|
||||
impl<'t> Iterator for DescendingFacetSort<'t> {
|
||||
type Item = Result<(RoaringBitmap, &'t [u8])>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
'outer: loop {
|
||||
let (documents_ids, deepest_iter, right_bound) = self.stack.last_mut()?;
|
||||
for result in deepest_iter.by_ref() {
|
||||
let (
|
||||
FacetGroupKey { level, left_bound, field_id },
|
||||
FacetGroupValue { size: group_size, mut bitmap },
|
||||
) = result.unwrap();
|
||||
// The range is unbounded on the right and the group size for the highest level is MAX,
|
||||
// so we need to check that we are not iterating over the next field id
|
||||
if field_id != self.field_id {
|
||||
return None;
|
||||
}
|
||||
// If the last iterator found an empty set of documents it means
|
||||
// that we found all the documents in the sub level iterations already,
|
||||
// we can pop this level iterator.
|
||||
if documents_ids.is_empty() {
|
||||
break;
|
||||
}
|
||||
|
||||
bitmap &= &*documents_ids;
|
||||
if !bitmap.is_empty() {
|
||||
*documents_ids -= &bitmap;
|
||||
|
||||
if level == 0 {
|
||||
// Since we're at the level 0 the left_bound is the exact value.
|
||||
return Some(Ok((bitmap, left_bound)));
|
||||
}
|
||||
let starting_key_below =
|
||||
FacetGroupKey { field_id, level: level - 1, left_bound };
|
||||
|
||||
let end_key_below = match *right_bound {
|
||||
Bound::Included(right) => Bound::Included(FacetGroupKey {
|
||||
field_id,
|
||||
level: level - 1,
|
||||
left_bound: right,
|
||||
}),
|
||||
Bound::Excluded(right) => Bound::Excluded(FacetGroupKey {
|
||||
field_id,
|
||||
level: level - 1,
|
||||
left_bound: right,
|
||||
}),
|
||||
Bound::Unbounded => Bound::Unbounded,
|
||||
};
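// Note: the child iterator must stop at the parent's current right
// bound; below we then shrink that bound to just before this group,
// since everything to its right has already been visited (the parent
// iterates right-to-left).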
|
||||
let prev_right_bound = *right_bound;
|
||||
*right_bound = Bound::Excluded(left_bound);
|
||||
let iter = match self
|
||||
.db
|
||||
.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>()
|
||||
.rev_range(self.rtxn, &(Bound::Included(starting_key_below), end_key_below))
|
||||
{
|
||||
Ok(iter) => iter,
|
||||
Err(e) => return Some(Err(e)),
|
||||
}
|
||||
.take(group_size as usize);
|
||||
|
||||
self.stack.push((bitmap, iter, prev_right_bound));
|
||||
continue 'outer;
|
||||
}
|
||||
*right_bound = Bound::Excluded(left_bound);
|
||||
}
|
||||
self.stack.pop();
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::heed_codec::facet::FacetGroupKeyCodec;
|
||||
use crate::heed_codec::BytesRefCodec;
|
||||
use crate::milli_snap;
|
||||
use crate::search::facet::facet_sort_descending::descending_facet_sort;
|
||||
use crate::search::facet::tests::{
|
||||
get_random_looking_index, get_random_looking_string_index_with_multiple_field_ids,
|
||||
get_simple_index, get_simple_index_with_multiple_field_ids,
|
||||
get_simple_string_index_with_multiple_field_ids,
|
||||
};
|
||||
use crate::snapshot_tests::display_bitmap;
|
||||
|
||||
#[test]
|
||||
fn filter_sort_descending() {
|
||||
let indexes = [
|
||||
get_simple_index(),
|
||||
get_random_looking_index(),
|
||||
get_simple_index_with_multiple_field_ids(),
|
||||
];
|
||||
for (i, index) in indexes.iter().enumerate() {
|
||||
let txn = index.env.read_txn().unwrap();
|
||||
let candidates = (200..=300).collect::<RoaringBitmap>();
|
||||
let mut results = String::new();
|
||||
let db = index.content.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
|
||||
let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap();
|
||||
for el in iter {
|
||||
let (docids, _) = el.unwrap();
|
||||
results.push_str(&display_bitmap(&docids));
|
||||
results.push('\n');
|
||||
}
|
||||
milli_snap!(results, i);
|
||||
|
||||
txn.commit().unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn filter_sort_descending_multiple_field_ids() {
|
||||
let indexes = [
|
||||
get_simple_string_index_with_multiple_field_ids(),
|
||||
get_random_looking_string_index_with_multiple_field_ids(),
|
||||
];
|
||||
for (i, index) in indexes.iter().enumerate() {
|
||||
let txn = index.env.read_txn().unwrap();
|
||||
let candidates = (200..=300).collect::<RoaringBitmap>();
|
||||
let mut results = String::new();
|
||||
let db = index.content.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
|
||||
let iter = descending_facet_sort(&txn, db, 0, candidates.clone()).unwrap();
|
||||
for el in iter {
|
||||
let (docids, _) = el.unwrap();
|
||||
results.push_str(&display_bitmap(&docids));
|
||||
results.push('\n');
|
||||
}
|
||||
milli_snap!(results, format!("{i}-0"));
|
||||
|
||||
let mut results = String::new();
|
||||
|
||||
let iter = descending_facet_sort(&txn, db, 1, candidates).unwrap();
|
||||
for el in iter {
|
||||
let (docids, _) = el.unwrap();
|
||||
results.push_str(&display_bitmap(&docids));
|
||||
results.push('\n');
|
||||
}
|
||||
milli_snap!(results, format!("{i}-1"));
|
||||
|
||||
txn.commit().unwrap();
|
||||
}
|
||||
}
|
||||
#[test]
|
||||
fn filter_sort_descending_with_no_candidates() {
|
||||
let indexes = [
|
||||
get_simple_string_index_with_multiple_field_ids(),
|
||||
get_random_looking_string_index_with_multiple_field_ids(),
|
||||
];
|
||||
for index in indexes {
|
||||
let txn = index.env.read_txn().unwrap();
|
||||
let candidates = RoaringBitmap::new();
|
||||
let mut results = String::new();
|
||||
let iter = descending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap();
|
||||
for el in iter {
|
||||
let (docids, _) = el.unwrap();
|
||||
results.push_str(&display_bitmap(&docids));
|
||||
results.push('\n');
|
||||
}
|
||||
assert!(results.is_empty());
|
||||
|
||||
let mut results = String::new();
|
||||
let iter = descending_facet_sort(&txn, index.content, 1, candidates).unwrap();
|
||||
for el in iter {
|
||||
let (docids, _) = el.unwrap();
|
||||
results.push_str(&display_bitmap(&docids));
|
||||
results.push('\n');
|
||||
}
|
||||
assert!(results.is_empty());
|
||||
|
||||
txn.commit().unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn filter_sort_descending_with_inexisting_field_id() {
|
||||
let indexes = [
|
||||
get_simple_string_index_with_multiple_field_ids(),
|
||||
get_random_looking_string_index_with_multiple_field_ids(),
|
||||
];
|
||||
for index in indexes {
|
||||
let txn = index.env.read_txn().unwrap();
|
||||
let candidates = RoaringBitmap::new();
|
||||
let mut results = String::new();
|
||||
let iter = descending_facet_sort(&txn, index.content, 3, candidates.clone()).unwrap();
|
||||
for el in iter {
|
||||
let (docids, _) = el.unwrap();
|
||||
results.push_str(&display_bitmap(&docids));
|
||||
results.push('\n');
|
||||
}
|
||||
assert!(results.is_empty());
|
||||
|
||||
txn.commit().unwrap();
|
||||
}
|
||||
}
|
||||
}
|
1211
crates/milli/src/search/facet/filter.rs
Normal file
File diff suppressed because it is too large
227
crates/milli/src/search/facet/mod.rs
Normal file
|
@ -0,0 +1,227 @@
|
|||
pub use facet_sort_ascending::ascending_facet_sort;
|
||||
pub use facet_sort_descending::descending_facet_sort;
|
||||
use heed::types::{Bytes, DecodeIgnore};
|
||||
use heed::{BytesDecode, RoTxn};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
pub use self::facet_distribution::{FacetDistribution, OrderBy, DEFAULT_VALUES_PER_FACET};
|
||||
pub use self::filter::{BadGeoError, Filter};
|
||||
pub use self::search::{FacetValueHit, SearchForFacetValues};
|
||||
use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
|
||||
use crate::heed_codec::BytesRefCodec;
|
||||
use crate::{Index, Result};
|
||||
|
||||
mod facet_distribution;
|
||||
mod facet_distribution_iter;
|
||||
mod facet_range_search;
|
||||
mod facet_sort_ascending;
|
||||
mod facet_sort_descending;
|
||||
mod filter;
|
||||
mod search;
|
||||
|
||||
fn facet_extreme_value<'t>(
|
||||
mut extreme_it: impl Iterator<Item = heed::Result<(RoaringBitmap, &'t [u8])>> + 't,
|
||||
) -> Result<Option<f64>> {
|
||||
let extreme_value =
|
||||
if let Some(extreme_value) = extreme_it.next() { extreme_value } else { return Ok(None) };
|
||||
let (_, extreme_value) = extreme_value?;
|
||||
OrderedF64Codec::bytes_decode(extreme_value)
|
||||
.map(Some)
|
||||
.map_err(heed::Error::Decoding)
|
||||
.map_err(Into::into)
|
||||
}
|
||||
|
||||
pub fn facet_min_value<'t>(
|
||||
index: &'t Index,
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
field_id: u16,
|
||||
candidates: RoaringBitmap,
|
||||
) -> Result<Option<f64>> {
|
||||
let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
|
||||
let it = ascending_facet_sort(rtxn, db, field_id, candidates)?;
|
||||
facet_extreme_value(it)
|
||||
}
|
||||
|
||||
pub fn facet_max_value<'t>(
|
||||
index: &'t Index,
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
field_id: u16,
|
||||
candidates: RoaringBitmap,
|
||||
) -> Result<Option<f64>> {
|
||||
let db = index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
|
||||
let it = descending_facet_sort(rtxn, db, field_id, candidates)?;
|
||||
facet_extreme_value(it)
|
||||
}
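// Usage sketch (hypothetical `index`, `rtxn`, field id and candidates):
//
// let min = facet_min_value(&index, &rtxn, 0, candidates.clone())?;
// let max = facet_max_value(&index, &rtxn, 0, candidates)?;
// // Both are `None` when no candidate document has a number facet value
// // for this field.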
|
||||
|
||||
/// Get the first facet value in the facet database
|
||||
pub(crate) fn get_first_facet_value<'t, BoundCodec, DC>(
|
||||
txn: &'t RoTxn<'t>,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>,
|
||||
field_id: u16,
|
||||
) -> heed::Result<Option<BoundCodec::DItem>>
|
||||
where
|
||||
BoundCodec: BytesDecode<'t>,
|
||||
{
|
||||
let mut level0prefix = vec![];
|
||||
level0prefix.extend_from_slice(&field_id.to_be_bytes());
|
||||
level0prefix.push(0);
|
||||
let mut level0_iter_forward =
|
||||
db.remap_types::<Bytes, DecodeIgnore>().prefix_iter(txn, level0prefix.as_slice())?;
|
||||
if let Some(first) = level0_iter_forward.next() {
|
||||
let (first_key, _) = first?;
|
||||
let first_key = FacetGroupKeyCodec::<BoundCodec>::bytes_decode(first_key)
|
||||
.map_err(heed::Error::Decoding)?;
|
||||
Ok(Some(first_key.left_bound))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
|
||||
|
||||
/// Get the last facet value in the facet database
|
||||
pub(crate) fn get_last_facet_value<'t, BoundCodec, DC>(
|
||||
txn: &'t RoTxn<'t>,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>,
|
||||
field_id: u16,
|
||||
) -> heed::Result<Option<BoundCodec::DItem>>
|
||||
where
|
||||
BoundCodec: BytesDecode<'t>,
|
||||
{
|
||||
let mut level0prefix = vec![];
|
||||
level0prefix.extend_from_slice(&field_id.to_be_bytes());
|
||||
level0prefix.push(0);
|
||||
let mut level0_iter_backward =
|
||||
db.remap_types::<Bytes, DecodeIgnore>().rev_prefix_iter(txn, level0prefix.as_slice())?;
|
||||
if let Some(last) = level0_iter_backward.next() {
|
||||
let (last_key, _) = last?;
|
||||
let last_key = FacetGroupKeyCodec::<BoundCodec>::bytes_decode(last_key)
|
||||
.map_err(heed::Error::Decoding)?;
|
||||
Ok(Some(last_key.left_bound))
|
||||
} else {
|
||||
Ok(None)
|
||||
}
|
||||
}
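// Illustrative sketch (assumed standalone helper, not part of this module):
// the level-0 prefix built by the two functions above is the field id in
// big-endian bytes followed by the level byte 0.
#[allow(dead_code)]
fn level0_prefix(field_id: u16) -> Vec<u8> {
    let mut prefix = Vec::with_capacity(3);
    prefix.extend_from_slice(&field_id.to_be_bytes()); // e.g. 2u16 -> [0x00, 0x02]
    prefix.push(0); // level 0
    prefix
}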
|
||||
|
||||
/// Get the height of the highest level in the facet database
|
||||
pub(crate) fn get_highest_level<'t, DC>(
|
||||
txn: &'t RoTxn<'t>,
|
||||
db: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, DC>,
|
||||
field_id: u16,
|
||||
) -> heed::Result<u8> {
|
||||
let field_id_prefix = &field_id.to_be_bytes();
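// Keys sort by (field_id, level, left_bound), so the last key under the
// field id prefix belongs to the highest level: a reverse prefix iterator
// reaches it first.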
|
||||
Ok(db
|
||||
.remap_types::<Bytes, DecodeIgnore>()
|
||||
.rev_prefix_iter(txn, field_id_prefix)?
|
||||
.next()
|
||||
.map(|el| {
|
||||
let (key, _) = el.unwrap();
|
||||
let key = FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(key).unwrap();
|
||||
key.level
|
||||
})
|
||||
.unwrap_or(0))
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
pub(crate) mod tests {
|
||||
use rand::{Rng, SeedableRng};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::heed_codec::facet::OrderedF64Codec;
|
||||
use crate::heed_codec::StrRefCodec;
|
||||
use crate::update::facet::test_helpers::FacetIndex;
|
||||
|
||||
pub fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
|
||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||
let mut txn = index.env.write_txn().unwrap();
|
||||
for i in 0..256u16 {
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
bitmap.insert(i as u32);
|
||||
index.insert(&mut txn, 0, &(i as f64), &bitmap);
|
||||
}
|
||||
txn.commit().unwrap();
|
||||
index
|
||||
}
|
||||
pub fn get_random_looking_index() -> FacetIndex<OrderedF64Codec> {
|
||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||
let mut txn = index.env.write_txn().unwrap();
|
||||
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
|
||||
|
||||
for key in std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128) {
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
bitmap.insert(key);
|
||||
bitmap.insert(key + 100);
|
||||
index.insert(&mut txn, 0, &(key as f64), &bitmap);
|
||||
}
|
||||
txn.commit().unwrap();
|
||||
index
|
||||
}
|
||||
pub fn get_simple_index_with_multiple_field_ids() -> FacetIndex<OrderedF64Codec> {
|
||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||
let mut txn = index.env.write_txn().unwrap();
|
||||
for fid in 0..2 {
|
||||
for i in 0..256u16 {
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
bitmap.insert(i as u32);
|
||||
index.insert(&mut txn, fid, &(i as f64), &bitmap);
|
||||
}
|
||||
}
|
||||
txn.commit().unwrap();
|
||||
index
|
||||
}
|
||||
pub fn get_random_looking_index_with_multiple_field_ids() -> FacetIndex<OrderedF64Codec> {
|
||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
|
||||
let mut txn = index.env.write_txn().unwrap();
|
||||
|
||||
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
|
||||
let keys =
|
||||
std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::<Vec<u32>>();
|
||||
for fid in 0..2 {
|
||||
for &key in &keys {
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
bitmap.insert(key);
|
||||
bitmap.insert(key + 100);
|
||||
index.insert(&mut txn, fid, &(key as f64), &bitmap);
|
||||
}
|
||||
}
|
||||
txn.commit().unwrap();
|
||||
index
|
||||
}
|
||||
pub fn get_simple_string_index_with_multiple_field_ids() -> FacetIndex<StrRefCodec> {
|
||||
let index = FacetIndex::<StrRefCodec>::new(4, 8, 5);
|
||||
let mut txn = index.env.write_txn().unwrap();
|
||||
for fid in 0..2 {
|
||||
for i in 0..256u16 {
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
bitmap.insert(i as u32);
|
||||
if i % 2 == 0 {
|
||||
index.insert(&mut txn, fid, &format!("{i}").as_str(), &bitmap);
|
||||
} else {
|
||||
index.insert(&mut txn, fid, &"", &bitmap);
|
||||
}
|
||||
}
|
||||
}
|
||||
txn.commit().unwrap();
|
||||
index
|
||||
}
|
||||
pub fn get_random_looking_string_index_with_multiple_field_ids() -> FacetIndex<StrRefCodec> {
|
||||
let index = FacetIndex::<StrRefCodec>::new(4, 8, 5);
|
||||
let mut txn = index.env.write_txn().unwrap();
|
||||
|
||||
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
|
||||
let keys =
|
||||
std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::<Vec<u32>>();
|
||||
for fid in 0..2 {
|
||||
for &key in &keys {
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
bitmap.insert(key);
|
||||
bitmap.insert(key + 100);
|
||||
if key % 2 == 0 {
|
||||
index.insert(&mut txn, fid, &format!("{key}").as_str(), &bitmap);
|
||||
} else {
|
||||
index.insert(&mut txn, fid, &"", &bitmap);
|
||||
}
|
||||
}
|
||||
}
|
||||
txn.commit().unwrap();
|
||||
index
|
||||
}
|
||||
}
|
358
crates/milli/src/search/facet/search.rs
Normal file
|
@ -0,0 +1,358 @@
|
|||
use std::cmp::{Ordering, Reverse};
|
||||
use std::collections::BinaryHeap;
|
||||
use std::ops::ControlFlow;
|
||||
|
||||
use charabia::normalizer::NormalizerOption;
|
||||
use charabia::{Language, Normalize, StrDetection, Token};
|
||||
use fst::automaton::{Automaton, Str};
|
||||
use fst::{IntoStreamer, Streamer};
|
||||
use roaring::RoaringBitmap;
|
||||
use tracing::error;
|
||||
|
||||
use crate::error::UserError;
|
||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupValue};
|
||||
use crate::search::build_dfa;
|
||||
use crate::{DocumentId, FieldId, OrderBy, Result, Search};
|
||||
|
||||
/// The maximum number of values per facet returned by the facet search route.
|
||||
const DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET: usize = 100;
|
||||
|
||||
pub struct SearchForFacetValues<'a> {
|
||||
query: Option<String>,
|
||||
facet: String,
|
||||
search_query: Search<'a>,
|
||||
max_values: usize,
|
||||
is_hybrid: bool,
|
||||
locales: Option<Vec<Language>>,
|
||||
}
|
||||
|
||||
impl<'a> SearchForFacetValues<'a> {
|
||||
pub fn new(
|
||||
facet: String,
|
||||
search_query: Search<'a>,
|
||||
is_hybrid: bool,
|
||||
) -> SearchForFacetValues<'a> {
|
||||
SearchForFacetValues {
|
||||
query: None,
|
||||
facet,
|
||||
search_query,
|
||||
max_values: DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET,
|
||||
is_hybrid,
|
||||
locales: None,
|
||||
}
|
||||
}
|
||||
|
||||
pub fn query(&mut self, query: impl Into<String>) -> &mut Self {
|
||||
self.query = Some(query.into());
|
||||
self
|
||||
}
|
||||
|
||||
pub fn max_values(&mut self, max: usize) -> &mut Self {
|
||||
self.max_values = max;
|
||||
self
|
||||
}
|
||||
|
||||
pub fn locales(&mut self, locales: Vec<Language>) -> &mut Self {
|
||||
self.locales = Some(locales);
|
||||
self
|
||||
}
|
||||
|
||||
fn one_original_value_of(
|
||||
&self,
|
||||
field_id: FieldId,
|
||||
facet_str: &str,
|
||||
any_docid: DocumentId,
|
||||
) -> Result<Option<String>> {
|
||||
let index = self.search_query.index;
|
||||
let rtxn = self.search_query.rtxn;
|
||||
let key: (FieldId, _, &str) = (field_id, any_docid, facet_str);
|
||||
Ok(index.field_id_docid_facet_strings.get(rtxn, &key)?.map(|v| v.to_owned()))
|
||||
}
|
||||
|
||||
pub fn execute(&self) -> Result<Vec<FacetValueHit>> {
|
||||
let index = self.search_query.index;
|
||||
let rtxn = self.search_query.rtxn;
|
||||
|
||||
let filterable_fields = index.filterable_fields(rtxn)?;
|
||||
if !filterable_fields.contains(&self.facet) {
|
||||
let (valid_fields, hidden_fields) =
|
||||
index.remove_hidden_fields(rtxn, filterable_fields)?;
|
||||
|
||||
return Err(UserError::InvalidFacetSearchFacetName {
|
||||
field: self.facet.clone(),
|
||||
valid_fields,
|
||||
hidden_fields,
|
||||
}
|
||||
.into());
|
||||
}
|
||||
|
||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||
let fid = match fields_ids_map.id(&self.facet) {
|
||||
Some(fid) => fid,
|
||||
// we return an empty list of results when the attribute has been
|
||||
// set as filterable but no document contains this field (yet).
|
||||
None => return Ok(Vec::new()),
|
||||
};
|
||||
|
||||
let fst = match self.search_query.index.facet_id_string_fst.get(rtxn, &fid)? {
|
||||
Some(fst) => fst,
|
||||
None => return Ok(Vec::new()),
|
||||
};
|
||||
|
||||
let search_candidates = self.search_query.execute_for_candidates(
|
||||
self.is_hybrid
|
||||
|| self
|
||||
.search_query
|
||||
.semantic
|
||||
.as_ref()
|
||||
.and_then(|semantic| semantic.vector.as_ref())
|
||||
.is_some(),
|
||||
)?;
|
||||
|
||||
let mut results = match index.sort_facet_values_by(rtxn)?.get(&self.facet) {
|
||||
OrderBy::Lexicographic => ValuesCollection::by_lexicographic(self.max_values),
|
||||
OrderBy::Count => ValuesCollection::by_count(self.max_values),
|
||||
};
|
||||
|
||||
match self.query.as_ref() {
|
||||
Some(query) => {
|
||||
let query = normalize_facet_string(query, self.locales.as_deref());
|
||||
let query = query.as_ref();
|
||||
|
||||
let authorize_typos = self.search_query.index.authorize_typos(rtxn)?;
|
||||
let field_authorizes_typos =
|
||||
!self.search_query.index.exact_attributes_ids(rtxn)?.contains(&fid);
|
||||
|
||||
if authorize_typos && field_authorizes_typos {
|
||||
let exact_words_fst = self.search_query.index.exact_words(rtxn)?;
|
||||
if exact_words_fst.map_or(false, |fst| fst.contains(query)) {
|
||||
if fst.contains(query) {
|
||||
self.fetch_original_facets_using_normalized(
|
||||
fid,
|
||||
query,
|
||||
query,
|
||||
&search_candidates,
|
||||
&mut results,
|
||||
)?;
|
||||
}
|
||||
} else {
|
||||
let one_typo = self.search_query.index.min_word_len_one_typo(rtxn)?;
|
||||
let two_typos = self.search_query.index.min_word_len_two_typos(rtxn)?;
|
||||
|
||||
let is_prefix = true;
|
||||
let automaton = if query.len() < one_typo as usize {
|
||||
build_dfa(query, 0, is_prefix)
|
||||
} else if query.len() < two_typos as usize {
|
||||
build_dfa(query, 1, is_prefix)
|
||||
} else {
|
||||
build_dfa(query, 2, is_prefix)
|
||||
};
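// For example, with Meilisearch's default thresholds (one typo from 5
// characters, two typos from 9 -- assumed defaults here), "cat" builds a
// 0-typo DFA, "chairs" a 1-typo DFA, and "elephants" a 2-typo DFA.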
|
||||
|
||||
let mut stream = fst.search(automaton).into_stream();
|
||||
while let Some(facet_value) = stream.next() {
|
||||
let value = std::str::from_utf8(facet_value)?;
|
||||
if self
|
||||
.fetch_original_facets_using_normalized(
|
||||
fid,
|
||||
value,
|
||||
query,
|
||||
&search_candidates,
|
||||
&mut results,
|
||||
)?
|
||||
.is_break()
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
} else {
|
||||
let automaton = Str::new(query).starts_with();
|
||||
let mut stream = fst.search(automaton).into_stream();
|
||||
while let Some(facet_value) = stream.next() {
|
||||
let value = std::str::from_utf8(facet_value)?;
|
||||
if self
|
||||
.fetch_original_facets_using_normalized(
|
||||
fid,
|
||||
value,
|
||||
query,
|
||||
&search_candidates,
|
||||
&mut results,
|
||||
)?
|
||||
.is_break()
|
||||
{
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
None => {
|
||||
let prefix = FacetGroupKey { field_id: fid, level: 0, left_bound: "" };
|
||||
for result in index.facet_id_string_docids.prefix_iter(rtxn, &prefix)? {
|
||||
let (FacetGroupKey { left_bound, .. }, FacetGroupValue { bitmap, .. }) =
|
||||
result?;
|
||||
let count = search_candidates.intersection_len(&bitmap);
|
||||
if count != 0 {
|
||||
let value = self
|
||||
.one_original_value_of(fid, left_bound, bitmap.min().unwrap())?
|
||||
.unwrap_or_else(|| left_bound.to_string());
|
||||
if results.insert(FacetValueHit { value, count }).is_break() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(results.into_sorted_vec())
|
||||
}
|
||||
|
||||
fn fetch_original_facets_using_normalized(
|
||||
&self,
|
||||
fid: FieldId,
|
||||
value: &str,
|
||||
query: &str,
|
||||
search_candidates: &RoaringBitmap,
|
||||
results: &mut ValuesCollection,
|
||||
) -> Result<ControlFlow<()>> {
|
||||
let index = self.search_query.index;
|
||||
let rtxn = self.search_query.rtxn;
|
||||
|
||||
let database = index.facet_id_normalized_string_strings;
|
||||
let key = (fid, value);
|
||||
let original_strings = match database.get(rtxn, &key)? {
|
||||
Some(original_strings) => original_strings,
|
||||
None => {
|
||||
error!("the facet value is missing from the facet database: {key:?}");
|
||||
return Ok(ControlFlow::Continue(()));
|
||||
}
|
||||
};
|
||||
for original in original_strings {
|
||||
let key = FacetGroupKey { field_id: fid, level: 0, left_bound: original.as_str() };
|
||||
let docids = match index.facet_id_string_docids.get(rtxn, &key)? {
|
||||
Some(FacetGroupValue { bitmap, .. }) => bitmap,
|
||||
None => {
|
||||
error!("the facet value is missing from the facet database: {key:?}");
|
||||
return Ok(ControlFlow::Continue(()));
|
||||
}
|
||||
};
|
||||
let count = search_candidates.intersection_len(&docids);
|
||||
if count != 0 {
|
||||
let value = self
|
||||
.one_original_value_of(fid, &original, docids.min().unwrap())?
|
||||
.unwrap_or_else(|| query.to_string());
|
||||
if results.insert(FacetValueHit { value, count }).is_break() {
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(ControlFlow::Continue(()))
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone, serde::Serialize, PartialEq)]
|
||||
pub struct FacetValueHit {
|
||||
/// The original facet value
|
||||
pub value: String,
|
||||
/// The number of documents associated to this facet
|
||||
pub count: u64,
|
||||
}
|
||||
|
||||
impl PartialOrd for FacetValueHit {
|
||||
fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
|
||||
Some(self.cmp(other))
|
||||
}
|
||||
}
|
||||
|
||||
impl Ord for FacetValueHit {
|
||||
fn cmp(&self, other: &Self) -> Ordering {
|
||||
self.count.cmp(&other.count).then_with(|| self.value.cmp(&other.value))
|
||||
}
|
||||
}
|
||||
|
||||
impl Eq for FacetValueHit {}
|
||||
|
||||
/// A wrapper type that collects the best facet values by
|
||||
/// lexicographic or number of associated values.
|
||||
enum ValuesCollection {
|
||||
/// Keeps the top values according to the lexicographic order.
|
||||
Lexicographic { max: usize, content: Vec<FacetValueHit> },
|
||||
/// Keeps the top values according to the number of values associated to them.
|
||||
///
|
||||
/// Note that it is a max heap and we need to move the smallest counts
|
||||
/// at the top to be able to pop them when we reach the max_values limit.
|
||||
Count { max: usize, content: BinaryHeap<Reverse<FacetValueHit>> },
|
||||
}
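// Minimal sketch (illustrative, not part of the module's API): wrapping
// entries in `Reverse` turns `BinaryHeap`'s max-heap into a min-heap, so
// `peek()` exposes the smallest count -- the entry to evict once the
// collection is full.
#[cfg(test)]
#[test]
fn reverse_heap_peeks_smallest() {
    use std::cmp::Reverse;
    use std::collections::BinaryHeap;

    let mut heap = BinaryHeap::new();
    heap.push(Reverse(3u64));
    heap.push(Reverse(1));
    heap.push(Reverse(2));
    assert_eq!(heap.peek(), Some(&Reverse(1))); // smallest count on top
}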
|
||||
|
||||
impl ValuesCollection {
|
||||
pub fn by_lexicographic(max: usize) -> Self {
|
||||
ValuesCollection::Lexicographic { max, content: Vec::new() }
|
||||
}
|
||||
|
||||
pub fn by_count(max: usize) -> Self {
|
||||
ValuesCollection::Count { max, content: BinaryHeap::new() }
|
||||
}
|
||||
|
||||
pub fn insert(&mut self, value: FacetValueHit) -> ControlFlow<()> {
|
||||
match self {
|
||||
ValuesCollection::Lexicographic { max, content } => {
|
||||
if content.len() < *max {
|
||||
content.push(value);
|
||||
if content.len() < *max {
|
||||
return ControlFlow::Continue(());
|
||||
}
|
||||
}
|
||||
ControlFlow::Break(())
|
||||
}
|
||||
ValuesCollection::Count { max, content } => {
|
||||
if content.len() == *max {
|
||||
// Peeking gives us the worst value in the list as
|
||||
// this is a max-heap and we reversed it.
|
||||
let Some(mut peek) = content.peek_mut() else { return ControlFlow::Break(()) };
|
||||
if peek.0.count <= value.count {
|
||||
// Replace the current worst value in the heap
|
||||
// with the new one we received that is better.
|
||||
*peek = Reverse(value);
|
||||
}
|
||||
} else {
|
||||
content.push(Reverse(value));
|
||||
}
|
||||
ControlFlow::Continue(())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the list of facet values in descending order of, either,
|
||||
/// count or lexicographic order of the value depending on the type.
|
||||
pub fn into_sorted_vec(self) -> Vec<FacetValueHit> {
|
||||
match self {
|
||||
ValuesCollection::Lexicographic { content, .. } => content.into_iter().collect(),
|
||||
ValuesCollection::Count { content, .. } => {
|
||||
// Convert the heap into a vec of hits by removing the Reverse wrapper.
// Hits come out in the right order: `into_sorted_vec` sorts the `Reverse`
// wrappers in ascending order, which is descending order of the hits.
|
||||
content.into_sorted_vec().into_iter().map(|Reverse(hit)| hit).collect()
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
fn normalize_facet_string(facet_string: &str, locales: Option<&[Language]>) -> String {
    let options = NormalizerOption { lossy: true, ..Default::default() };
    let mut detection = StrDetection::new(facet_string, locales);

    // Use the locale directly when exactly one is provided; only run language
    // detection when several locales are explicitly provided.
    let language = match locales {
        Some(&[language]) => Some(language),
        Some(multiple_locales) if multiple_locales.len() > 1 => detection.language(),
        _ => None,
    };

    let token = Token {
        lemma: std::borrow::Cow::Borrowed(facet_string),
        script: detection.script(),
        language,
        ..Default::default()
    };

    token.normalize(&options).lemma.into_owned()
}
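// Usage sketch (hypothetical input; the exact output depends on charabia's
// lossy normalizer, which typically lowercases and strips diacritics):
//
// let normalized = normalize_facet_string("Café", None);
// // expected to yield something like "cafe" (assumption)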
|
|
@ -0,0 +1,260 @@
|
|||
---
|
||||
source: milli/src/search/facet/facet_distribution_iter.rs
|
||||
---
|
||||
0: 1
|
||||
1: 1
|
||||
2: 1
|
||||
3: 1
|
||||
4: 1
|
||||
5: 1
|
||||
6: 1
|
||||
7: 1
|
||||
8: 1
|
||||
9: 1
|
||||
10: 1
|
||||
11: 1
|
||||
12: 1
|
||||
13: 1
|
||||
14: 1
|
||||
15: 1
|
||||
16: 1
|
||||
17: 1
|
||||
18: 1
|
||||
19: 1
|
||||
20: 1
|
||||
21: 1
|
||||
22: 1
|
||||
23: 1
|
||||
24: 1
|
||||
25: 1
|
||||
26: 1
|
||||
27: 1
|
||||
28: 1
|
||||
29: 1
|
||||
30: 1
|
||||
31: 1
|
||||
32: 1
|
||||
33: 1
|
||||
34: 1
|
||||
35: 1
|
||||
36: 1
|
||||
37: 1
|
||||
38: 1
|
||||
39: 1
|
||||
40: 1
|
||||
41: 1
|
||||
42: 1
|
||||
43: 1
|
||||
44: 1
|
||||
45: 1
|
||||
46: 1
|
||||
47: 1
|
||||
48: 1
|
||||
49: 1
|
||||
50: 1
|
||||
51: 1
|
||||
52: 1
|
||||
53: 1
|
||||
54: 1
|
||||
55: 1
|
||||
56: 1
|
||||
57: 1
|
||||
58: 1
|
||||
59: 1
|
||||
60: 1
|
||||
61: 1
|
||||
62: 1
|
||||
63: 1
|
||||
64: 1
|
||||
65: 1
|
||||
66: 1
|
||||
67: 1
|
||||
68: 1
|
||||
69: 1
|
||||
70: 1
|
||||
71: 1
|
||||
72: 1
|
||||
73: 1
|
||||
74: 1
|
||||
75: 1
|
||||
76: 1
|
||||
77: 1
|
||||
78: 1
|
||||
79: 1
|
||||
80: 1
|
||||
81: 1
|
||||
82: 1
|
||||
83: 1
|
||||
84: 1
|
||||
85: 1
|
||||
86: 1
|
||||
87: 1
|
||||
88: 1
|
||||
89: 1
|
||||
90: 1
|
||||
91: 1
|
||||
92: 1
|
||||
93: 1
|
||||
94: 1
|
||||
95: 1
|
||||
96: 1
|
||||
97: 1
|
||||
98: 1
|
||||
99: 1
|
||||
100: 1
|
||||
101: 1
|
||||
102: 1
|
||||
103: 1
|
||||
104: 1
|
||||
105: 1
|
||||
106: 1
|
||||
107: 1
|
||||
108: 1
|
||||
109: 1
|
||||
110: 1
|
||||
111: 1
|
||||
112: 1
|
||||
113: 1
|
||||
114: 1
|
||||
115: 1
|
||||
116: 1
|
||||
117: 1
|
||||
118: 1
|
||||
119: 1
|
||||
120: 1
|
||||
121: 1
|
||||
122: 1
|
||||
123: 1
|
||||
124: 1
|
||||
125: 1
|
||||
126: 1
|
||||
127: 1
|
||||
128: 1
|
||||
129: 1
|
||||
130: 1
|
||||
131: 1
|
||||
132: 1
|
||||
133: 1
|
||||
134: 1
|
||||
135: 1
|
||||
136: 1
|
||||
137: 1
|
||||
138: 1
|
||||
139: 1
|
||||
140: 1
|
||||
141: 1
|
||||
142: 1
|
||||
143: 1
|
||||
144: 1
|
||||
145: 1
|
||||
146: 1
|
||||
147: 1
|
||||
148: 1
|
||||
149: 1
|
||||
150: 1
|
||||
151: 1
|
||||
152: 1
|
||||
153: 1
|
||||
154: 1
|
||||
155: 1
|
||||
156: 1
|
||||
157: 1
|
||||
158: 1
|
||||
159: 1
|
||||
160: 1
|
||||
161: 1
|
||||
162: 1
|
||||
163: 1
|
||||
164: 1
|
||||
165: 1
|
||||
166: 1
|
||||
167: 1
|
||||
168: 1
|
||||
169: 1
|
||||
170: 1
|
||||
171: 1
|
||||
172: 1
|
||||
173: 1
|
||||
174: 1
|
||||
175: 1
|
||||
176: 1
|
||||
177: 1
|
||||
178: 1
|
||||
179: 1
|
||||
180: 1
|
||||
181: 1
|
||||
182: 1
|
||||
183: 1
|
||||
184: 1
|
||||
185: 1
|
||||
186: 1
|
||||
187: 1
|
||||
188: 1
|
||||
189: 1
|
||||
190: 1
|
||||
191: 1
|
||||
192: 1
|
||||
193: 1
|
||||
194: 1
|
||||
195: 1
|
||||
196: 1
|
||||
197: 1
|
||||
198: 1
|
||||
199: 1
|
||||
200: 1
|
||||
201: 1
|
||||
202: 1
|
||||
203: 1
|
||||
204: 1
|
||||
205: 1
|
||||
206: 1
|
||||
207: 1
|
||||
208: 1
|
||||
209: 1
|
||||
210: 1
|
||||
211: 1
|
||||
212: 1
|
||||
213: 1
|
||||
214: 1
|
||||
215: 1
|
||||
216: 1
|
||||
217: 1
|
||||
218: 1
|
||||
219: 1
|
||||
220: 1
|
||||
221: 1
|
||||
222: 1
|
||||
223: 1
|
||||
224: 1
|
||||
225: 1
|
||||
226: 1
|
||||
227: 1
|
||||
228: 1
|
||||
229: 1
|
||||
230: 1
|
||||
231: 1
|
||||
232: 1
|
||||
233: 1
|
||||
234: 1
|
||||
235: 1
|
||||
236: 1
|
||||
237: 1
|
||||
238: 1
|
||||
239: 1
|
||||
240: 1
|
||||
241: 1
|
||||
242: 1
|
||||
243: 1
|
||||
244: 1
|
||||
245: 1
|
||||
246: 1
|
||||
247: 1
|
||||
248: 1
|
||||
249: 1
|
||||
250: 1
|
||||
251: 1
|
||||
252: 1
|
||||
253: 1
|
||||
254: 1
|
||||
255: 1
|
||||
|
|
@ -0,0 +1,105 @@
|
|||
---
|
||||
source: milli/src/search/facet/facet_distribution_iter.rs
|
||||
---
|
||||
3: 2
|
||||
5: 2
|
||||
6: 2
|
||||
9: 2
|
||||
10: 2
|
||||
11: 2
|
||||
14: 2
|
||||
18: 2
|
||||
19: 2
|
||||
24: 2
|
||||
26: 2
|
||||
28: 2
|
||||
29: 2
|
||||
32: 2
|
||||
33: 2
|
||||
35: 2
|
||||
36: 2
|
||||
37: 2
|
||||
38: 2
|
||||
39: 2
|
||||
41: 2
|
||||
46: 2
|
||||
47: 2
|
||||
49: 2
|
||||
52: 2
|
||||
53: 2
|
||||
55: 2
|
||||
59: 2
|
||||
61: 2
|
||||
64: 2
|
||||
68: 2
|
||||
71: 2
|
||||
74: 2
|
||||
75: 2
|
||||
76: 2
|
||||
81: 2
|
||||
83: 2
|
||||
85: 2
|
||||
86: 2
|
||||
88: 2
|
||||
90: 2
|
||||
91: 2
|
||||
92: 2
|
||||
98: 2
|
||||
99: 2
|
||||
101: 2
|
||||
102: 2
|
||||
103: 2
|
||||
107: 2
|
||||
111: 2
|
||||
115: 2
|
||||
119: 2
|
||||
123: 2
|
||||
124: 2
|
||||
130: 2
|
||||
131: 2
|
||||
133: 2
|
||||
135: 2
|
||||
136: 2
|
||||
137: 2
|
||||
139: 2
|
||||
141: 2
|
||||
143: 2
|
||||
144: 2
|
||||
147: 2
|
||||
150: 2
|
||||
156: 1
|
||||
158: 1
|
||||
160: 1
|
||||
162: 1
|
||||
163: 1
|
||||
164: 1
|
||||
167: 1
|
||||
169: 1
|
||||
173: 1
|
||||
177: 1
|
||||
178: 1
|
||||
179: 1
|
||||
181: 1
|
||||
182: 1
|
||||
186: 1
|
||||
189: 1
|
||||
192: 1
|
||||
193: 1
|
||||
195: 1
|
||||
197: 1
|
||||
205: 1
|
||||
206: 1
|
||||
207: 1
|
||||
208: 1
|
||||
209: 1
|
||||
210: 1
|
||||
216: 1
|
||||
219: 1
|
||||
220: 1
|
||||
223: 1
|
||||
226: 1
|
||||
235: 1
|
||||
236: 1
|
||||
238: 1
|
||||
243: 1
|
||||
|
|
@@ -0,0 +1,104 @@
---
source: milli/src/search/facet/facet_distribution_iter.rs
---
0: 1
1: 1
2: 1
3: 1
4: 1
5: 1
6: 1
7: 1
8: 1
9: 1
10: 1
11: 1
12: 1
13: 1
14: 1
15: 1
16: 1
17: 1
18: 1
19: 1
20: 1
21: 1
22: 1
23: 1
24: 1
25: 1
26: 1
27: 1
28: 1
29: 1
30: 1
31: 1
32: 1
33: 1
34: 1
35: 1
36: 1
37: 1
38: 1
39: 1
40: 1
41: 1
42: 1
43: 1
44: 1
45: 1
46: 1
47: 1
48: 1
49: 1
50: 1
51: 1
52: 1
53: 1
54: 1
55: 1
56: 1
57: 1
58: 1
59: 1
60: 1
61: 1
62: 1
63: 1
64: 1
65: 1
66: 1
67: 1
68: 1
69: 1
70: 1
71: 1
72: 1
73: 1
74: 1
75: 1
76: 1
77: 1
78: 1
79: 1
80: 1
81: 1
82: 1
83: 1
84: 1
85: 1
86: 1
87: 1
88: 1
89: 1
90: 1
91: 1
92: 1
93: 1
94: 1
95: 1
96: 1
97: 1
98: 1
99: 1
@@ -0,0 +1,104 @@
---
source: milli/src/search/facet/facet_distribution_iter.rs
---
3: 2
5: 2
6: 2
9: 2
10: 2
11: 2
14: 2
18: 2
19: 2
24: 2
26: 2
28: 2
29: 2
32: 2
33: 2
35: 2
36: 2
37: 2
38: 2
39: 2
41: 2
46: 2
47: 2
49: 2
52: 2
53: 2
55: 2
59: 2
61: 2
64: 2
68: 2
71: 2
74: 2
75: 2
76: 2
81: 2
83: 2
85: 2
86: 2
88: 2
90: 2
91: 2
92: 2
98: 2
99: 2
101: 2
102: 2
103: 2
107: 2
111: 2
115: 2
119: 2
123: 2
124: 2
130: 2
131: 2
133: 2
135: 2
136: 2
137: 2
139: 2
141: 2
143: 2
144: 2
147: 2
150: 2
156: 1
158: 1
160: 1
162: 1
163: 1
164: 1
167: 1
169: 1
173: 1
177: 1
178: 1
179: 1
181: 1
182: 1
186: 1
189: 1
192: 1
193: 1
195: 1
197: 1
205: 1
206: 1
207: 1
208: 1
209: 1
210: 1
216: 1
219: 1
220: 1
223: 1
226: 1
235: 1
236: 1
238: 1
@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
adf484f467a31ee9460dec539621938a

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
c9939aa4977fcd4bfd35852e102dbc82

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
adf484f467a31ee9460dec539621938a

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
c9939aa4977fcd4bfd35852e102dbc82

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
618738d28ff1386b6e93d171a5acb08f

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
ffb62ab3eef55c2254c13dc0f4099849

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
618738d28ff1386b6e93d171a5acb08f

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
ffb62ab3eef55c2254c13dc0f4099849

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
9c25261cec7275cb5cfd85835904d023

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
2f97f18c15e915853e4df879be6e1f63

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
9c25261cec7275cb5cfd85835904d023

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
2f97f18c15e915853e4df879be6e1f63
@@ -0,0 +1,260 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
0: []
1: []
2: []
3: []
4: []
5: []
6: []
7: []
8: []
9: []
10: []
11: []
12: []
13: []
14: []
15: []
16: []
17: []
18: []
19: []
20: []
21: []
22: []
23: []
24: []
25: []
26: []
27: []
28: []
29: []
30: []
31: []
32: []
33: []
34: []
35: []
36: []
37: []
38: []
39: []
40: []
41: []
42: []
43: []
44: []
45: []
46: []
47: []
48: []
49: []
50: []
51: []
52: []
53: []
54: []
55: []
56: []
57: []
58: []
59: []
60: []
61: []
62: []
63: []
64: []
65: []
66: []
67: []
68: []
69: []
70: []
71: []
72: []
73: []
74: []
75: []
76: []
77: []
78: []
79: []
80: []
81: []
82: []
83: []
84: []
85: []
86: []
87: []
88: []
89: []
90: []
91: []
92: []
93: []
94: []
95: []
96: []
97: []
98: []
99: []
100: []
101: []
102: []
103: []
104: []
105: []
106: []
107: []
108: []
109: []
110: []
111: []
112: []
113: []
114: []
115: []
116: []
117: []
118: []
119: []
120: []
121: []
122: []
123: []
124: []
125: []
126: []
127: []
128: []
129: []
130: []
131: []
132: []
133: []
134: []
135: []
136: []
137: []
138: []
139: []
140: []
141: []
142: []
143: []
144: []
145: []
146: []
147: []
148: []
149: []
150: []
151: []
152: []
153: []
154: []
155: []
156: []
157: []
158: []
159: []
160: []
161: []
162: []
163: []
164: []
165: []
166: []
167: []
168: []
169: []
170: []
171: []
172: []
173: []
174: []
175: []
176: []
177: []
178: []
179: []
180: []
181: []
182: []
183: []
184: []
185: []
186: []
187: []
188: []
189: []
190: []
191: []
192: []
193: []
194: []
195: []
196: []
197: []
198: []
199: []
200: []
201: []
202: []
203: []
204: []
205: []
206: []
207: []
208: []
209: []
210: []
211: []
212: []
213: []
214: []
215: []
216: []
217: []
218: []
219: []
220: []
221: []
222: []
223: []
224: []
225: []
226: []
227: []
228: []
229: []
230: []
231: []
232: []
233: []
234: []
235: []
236: []
237: []
238: []
239: []
240: []
241: []
242: []
243: []
244: []
245: []
246: []
247: []
248: []
249: []
250: []
251: []
252: []
253: []
254: []
255: []
@@ -0,0 +1,260 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
0: []
1: []
2: []
3: []
4: []
5: []
6: []
7: []
8: []
9: []
10: []
11: []
12: []
13: []
14: []
15: []
16: []
17: []
18: []
19: []
20: []
21: []
22: []
23: []
24: []
25: []
26: []
27: []
28: []
29: []
30: []
31: []
32: []
33: []
34: []
35: []
36: []
37: []
38: []
39: []
40: []
41: []
42: []
43: []
44: []
45: []
46: []
47: []
48: []
49: []
50: []
51: []
52: []
53: []
54: []
55: []
56: []
57: []
58: []
59: []
60: []
61: []
62: []
63: []
64: []
65: []
66: []
67: []
68: []
69: []
70: []
71: []
72: []
73: []
74: []
75: []
76: []
77: []
78: []
79: []
80: []
81: []
82: []
83: []
84: []
85: []
86: []
87: []
88: []
89: []
90: []
91: []
92: []
93: []
94: []
95: []
96: []
97: []
98: []
99: []
100: []
101: []
102: []
103: []
104: []
105: []
106: []
107: []
108: []
109: []
110: []
111: []
112: []
113: []
114: []
115: []
116: []
117: []
118: []
119: []
120: []
121: []
122: []
123: []
124: []
125: []
126: []
127: []
128: []
129: []
130: []
131: []
132: []
133: []
134: []
135: []
136: []
137: []
138: []
139: []
140: []
141: []
142: []
143: []
144: []
145: []
146: []
147: []
148: []
149: []
150: []
151: []
152: []
153: []
154: []
155: []
156: []
157: []
158: []
159: []
160: []
161: []
162: []
163: []
164: []
165: []
166: []
167: []
168: []
169: []
170: []
171: []
172: []
173: []
174: []
175: []
176: []
177: []
178: []
179: []
180: []
181: []
182: []
183: []
184: []
185: []
186: []
187: []
188: []
189: []
190: []
191: []
192: []
193: []
194: []
195: []
196: []
197: []
198: []
199: []
200: []
201: []
202: []
203: []
204: []
205: []
206: []
207: []
208: []
209: []
210: []
211: []
212: []
213: []
214: []
215: []
216: []
217: []
218: []
219: []
220: []
221: []
222: []
223: []
224: []
225: []
226: []
227: []
228: []
229: []
230: []
231: []
232: []
233: []
234: []
235: []
236: []
237: []
238: []
239: []
240: []
241: []
242: []
243: []
244: []
245: []
246: []
247: []
248: []
249: []
250: []
251: []
252: []
253: []
254: []
255: []
@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
9c25261cec7275cb5cfd85835904d023

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
2f97f18c15e915853e4df879be6e1f63

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
e849066b0e43d5c456f086c552372afc

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
8cc5e82995b0443b660f419bb9ea2e85

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
e849066b0e43d5c456f086c552372afc

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
8cc5e82995b0443b660f419bb9ea2e85

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
73b48005dc57b04f0939bbf21a68dab6

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
3c23d35627667dcee98468bfdecf09d3

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
73b48005dc57b04f0939bbf21a68dab6

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
3c23d35627667dcee98468bfdecf09d3

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
c3f8b0b858a4820a508b25b42328cedd

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
38a42f5dc25e99d7a5312a63ce94ed30

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
c3f8b0b858a4820a508b25b42328cedd

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
38a42f5dc25e99d7a5312a63ce94ed30

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
2049930204498b323885c91de88e44ca

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
7f0ca8c0fc6494f3dba46e8eb9699045

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
2049930204498b323885c91de88e44ca

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
7f0ca8c0fc6494f3dba46e8eb9699045

@@ -0,0 +1,4 @@
---
source: milli/src/search/facet/facet_range_search.rs
---
ad8fc873747aaf1d3590e7ccab735985
Some files were not shown because too many files have changed in this diff