Merge pull request #403 from meilisearch/lazy-data-fetching

Criteria lazy data preparation
2025-07-04 20:37:15 +02:00 · 2019-12-13 14:57:19 +01:00 · 2019-12-13 14:57:19 +01:00 · 020cd7f9e8
commit 020cd7f9e8
parent 5b9fff6636 40c0b14d1c
29 changed files with 1881 additions and 2403 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -257,6 +257,11 @@ dependencies = [
 "bitflags 1.2.1 (registry+https://github.com/rust-lang/crates.io-index)",
 ]

+[[package]]
+name = "compact_arena"
+version = "0.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+
 [[package]]
 name = "const-random"
 version = "0.1.6"
@ -937,6 +942,7 @@ dependencies = [
 "bincode 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
 "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
 "chrono 0.4.9 (registry+https://github.com/rust-lang/crates.io-index)",
+ "compact_arena 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
 "criterion 0.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
 "crossbeam-channel 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)",
 "csv 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
@ -946,6 +952,8 @@ dependencies = [
 "hashbrown 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)",
 "heed 0.6.1 (registry+https://github.com/rust-lang/crates.io-index)",
 "indexmap 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)",
+ "itertools 0.8.2 (registry+https://github.com/rust-lang/crates.io-index)",
+ "jemallocator 0.3.2 (registry+https://github.com/rust-lang/crates.io-index)",
 "levenshtein_automata 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)",
 "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)",
 "meilisearch-schema 0.8.4",
@ -954,7 +962,7 @@ dependencies = [
 "once_cell 1.2.0 (registry+https://github.com/rust-lang/crates.io-index)",
 "ordered-float 1.0.2 (registry+https://github.com/rust-lang/crates.io-index)",
 "rustyline 5.0.4 (registry+https://github.com/rust-lang/crates.io-index)",
- "sdset 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)",
+ "sdset 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)",
 "serde 1.0.102 (registry+https://github.com/rust-lang/crates.io-index)",
 "serde_json 1.0.41 (registry+https://github.com/rust-lang/crates.io-index)",
 "siphasher 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)",
@ -1684,7 +1692,7 @@ dependencies = [

 [[package]]
 name = "sdset"
-version = "0.3.3"
+version = "0.3.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"

 [[package]]
@ -2648,6 +2656,7 @@ dependencies = [
 "checksum chunked_transfer 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "f98beb6554de08a14bd7b5c6014963c79d6a25a1c66b1d4ecb9e733ccba51d6c"
 "checksum clap 2.33.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5067f5bb2d80ef5d68b4c87db81601f0b75bca627bc2ef76b141d7b846a3c6d9"
 "checksum cloudabi 0.0.3 (registry+https://github.com/rust-lang/crates.io-index)" = "ddfc5b9aa5d4507acaf872de71051dfd0e309860e88966e1051e462a077aac4f"
+"checksum compact_arena 0.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "4ab08c5bed92075075d5db5149887a477b2dc0318c40882a0dfbd34315ac6141"
 "checksum const-random 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "7b641a8c9867e341f3295564203b1c250eb8ce6cb6126e007941f78c4d2ed7fe"
 "checksum const-random-macro 0.1.6 (registry+https://github.com/rust-lang/crates.io-index)" = "c750ec12b83377637110d5a57f5ae08e895b06c4b16e2bdbf1a94ef717428c59"
 "checksum cookie 0.12.0 (registry+https://github.com/rust-lang/crates.io-index)" = "888604f00b3db336d2af898ec3c1d5d0ddf5e6d462220f2ededc33a87ac4bbd5"
@ -2798,7 +2807,7 @@ dependencies = [
 "checksum same-file 1.0.5 (registry+https://github.com/rust-lang/crates.io-index)" = "585e8ddcedc187886a30fa705c47985c3fa88d06624095856b36ca0b82ff4421"
 "checksum scopeguard 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "b42e15e59b18a828bbf5c58ea01debb36b9b096346de35d941dcb89009f24a0d"
 "checksum sct 0.6.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e3042af939fca8c3453b7af0f1c66e533a15a86169e39de2657310ade8f98d3c"
-"checksum sdset 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "b6d2447743d6c37b6d67af88d9c0f1fc92989e2d9745d9b2f3d305b906a90195"
+"checksum sdset 0.3.6 (registry+https://github.com/rust-lang/crates.io-index)" = "5bfd7aab2bcae693c563b40fbbaf87d60c9b6f2a60d55ed69a9c761e3d4c63c9"
 "checksum semver 0.9.0 (registry+https://github.com/rust-lang/crates.io-index)" = "1d7eb9ef2c18661902cc47e535f9bc51b78acd254da71d375c2f6720d9a40403"
 "checksum semver-parser 0.7.0 (registry+https://github.com/rust-lang/crates.io-index)" = "388a1df253eca08550bef6c72392cfe7c30914bf41df5269b68cbd6ff8f570a3"
 "checksum serde 1.0.102 (registry+https://github.com/rust-lang/crates.io-index)" = "0c4b39bd9b0b087684013a792c59e3e07a46a01d2322518d8a1104641a0b1be0"
--- a/meilisearch-core/Cargo.toml
+++ b/meilisearch-core/Cargo.toml
@ -10,12 +10,14 @@ arc-swap = "0.4.3"
 bincode = "1.1.4"
 byteorder = "1.3.2"
 chrono = { version = "0.4.9", features = ["serde"] }
+compact_arena = "0.4.0"
 crossbeam-channel = "0.4.0"
 deunicode = "1.0.0"
 env_logger = "0.7.0"
 fst = { version = "0.3.5", default-features = false }
 hashbrown = { version = "0.6.0", features = ["serde"] }
 heed = "0.6.1"
+itertools = "0.8.2" # kill me please
 levenshtein_automata = { version = "0.1.1", features = ["fst_automaton"] }
 log = "0.4.8"
 meilisearch-schema = { path = "../meilisearch-schema", version = "0.8.4" }
@ -23,7 +25,7 @@ meilisearch-tokenizer = { path = "../meilisearch-tokenizer", version = "0.8.4" }
 meilisearch-types = { path = "../meilisearch-types", version = "0.8.4" }
 once_cell = "1.2.0"
 ordered-float = { version = "1.0.2", features = ["serde"] }
-sdset = "0.3.3"
+sdset = "0.3.6"
 serde = { version = "1.0.101", features = ["derive"] }
 serde_json = "1.0.41"
 siphasher = "0.3.1"
@ -35,6 +37,7 @@ assert_matches = "1.3"
 criterion = "0.3"
 csv = "1.0.7"
 indexmap = { version = "1.2.0", features = ["serde-1"] }
+jemallocator = "0.3.2"
 rustyline = { version = "5.0.0", default-features = false }
 structopt = "0.3.2"
 tempfile = "3.1.0"
--- a/meilisearch-core/examples/from_file.rs
+++ b/meilisearch-core/examples/from_file.rs
@ -1,5 +1,5 @@
-use std::collections::btree_map::{BTreeMap, Entry};
 use std::collections::HashSet;
+use std::collections::btree_map::{BTreeMap, Entry};
 use std::error::Error;
 use std::io::{Read, Write};
 use std::iter::FromIterator;
@ -15,6 +15,10 @@ use termcolor::{Color, ColorChoice, ColorSpec, StandardStream, WriteColor};
 use meilisearch_core::{Database, Highlight, ProcessedUpdateResult};
 use meilisearch_schema::SchemaAttr;

+// #[cfg(target_os = "linux")]
+#[global_allocator]
+static ALLOC: jemallocator::Jemalloc = jemallocator::Jemalloc;
+
 #[derive(Debug, StructOpt)]
 struct IndexCommand {
    /// The destination where the database must be created.
--- a/meilisearch-core/src/automaton/dfa.rs
+++ b/meilisearch-core/src/automaton/dfa.rs
@ -46,3 +46,8 @@ pub fn build_prefix_dfa(query: &str) -> DFA {
 pub fn build_dfa(query: &str) -> DFA {
    build_dfa_with_setting(query, PrefixSetting::NoPrefix)
 }
+
+/// Builds a DFA for `query` with the builder stored in `LEVDIST0`.
+///
+/// The builder is obtained through `LEVDIST0.get_or_init`, whose closure
+/// creates it with `LevBuilder::new(0, true)`.
+// NOTE(review): the `0` is presumably the maximum edit distance, i.e. only
+// exact matches are accepted, as the function name suggests; confirm against
+// the `LevBuilder::new` documentation.
+pub fn build_exact_dfa(query: &str) -> DFA {
+    let builder = LEVDIST0.get_or_init(|| LevBuilder::new(0, true));
+    builder.build_dfa(query)
+}
--- a/meilisearch-core/src/automaton/mod.rs
+++ b/meilisearch-core/src/automaton/mod.rs
@ -1,125 +1,13 @@
 mod dfa;
 mod query_enhancer;

-use std::cmp::Reverse;
-use std::{cmp, vec};
+use meilisearch_tokenizer::is_cjk;

-use fst::{IntoStreamer, Streamer};
-use levenshtein_automata::DFA;
-use meilisearch_tokenizer::{is_cjk, split_query_string};
-
-use crate::database::MainT;
-use crate::error::MResult;
-use crate::store;
-
-use self::dfa::{build_dfa, build_prefix_dfa};
+pub use self::dfa::{build_dfa, build_prefix_dfa, build_exact_dfa};
 pub use self::query_enhancer::QueryEnhancer;
-use self::query_enhancer::QueryEnhancerBuilder;
+pub use self::query_enhancer::QueryEnhancerBuilder;

-const NGRAMS: usize = 3;
-
-pub struct AutomatonProducer {
-    automatons: Vec<AutomatonGroup>,
-}
-
-impl AutomatonProducer {
-    pub fn new(
-        reader: &heed::RoTxn<MainT>,
-        query: &str,
-        main_store: store::Main,
-        postings_list_store: store::PostingsLists,
-        synonyms_store: store::Synonyms,
-    ) -> MResult<(AutomatonProducer, QueryEnhancer)> {
-        let (automatons, query_enhancer) = generate_automatons(
-            reader,
-            query,
-            main_store,
-            postings_list_store,
-            synonyms_store,
-        )?;
-
-        Ok((AutomatonProducer { automatons }, query_enhancer))
-    }
-
-    pub fn into_iter(self) -> vec::IntoIter<AutomatonGroup> {
-        self.automatons.into_iter()
-    }
-}
-
-#[derive(Debug)]
-pub struct AutomatonGroup {
-    pub is_phrase_query: bool,
-    pub automatons: Vec<Automaton>,
-}
-
-impl AutomatonGroup {
-    fn normal(automatons: Vec<Automaton>) -> AutomatonGroup {
-        AutomatonGroup {
-            is_phrase_query: false,
-            automatons,
-        }
-    }
-
-    fn phrase_query(automatons: Vec<Automaton>) -> AutomatonGroup {
-        AutomatonGroup {
-            is_phrase_query: true,
-            automatons,
-        }
-    }
-}
-
-#[derive(Debug)]
-pub struct Automaton {
-    pub index: usize,
-    pub ngram: usize,
-    pub query_len: usize,
-    pub is_exact: bool,
-    pub is_prefix: bool,
-    pub query: String,
-}
-
-impl Automaton {
-    pub fn dfa(&self) -> DFA {
-        if self.is_prefix {
-            build_prefix_dfa(&self.query)
-        } else {
-            build_dfa(&self.query)
-        }
-    }
-
-    fn exact(index: usize, ngram: usize, query: &str) -> Automaton {
-        Automaton {
-            index,
-            ngram,
-            query_len: query.len(),
-            is_exact: true,
-            is_prefix: false,
-            query: query.to_string(),
-        }
-    }
-
-    fn prefix_exact(index: usize, ngram: usize, query: &str) -> Automaton {
-        Automaton {
-            index,
-            ngram,
-            query_len: query.len(),
-            is_exact: true,
-            is_prefix: true,
-            query: query.to_string(),
-        }
-    }
-
-    fn non_exact(index: usize, ngram: usize, query: &str) -> Automaton {
-        Automaton {
-            index,
-            ngram,
-            query_len: query.len(),
-            is_exact: false,
-            is_prefix: false,
-            query: query.to_string(),
-        }
-    }
-}
+pub const NGRAMS: usize = 3;

 pub fn normalize_str(string: &str) -> String {
    let mut string = string.to_lowercase();
@ -130,167 +18,3 @@ pub fn normalize_str(string: &str) -> String {

    string
 }
-
-fn split_best_frequency<'a>(
-    reader: &heed::RoTxn<MainT>,
-    word: &'a str,
-    postings_lists_store: store::PostingsLists,
-) -> MResult<Option<(&'a str, &'a str)>> {
-    let chars = word.char_indices().skip(1);
-    let mut best = None;
-
-    for (i, _) in chars {
-        let (left, right) = word.split_at(i);
-
-        let left_freq = postings_lists_store
-            .postings_list(reader, left.as_ref())?
-            .map_or(0, |i| i.len());
-
-        let right_freq = postings_lists_store
-            .postings_list(reader, right.as_ref())?
-            .map_or(0, |i| i.len());
-
-        let min_freq = cmp::min(left_freq, right_freq);
-        if min_freq != 0 && best.map_or(true, |(old, _, _)| min_freq > old) {
-            best = Some((min_freq, left, right));
-        }
-    }
-
-    Ok(best.map(|(_, l, r)| (l, r)))
-}
-
-fn generate_automatons(
-    reader: &heed::RoTxn<MainT>,
-    query: &str,
-    main_store: store::Main,
-    postings_lists_store: store::PostingsLists,
-    synonym_store: store::Synonyms,
-) -> MResult<(Vec<AutomatonGroup>, QueryEnhancer)> {
-    let has_end_whitespace = query.chars().last().map_or(false, char::is_whitespace);
-    let query_words: Vec<_> = split_query_string(query).map(str::to_lowercase).collect();
-    let synonyms = match main_store.synonyms_fst(reader)? {
-        Some(synonym) => synonym,
-        None => fst::Set::default(),
-    };
-
-    let mut automaton_index = 0;
-    let mut automatons = Vec::new();
-    let mut enhancer_builder = QueryEnhancerBuilder::new(&query_words);
-
-    // We must not declare the original words to the query enhancer
-    // *but* we need to push them in the automatons list first
-    let mut original_automatons = Vec::new();
-    let mut original_words = query_words.iter().peekable();
-    while let Some(word) = original_words.next() {
-        let has_following_word = original_words.peek().is_some();
-        let not_prefix_dfa = has_following_word || has_end_whitespace || word.chars().all(is_cjk);
-
-        let automaton = if not_prefix_dfa {
-            Automaton::exact(automaton_index, 1, word)
-        } else {
-            Automaton::prefix_exact(automaton_index, 1, word)
-        };
-        automaton_index += 1;
-        original_automatons.push(automaton);
-    }
-
-    automatons.push(AutomatonGroup::normal(original_automatons));
-
-    for n in 1..=NGRAMS {
-        let mut ngrams = query_words.windows(n).enumerate().peekable();
-        while let Some((query_index, ngram_slice)) = ngrams.next() {
-            let query_range = query_index..query_index + n;
-            let ngram_nb_words = ngram_slice.len();
-            let ngram = ngram_slice.join(" ");
-
-            let has_following_word = ngrams.peek().is_some();
-            let not_prefix_dfa =
-                has_following_word || has_end_whitespace || ngram.chars().all(is_cjk);
-
-            // automaton of synonyms of the ngrams
-            let normalized = normalize_str(&ngram);
-            let lev = if not_prefix_dfa {
-                build_dfa(&normalized)
-            } else {
-                build_prefix_dfa(&normalized)
-            };
-
-            let mut stream = synonyms.search(&lev).into_stream();
-            while let Some(base) = stream.next() {
-                // only trigger alternatives when the last word has been typed
-                // i.e. "new " do not but "new yo" triggers alternatives to "new york"
-                let base = std::str::from_utf8(base).unwrap();
-                let base_nb_words = split_query_string(base).count();
-                if ngram_nb_words != base_nb_words {
-                    continue;
-                }
-
-                if let Some(synonyms) = synonym_store.synonyms(reader, base.as_bytes())? {
-                    let mut stream = synonyms.into_stream();
-                    while let Some(synonyms) = stream.next() {
-                        let synonyms = std::str::from_utf8(synonyms).unwrap();
-                        let synonyms_words: Vec<_> = split_query_string(synonyms).collect();
-                        let nb_synonym_words = synonyms_words.len();
-
-                        let real_query_index = automaton_index;
-                        enhancer_builder.declare(
-                            query_range.clone(),
-                            real_query_index,
-                            &synonyms_words,
-                        );
-
-                        for synonym in synonyms_words {
-                            let automaton = if nb_synonym_words == 1 {
-                                Automaton::exact(automaton_index, n, synonym)
-                            } else {
-                                Automaton::non_exact(automaton_index, n, synonym)
-                            };
-                            automaton_index += 1;
-                            automatons.push(AutomatonGroup::normal(vec![automaton]));
-                        }
-                    }
-                }
-            }
-
-            if n == 1 {
-                if let Some((left, right)) =
-                    split_best_frequency(reader, &normalized, postings_lists_store)?
-                {
-                    let a = Automaton::exact(automaton_index, 1, left);
-                    enhancer_builder.declare(query_range.clone(), automaton_index, &[left]);
-                    automaton_index += 1;
-
-                    let b = Automaton::exact(automaton_index, 1, right);
-                    enhancer_builder.declare(query_range.clone(), automaton_index, &[left]);
-                    automaton_index += 1;
-
-                    automatons.push(AutomatonGroup::phrase_query(vec![a, b]));
-                }
-            } else {
-                // automaton of concatenation of query words
-                let concat = ngram_slice.concat();
-                let normalized = normalize_str(&concat);
-
-                let real_query_index = automaton_index;
-                enhancer_builder.declare(query_range.clone(), real_query_index, &[&normalized]);
-
-                let automaton = Automaton::exact(automaton_index, n, &normalized);
-                automaton_index += 1;
-                automatons.push(AutomatonGroup::normal(vec![automaton]));
-            }
-        }
-    }
-
-    // order automatons, the most important first,
-    // we keep the original automatons at the front.
-    automatons[1..].sort_by_key(|group| {
-        let a = group.automatons.first().unwrap();
-        (
-            Reverse(a.is_exact),
-            a.ngram,
-            Reverse(group.automatons.len()),
-        )
-    });
-
-    Ok((automatons, enhancer_builder.build()))
-}
--- a/meilisearch-core/src/automaton/query_enhancer.rs
+++ b/meilisearch-core/src/automaton/query_enhancer.rs
@ -58,6 +58,7 @@ where
 type Origin = usize;
 type RealLength = usize;

+#[derive(Debug)]
 struct FakeIntervalTree {
    intervals: Vec<(Range<usize>, (Origin, RealLength))>,
 }
@ -142,67 +143,80 @@ impl<S: AsRef<str>> QueryEnhancerBuilder<'_, S> {
        // we need to pad real query indices
        let real_range = real..real + replacement.len().max(range.len());
        let real_length = replacement.len();
-        self.real_to_origin
-            .push((real_range, (range.start, real_length)));
+        self.real_to_origin.push((real_range, (range.start, real_length)));
    }

    pub fn build(self) -> QueryEnhancer {
-        QueryEnhancer {
-            origins: self.origins,
-            real_to_origin: FakeIntervalTree::new(self.real_to_origin),
+        let interval_tree = FakeIntervalTree::new(self.real_to_origin);
+        let mut table = Vec::new();
+
+        // Precompute the answer of `replacement` for every real query index,
+        // starting at 0 and stopping at the first index for which it returns
+        // `None`, so that `table[real]` holds the range of the index `real`.
+        // `real` is inferred as a `u32` from the signature of `replacement`.
+        for real in 0.. {
+            match replacement(&self.origins, &interval_tree, real) {
+                Some(range) => table.push(range),
+                None => break,
+            }
        }
+
+        QueryEnhancer { table }
    }
 }

+/// Returns the query indices that represent this real query index.
+///
+/// `real` is looked up in `real_to_origin`; `None` is returned when that
+/// lookup finds nothing or when the end bound needed by the first branch
+/// is outside of `origins`.
+///
+/// When `real` equals `range.start + real_length - 1` the returned range
+/// starts at `origins[origin] + n` and stops at the end bound read from
+/// `origins`, otherwise the returned range has a length of exactly one.
+fn replacement(
+    origins: &[usize],
+    real_to_origin: &FakeIntervalTree,
+    real: u32,
+) -> Option<Range<u32>>
+{
+    let real = real as usize;
+
+    // query the fake interval tree with the real query index
+    let (range, (origin, real_length)) = real_to_origin.query(real)?;
+
+    // if `real` is the end bound of the range
+    if (range.start + real_length - 1) == real {
+        // walk the consecutive pairs of `origins` from `origin`, subtracting
+        // the gap of each pair from the length of the range; `new_origin` is
+        // moved to the pair at which the count reaches zero and is left equal
+        // to `origin` if that never happens
+        let mut count = range.len();
+        let mut new_origin = origin;
+        for (i, slice) in origins[new_origin..].windows(2).enumerate() {
+            let len = slice[1] - slice[0];
+            count = count.saturating_sub(len);
+            if count == 0 {
+                new_origin = origin + i;
+                break;
+            }
+        }
+
+        let n = real - range.start;
+        let start = origins[origin];
+        // a missing end bound makes the whole function return `None`
+        let end = origins.get(new_origin + 1)?;
+        let remaining = (end - start) - n;
+
+        // `start + n + remaining` is equal to `end`
+        Some(Range {
+            start: (start + n) as u32,
+            end: (start + n + remaining) as u32,
+        })
+    } else {
+        // just return the origin along with
+        // the real position of the word
+        let n = real as usize - range.start;
+        let origin = origins[origin];
+
+        Some(Range {
+            start: (origin + n) as u32,
+            end: (origin + n + 1) as u32,
+        })
+    }
+}
+
+/// Table of query index ranges, indexed by real query index.
+#[derive(Debug)]
 pub struct QueryEnhancer {
-    origins: Vec<usize>,
-    real_to_origin: FakeIntervalTree,
+    table: Vec<Range<u32>>,
 }

 impl QueryEnhancer {
-    /// Returns the query indices to use to replace this real query index.
+    /// Returns the query indices that represent this real query index.
+    ///
+    /// # Panics
+    ///
+    /// Panics if `real` is out of the bounds of the table.
    pub fn replacement(&self, real: u32) -> Range<u32> {
-        let real = real as usize;
-
-        // query the fake interval tree with the real query index
-        let (range, (origin, real_length)) = self
-            .real_to_origin
-            .query(real)
-            .expect("real has never been declared");
-
-        // if `real` is the end bound of the range
-        if (range.start + real_length - 1) == real {
-            let mut count = range.len();
-            let mut new_origin = origin;
-            for (i, slice) in self.origins[new_origin..].windows(2).enumerate() {
-                let len = slice[1] - slice[0];
-                count = count.saturating_sub(len);
-                if count == 0 {
-                    new_origin = origin + i;
-                    break;
-                }
-            }
-
-            let n = real - range.start;
-            let start = self.origins[origin];
-            let end = self.origins[new_origin + 1];
-            let remaining = (end - start) - n;
-
-            Range {
-                start: (start + n) as u32,
-                end: (start + n + remaining) as u32,
-            }
-        } else {
-            // just return the origin along with
-            // the real position of the word
-            let n = real as usize - range.start;
-            let origin = self.origins[origin];
-
-            Range {
-                start: (origin + n) as u32,
-                end: (origin + n + 1) as u32,
-            }
-        }
+        self.table[real as usize].clone()
    }
 }

--- a/meilisearch-core/src/bucket_sort.rs
+++ b/meilisearch-core/src/bucket_sort.rs
@ -0,0 +1,717 @@
+use std::ops::Deref;
+use std::{cmp, fmt};
+use std::borrow::Cow;
+use std::mem;
+use std::ops::Range;
+use std::rc::Rc;
+use std::time::{Duration, Instant};
+
+use compact_arena::{SmallArena, Idx32, mk_arena};
+use fst::{IntoStreamer, Streamer};
+use hashbrown::HashMap;
+use levenshtein_automata::DFA;
+use log::debug;
+use meilisearch_tokenizer::{is_cjk, split_query_string};
+use meilisearch_types::DocIndex;
+use sdset::{Set, SetBuf};
+use slice_group_by::{GroupBy, GroupByMut};
+
+use crate::automaton::NGRAMS;
+use crate::automaton::{build_dfa, build_prefix_dfa, build_exact_dfa};
+use crate::automaton::normalize_str;
+use crate::automaton::{QueryEnhancer, QueryEnhancerBuilder};
+
+use crate::criterion::{Criteria, Context, ContextMut};
+use crate::distinct_map::{BufferedDistinctMap, DistinctMap};
+use crate::raw_document::RawDocument;
+use crate::{database::MainT, reordered_attrs::ReorderedAttrs};
+use crate::{store, Document, DocumentId, MResult};
+
+/// Runs `query` and returns the documents located in `range` once the
+/// candidates have been ordered by the successive `criteria`.
+///
+/// When a `filter` is given the whole work is delegated to
+/// `bucket_sort_with_distinct`, with a distinct function that always
+/// returns `None` and a distinct size of 1.
+///
+/// # Errors
+///
+/// Propagates the errors returned by `construct_automatons`, `fetch_matches`,
+/// the `prepare` call of each criterion and `bucket_sort_with_distinct`.
+pub fn bucket_sort<'c, FI>(
+    reader: &heed::RoTxn<MainT>,
+    query: &str,
+    range: Range<usize>,
+    filter: Option<FI>,
+    criteria: Criteria<'c>,
+    searchable_attrs: Option<ReorderedAttrs>,
+    main_store: store::Main,
+    postings_lists_store: store::PostingsLists,
+    documents_fields_counts_store: store::DocumentsFieldsCounts,
+    synonyms_store: store::Synonyms,
+) -> MResult<Vec<Document>>
+where
+    FI: Fn(DocumentId) -> bool,
+{
+    // We delegate the filter work to the distinct query builder,
+    // specifying a distinct rule that has no effect.
+    if filter.is_some() {
+        let distinct = |_| None;
+        let distinct_size = 1;
+        return bucket_sort_with_distinct(
+            reader,
+            query,
+            range,
+            filter,
+            distinct,
+            distinct_size,
+            criteria,
+            searchable_attrs,
+            main_store,
+            postings_lists_store,
+            documents_fields_counts_store,
+            synonyms_store,
+        );
+    }
+
+    let (mut automatons, mut query_enhancer) =
+        construct_automatons(reader, query, main_store, postings_lists_store, synonyms_store)?;
+
+    debug!("{:?}", query_enhancer);
+
+    let before_postings_lists_fetching = Instant::now();
+    mk_arena!(arena);
+    let mut bare_matches =
+        fetch_matches(reader, &automatons, &mut arena, main_store, postings_lists_store)?;
+    debug!("bare matches ({}) retrieved in {:.02?}",
+        bare_matches.len(),
+        before_postings_lists_fetching.elapsed(),
+    );
+
+    // sort by document id so that the matches of a same document are
+    // contiguous, which the linear grouping below relies on
+    let before_raw_documents_presort = Instant::now();
+    bare_matches.sort_unstable_by_key(|sm| sm.document_id);
+    debug!("sort by documents ids took {:.02?}", before_raw_documents_presort.elapsed());
+
+    // build one raw document per group of matches sharing a document id,
+    // groups for which `RawDocument::new` returns `None` are dropped
+    let before_raw_documents_building = Instant::now();
+    let mut prefiltered_documents = 0;
+    let mut raw_documents = Vec::new();
+    for bare_matches in bare_matches.linear_group_by_key_mut(|sm| sm.document_id) {
+        prefiltered_documents += 1;
+        if let Some(raw_document) = RawDocument::new(bare_matches, &automatons, &mut arena, searchable_attrs.as_ref()) {
+            raw_documents.push(raw_document);
+        }
+    }
+    debug!("creating {} (original {}) candidates documents took {:.02?}",
+        raw_documents.len(),
+        prefiltered_documents,
+        before_raw_documents_building.elapsed(),
+    );
+
+    // all the documents start in one single group, each criterion
+    // then splits the groups left by the previous one
+    let mut groups = vec![raw_documents.as_mut_slice()];
+
+    'criteria: for criterion in criteria.as_ref() {
+        // take the groups of the previous criterion and leave `groups`
+        // empty to receive the sub-groups produced by this criterion
+        let tmp_groups = mem::replace(&mut groups, Vec::new());
+        let mut documents_seen = 0;
+
+        for mut group in tmp_groups {
+            let before_criterion_preparation = Instant::now();
+
+            let ctx = ContextMut {
+                reader,
+                postings_lists: &mut arena,
+                query_enhancer: &mut query_enhancer,
+                automatons: &mut automatons,
+                documents_fields_counts_store,
+            };
+
+            criterion.prepare(ctx, &mut group)?;
+            debug!("{:?} preparation took {:.02?}", criterion.name(), before_criterion_preparation.elapsed());
+
+            let ctx = Context {
+                postings_lists: &arena,
+                query_enhancer: &query_enhancer,
+                automatons: &automatons,
+            };
+
+            let before_criterion_sort = Instant::now();
+            group.sort_unstable_by(|a, b| criterion.evaluate(&ctx, a, b));
+            debug!("{:?} evaluation took {:.02?}", criterion.name(), before_criterion_sort.elapsed());
+
+            for group in group.binary_group_by_mut(|a, b| criterion.eq(&ctx, a, b)) {
+                debug!("{:?} produced a group of size {}", criterion.name(), group.len());
+
+                documents_seen += group.len();
+                groups.push(group);
+
+                // we have sorted enough documents if the last document sorted is after
+                // the end of the requested range, we can continue to the next criterion
+                // (the groups not visited yet are not pushed into `groups`)
+                if documents_seen >= range.end {
+                    continue 'criteria;
+                }
+            }
+        }
+    }
+
+    // only the documents of the requested range are converted
+    let iter = raw_documents.into_iter().skip(range.start).take(range.len());
+    let iter = iter.map(|rd| Document::from_raw(rd, &automatons, &arena, searchable_attrs.as_ref()));
+
+    Ok(iter.collect())
+}
+
+pub fn bucket_sort_with_distinct<'c, FI, FD>(
+    reader: &heed::RoTxn<MainT>,
+    query: &str,
+    range: Range<usize>,
+    filter: Option<FI>,
+    distinct: FD,
+    distinct_size: usize,
+    criteria: Criteria<'c>,
+    searchable_attrs: Option<ReorderedAttrs>,
+    main_store: store::Main,
+    postings_lists_store: store::PostingsLists,
+    documents_fields_counts_store: store::DocumentsFieldsCounts,
+    synonyms_store: store::Synonyms,
+) -> MResult<Vec<Document>>
+where
+    FI: Fn(DocumentId) -> bool,
+    FD: Fn(DocumentId) -> Option<u64>,
+{
+    let (mut automatons, mut query_enhancer) =
+        construct_automatons(reader, query, main_store, postings_lists_store, synonyms_store)?;
+
+    let before_postings_lists_fetching = Instant::now();
+    mk_arena!(arena);
+    let mut bare_matches = fetch_matches(reader, &automatons, &mut arena, main_store, postings_lists_store)?;
+    debug!("bare matches ({}) retrieved in {:.02?}",
+        bare_matches.len(),
+        before_postings_lists_fetching.elapsed(),
+    );
+
+    let before_raw_documents_presort = Instant::now();
+    bare_matches.sort_unstable_by_key(|sm| sm.document_id);
+    debug!("sort by documents ids took {:.02?}", before_raw_documents_presort.elapsed());
+
+    let before_raw_documents_building = Instant::now();
+    let mut prefiltered_documents = 0;
+    let mut raw_documents = Vec::new();
+    for bare_matches in bare_matches.linear_group_by_key_mut(|sm| sm.document_id) {
+        prefiltered_documents += 1;
+        if let Some(raw_document) = RawDocument::new(bare_matches, &automatons, &mut arena, searchable_attrs.as_ref()) {
+            raw_documents.push(raw_document);
+        }
+    }
+    debug!("creating {} (original {}) candidates documents took {:.02?}",
+        raw_documents.len(),
+        prefiltered_documents,
+        before_raw_documents_building.elapsed(),
+    );
+
+    let mut groups = vec![raw_documents.as_mut_slice()];
+    let mut key_cache = HashMap::new();
+
+    let mut filter_map = HashMap::new();
+    // these two variables hold the current distinct map and
+    // the raw offset of the start of the group where the
+    // range.start bound is located according to the distinct function
+    let mut distinct_map = DistinctMap::new(distinct_size);
+    let mut distinct_raw_offset = 0;
+
+    'criteria: for criterion in criteria.as_ref() {
+        let tmp_groups = mem::replace(&mut groups, Vec::new());
+        let mut buf_distinct = BufferedDistinctMap::new(&mut distinct_map);
+        let mut documents_seen = 0;
+
+        for mut group in tmp_groups {
+            // if this group does not overlap with the requested range,
+            // push it without sorting and splitting it
+            if documents_seen + group.len() < distinct_raw_offset {
+                documents_seen += group.len();
+                groups.push(group);
+                continue;
+            }
+
+            let ctx = ContextMut {
+                reader,
+                postings_lists: &mut arena,
+                query_enhancer: &mut query_enhancer,
+                automatons: &mut automatons,
+                documents_fields_counts_store,
+            };
+
+            let before_criterion_preparation = Instant::now();
+            criterion.prepare(ctx, &mut group)?;
+            debug!("{:?} preparation took {:.02?}", criterion.name(), before_criterion_preparation.elapsed());
+
+            let ctx = Context {
+                postings_lists: &arena,
+                query_enhancer: &query_enhancer,
+                automatons: &automatons,
+            };
+
+            let before_criterion_sort = Instant::now();
+            group.sort_unstable_by(|a, b| criterion.evaluate(&ctx, a, b));
+            debug!("{:?} evaluation took {:.02?}", criterion.name(), before_criterion_sort.elapsed());
+
+            for group in group.binary_group_by_mut(|a, b| criterion.eq(&ctx, a, b)) {
+                // we must compute the real distinguished len of this sub-group
+                for document in group.iter() {
+                    let filter_accepted = match &filter {
+                        Some(filter) => {
+                            let entry = filter_map.entry(document.id);
+                            *entry.or_insert_with(|| (filter)(document.id))
+                        }
+                        None => true,
+                    };
+
+                    if filter_accepted {
+                        let entry = key_cache.entry(document.id);
+                        let key = entry.or_insert_with(|| (distinct)(document.id).map(Rc::new));
+
+                        match key.clone() {
+                            Some(key) => buf_distinct.register(key),
+                            None => buf_distinct.register_without_key(),
+                        };
+                    }
+
+                    // the requested range end is reached: stop computing distinct
+                    if buf_distinct.len() >= range.end {
+                        break;
+                    }
+                }
+
+                documents_seen += group.len();
+                groups.push(group);
+
+                // if this sub-group does not overlap with the requested range
+                // we must update the distinct map and its start index
+                if buf_distinct.len() < range.start {
+                    buf_distinct.transfert_to_internal();
+                    distinct_raw_offset = documents_seen;
+                }
+
+                // we have sorted enough documents once the last sorted document is past
+                // the end of the requested range: we can continue to the next criterion
+                if buf_distinct.len() >= range.end {
+                    continue 'criteria;
+                }
+            }
+        }
+    }
+
+    // once we classified the documents related to the current
+    // automatons we save that as the next valid result
+    let mut seen = BufferedDistinctMap::new(&mut distinct_map);
+
+    let mut documents = Vec::with_capacity(range.len());
+    for raw_document in raw_documents.into_iter().skip(distinct_raw_offset) {
+        let filter_accepted = match &filter {
+            Some(_) => filter_map.remove(&raw_document.id).unwrap(),
+            None => true,
+        };
+
+        if filter_accepted {
+            let key = key_cache.remove(&raw_document.id).unwrap();
+            let distinct_accepted = match key {
+                Some(key) => seen.register(key),
+                None => seen.register_without_key(),
+            };
+
+            if distinct_accepted && seen.len() > range.start {
+                documents.push(Document::from_raw(raw_document, &automatons, &arena, searchable_attrs.as_ref()));
+                if documents.len() == range.len() {
+                    break;
+                }
+            }
+        }
+    }
+
+    Ok(documents)
+}
+
+/// A match between one query automaton and one document, as produced by
+/// `fetch_matches`. The matching positions themselves stay in the postings
+/// lists arena and are referenced through `postings_list`.
+pub struct BareMatch<'tag> {
+    /// Id of the document in which the matching word was found.
+    pub document_id: DocumentId,
+    /// Position of the matching automaton in the automatons list.
+    pub query_index: u16,
+    /// Distance reported by the automaton's DFA for the matched word.
+    pub distance: u8,
+    /// Set only for an exact automaton matching with a distance of zero
+    /// a word that has the same byte length as the query.
+    pub is_exact: bool,
+    /// Index, in the arena, of the postings list view restricted to this document.
+    pub postings_list: Idx32<'tag>,
+}
+
+impl fmt::Debug for BareMatch<'_> {
+    /// Prints every field except `postings_list`, the arena index.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        f.debug_struct("BareMatch")
+            .field("document_id", &self.document_id)
+            .field("query_index", &self.query_index)
+            .field("distance", &self.distance)
+            .field("is_exact", &self.is_exact)
+            .finish()
+    }
+}
+
+/// A single matching position: the data of a bare match combined with the
+/// attribute and word index of one entry of its postings list.
+///
+/// The derived ordering compares the fields in declaration order, so the
+/// order of the fields below is significant.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+pub struct SimpleMatch {
+    /// Index of the automaton that produced the match.
+    pub query_index: u16,
+    /// Distance of the bare match this position comes from.
+    pub distance: u8,
+    /// Attribute of the postings list entry.
+    pub attribute: u16,
+    /// Word index of the postings list entry.
+    pub word_index: u16,
+    /// Exactness of the bare match this position comes from.
+    pub is_exact: bool,
+}
+
+/// A view over the postings list of one word.
+#[derive(Clone)]
+pub enum PostingsListView<'txn> {
+    /// A window over a shared, reference counted postings list.
+    Original {
+        /// Bytes of the word this postings list belongs to.
+        input: Rc<[u8]>,
+        /// The whole postings list, shared between the views created from it.
+        postings_list: Rc<Cow<'txn, Set<DocIndex>>>,
+        /// Start of the window inside `postings_list`.
+        offset: usize,
+        /// Number of entries of the window.
+        len: usize,
+    },
+    /// A postings list that has been replaced by an owned one (see `rewrite_with`).
+    Rewritten {
+        /// Bytes of the word this postings list belongs to.
+        input: Rc<[u8]>,
+        /// The owned replacement postings list.
+        postings_list: SetBuf<DocIndex>,
+    },
+}
+
+impl fmt::Debug for PostingsListView<'_> {
+    /// Prints the input word and the postings list visible through the view.
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        // Formatting must never panic: the input is decoded lossily instead of
+        // being unwrapped, invalid UTF-8 sequences are shown as U+FFFD.
+        // The output is unchanged for valid UTF-8 inputs.
+        let input = String::from_utf8_lossy(self.input());
+
+        f.debug_struct("PostingsListView")
+            .field("input", &input)
+            .field("postings_list", &self.as_ref())
+            .finish()
+    }
+}
+
+impl<'txn> PostingsListView<'txn> {
+    /// Creates a view that covers the whole of the given shared postings list.
+    pub fn original(input: Rc<[u8]>, postings_list: Rc<Cow<'txn, Set<DocIndex>>>) -> PostingsListView<'txn> {
+        PostingsListView::Original {
+            input,
+            offset: 0,
+            // read before `postings_list` is moved into the view
+            len: postings_list.len(),
+            postings_list,
+        }
+    }
+
+    /// Creates a view that owns its postings list.
+    pub fn rewritten(input: Rc<[u8]>, postings_list: SetBuf<DocIndex>) -> PostingsListView<'txn> {
+        PostingsListView::Rewritten { input, postings_list }
+    }
+
+    /// Replaces this view by a rewritten one that keeps the same input
+    /// but exposes the given postings list.
+    pub fn rewrite_with(&mut self, postings_list: SetBuf<DocIndex>) {
+        let input = match self {
+            PostingsListView::Original { input, .. }
+            | PostingsListView::Rewritten { input, .. } => Rc::clone(input),
+        };
+
+        *self = PostingsListView::Rewritten { input, postings_list };
+    }
+
+    /// Number of entries visible through this view.
+    pub fn len(&self) -> usize {
+        match *self {
+            PostingsListView::Original { len, .. } => len,
+            PostingsListView::Rewritten { ref postings_list, .. } => postings_list.len(),
+        }
+    }
+
+    /// Bytes of the word this view was created for.
+    pub fn input(&self) -> &[u8] {
+        match self {
+            PostingsListView::Original { input, .. }
+            | PostingsListView::Rewritten { input, .. } => input,
+        }
+    }
+
+    /// Creates a sub-view of `range_len` entries starting `range_offset`
+    /// entries after the start of this view, sharing the same postings list.
+    ///
+    /// # Panics
+    ///
+    /// Panics if the requested range does not fit in this view or
+    /// if this view has been rewritten.
+    pub fn range(&self, range_offset: usize, range_len: usize) -> PostingsListView<'txn> {
+        let (input, postings_list, offset, len) = match self {
+            PostingsListView::Original { input, postings_list, offset, len } => {
+                (input, postings_list, offset, len)
+            },
+            PostingsListView::Rewritten { .. } => {
+                panic!("Cannot create a range on a rewritten postings list view")
+            },
+        };
+
+        assert!(range_offset + range_len <= *len);
+
+        PostingsListView::Original {
+            input: Rc::clone(input),
+            postings_list: Rc::clone(postings_list),
+            offset: offset + range_offset,
+            len: range_len,
+        }
+    }
+}
+
+impl AsRef<Set<DocIndex>> for PostingsListView<'_> {
+    /// Returns the set visible through the view.
+    fn as_ref(&self) -> &Set<DocIndex> {
+        // `&PostingsListView` is coerced to `&Set<DocIndex>` by the `Deref` impl
+        self
+    }
+}
+
+impl Deref for PostingsListView<'_> {
+    type Target = Set<DocIndex>;
+
+    /// Gives access to the set visible through the view.
+    fn deref(&self) -> &Set<DocIndex> {
+        match *self {
+            PostingsListView::Original { ref postings_list, offset, len, .. } => {
+                // only the `offset..offset + len` window of the shared list is exposed
+                // NOTE(review): `new_unchecked` presumably skips the set validation and
+                // relies on a sub-slice of a valid set being a valid set — confirm
+                Set::new_unchecked(&postings_list[offset..offset + len])
+            },
+            PostingsListView::Rewritten { ref postings_list, .. } => postings_list,
+        }
+    }
+}
+
+/// Runs every automaton against the words FST and returns one `BareMatch`
+/// for each document found in the postings list of each matching word.
+///
+/// The part of a word's postings list that belongs to one document is added
+/// to `arena` as a ranged `PostingsListView`, the returned match only keeps its index.
+/// An empty list is returned when the main store contains no words FST.
+fn fetch_matches<'txn, 'tag>(
+    reader: &'txn heed::RoTxn<MainT>,
+    automatons: &[QueryWordAutomaton],
+    arena: &mut SmallArena<'tag, PostingsListView<'txn>>,
+    main_store: store::Main,
+    postings_lists_store: store::PostingsLists,
+) -> MResult<Vec<BareMatch<'tag>>>
+{
+    let before_words_fst = Instant::now();
+    let words = match main_store.words_fst(reader)? {
+        Some(words) => words,
+        None => return Ok(Vec::new()),
+    };
+    debug!("words fst took {:.02?}", before_words_fst.elapsed());
+
+    let mut total_postings_lists = Vec::new();
+
+    // durations accumulated over all the automatons, only used for the debug logs
+    let mut dfa_time = Duration::default();
+    let mut stream_next_time = Duration::default();
+    let mut postings_lists_fetching_time = Duration::default();
+
+    for (query_index, automaton) in automatons.iter().enumerate() {
+        let before_dfa = Instant::now();
+        let dfa = automaton.dfa();
+        let QueryWordAutomaton { query, is_exact, .. } = automaton;
+        dfa_time += before_dfa.elapsed();
+
+        let mut number_of_words = 0;
+        let mut stream = words.search(&dfa).into_stream();
+
+        // an explicit loop is used instead of a `while let` to be able to time every
+        // `stream.next()` call (the final call, the one returning `None`, is not counted)
+        loop {
+            let before_stream_next = Instant::now();
+            let input = match stream.next() {
+                Some(input) => input,
+                None => break,
+            };
+            stream_next_time += before_stream_next.elapsed();
+
+            number_of_words += 1;
+
+            let distance = dfa.eval(input).to_u8();
+            // exact only if the automaton is exact and the word matches without
+            // any distance and has the same byte length as the query
+            let is_exact = *is_exact && distance == 0 && input.len() == query.len();
+
+            let before_postings_lists_fetching = Instant::now();
+            if let Some(postings_list) = postings_lists_store.postings_list(reader, input)? {
+
+                let input = Rc::from(input);
+                let postings_list = Rc::new(postings_list);
+                let postings_list_view = PostingsListView::original(input, postings_list);
+
+                // start, in the postings list, of the group of the current document
+                let mut offset = 0;
+                for group in postings_list_view.linear_group_by_key(|di| di.document_id) {
+
+                    // store a view restricted to this document, the match keeps its index
+                    let posting_list_index = arena.add(postings_list_view.range(offset, group.len()));
+                    let document_id = group[0].document_id;
+                    let bare_match = BareMatch {
+                        document_id,
+                        query_index: query_index as u16,
+                        distance,
+                        is_exact,
+                        postings_list: posting_list_index,
+                    };
+
+                    total_postings_lists.push(bare_match);
+                    offset += group.len();
+                }
+            }
+            postings_lists_fetching_time += before_postings_lists_fetching.elapsed();
+        }
+
+        debug!("{:?} gives {} words", query, number_of_words);
+    }
+
+    debug!("stream next took {:.02?}", stream_next_time);
+    debug!("postings lists fetching took {:.02?}", postings_lists_fetching_time);
+    debug!("dfa creation took {:.02?}", dfa_time);
+
+    Ok(total_postings_lists)
+}
+
+/// Description of one word to search for, from which the DFA is built on demand.
+#[derive(Debug)]
+pub struct QueryWordAutomaton {
+    /// The word the DFA is built from.
+    pub query: String,
+    /// Is it a word that must be considered exact
+    /// or is it some derived word (i.e. a synonym)
+    pub is_exact: bool,
+    /// Whether the prefix DFA must be built for this word
+    /// (only looked at when it is not part of a phrase query).
+    pub is_prefix: bool,
+    /// If it's part of a phrase query, its index in the phrase
+    /// and the length of the phrase
+    pub phrase_query: Option<(u16, u16)>,
+}
+
+impl QueryWordAutomaton {
+    /// Creates an automaton flagged as exact, not prefix and outside of any phrase query.
+    pub fn exact(query: &str) -> QueryWordAutomaton {
+        QueryWordAutomaton {
+            query: String::from(query),
+            is_exact: true,
+            is_prefix: false,
+            phrase_query: None,
+        }
+    }
+
+    /// Same as `exact` but flagged as prefix.
+    pub fn exact_prefix(query: &str) -> QueryWordAutomaton {
+        QueryWordAutomaton { is_prefix: true, ..QueryWordAutomaton::exact(query) }
+    }
+
+    /// Same as `exact` but flagged as non-exact.
+    pub fn non_exact(query: &str) -> QueryWordAutomaton {
+        QueryWordAutomaton { is_exact: false, ..QueryWordAutomaton::exact(query) }
+    }
+
+    /// Builds the DFA of this word: the exact DFA when it is part of a phrase query,
+    /// otherwise the prefix DFA when flagged as prefix, otherwise the default DFA.
+    pub fn dfa(&self) -> DFA {
+        match (self.phrase_query, self.is_prefix) {
+            (Some(_), _) => build_exact_dfa(&self.query),
+            (None, true) => build_prefix_dfa(&self.query),
+            (None, false) => build_dfa(&self.query),
+        }
+    }
+}
+
+/// Searches for the best way to cut `word` into two words that are both indexed.
+///
+/// Every char boundary but the first one is tried. A cut is a candidate only
+/// when both halves have a non-empty postings list, its score is the length
+/// of the shorter of the two postings lists. The first candidate reaching the
+/// highest score is returned, `None` when there is no candidate at all.
+fn split_best_frequency<'a>(
+    reader: &heed::RoTxn<MainT>,
+    word: &'a str,
+    postings_lists_store: store::PostingsLists,
+) -> MResult<Option<(&'a str, &'a str)>> {
+    let mut best: Option<(usize, &'a str, &'a str)> = None;
+
+    // the boundary at index 0 is skipped, it would give an empty left half
+    for (boundary, _) in word.char_indices().skip(1) {
+        let (left, right) = word.split_at(boundary);
+
+        let left_freq = match postings_lists_store.postings_list(reader, left.as_ref())? {
+            Some(postings_list) => postings_list.len(),
+            None => 0,
+        };
+
+        let right_freq = match postings_lists_store.postings_list(reader, right.as_ref())? {
+            Some(postings_list) => postings_list.len(),
+            None => 0,
+        };
+
+        let score = cmp::min(left_freq, right_freq);
+        if score == 0 {
+            continue;
+        }
+
+        // a strict comparison keeps the earliest cut among equal scores
+        let is_better = match best {
+            Some((best_score, _, _)) => score > best_score,
+            None => true,
+        };
+
+        if is_better {
+            best = Some((score, left, right));
+        }
+    }
+
+    Ok(best.map(|(_, left, right)| (left, right)))
+}
+
+/// Builds the automatons to run for `query` along with the `QueryEnhancer`
+/// built from every (query range, automaton index, words) declaration made on the way.
+///
+/// The automatons of the original query words come first. Then, for every ngram
+/// of 1 to `NGRAMS` query words, come the automatons of the words of its synonyms,
+/// followed by either the two halves of the best split of the word (ngrams of one
+/// word) or the concatenation of the words of the ngram (longer ngrams).
+fn construct_automatons(
+    reader: &heed::RoTxn<MainT>,
+    query: &str,
+    main_store: store::Main,
+    postings_lists_store: store::PostingsLists,
+    synonym_store: store::Synonyms,
+) -> MResult<(Vec<QueryWordAutomaton>, QueryEnhancer)> {
+    let has_end_whitespace = query.chars().last().map_or(false, char::is_whitespace);
+    let query_words: Vec<_> = split_query_string(query).map(str::to_lowercase).collect();
+    let synonyms = match main_store.synonyms_fst(reader)? {
+        Some(synonym) => synonym,
+        None => fst::Set::default(),
+    };
+
+    // index that the next pushed automaton will have in `automatons`
+    let mut automaton_index = 0;
+    let mut automatons = Vec::new();
+    let mut enhancer_builder = QueryEnhancerBuilder::new(&query_words);
+
+    // We must not declare the original words to the query enhancer
+    // *but* we need to push them in the automatons list first
+    let mut original_words = query_words.iter().peekable();
+    while let Some(word) = original_words.next() {
+        // only the last word, when the query does not end with a whitespace
+        // and the word is not made of CJK chars only, is searched as a prefix
+        let has_following_word = original_words.peek().is_some();
+        let not_prefix_dfa = has_following_word || has_end_whitespace || word.chars().all(is_cjk);
+
+        let automaton = if not_prefix_dfa {
+            QueryWordAutomaton::exact(word)
+        } else {
+            QueryWordAutomaton::exact_prefix(word)
+        };
+        automaton_index += 1;
+        automatons.push(automaton);
+    }
+
+    for n in 1..=NGRAMS {
+        let mut ngrams = query_words.windows(n).enumerate().peekable();
+        while let Some((query_index, ngram_slice)) = ngrams.next() {
+            let query_range = query_index..query_index + n;
+            let ngram_nb_words = ngram_slice.len();
+            let ngram = ngram_slice.join(" ");
+
+            let has_following_word = ngrams.peek().is_some();
+            let not_prefix_dfa =
+                has_following_word || has_end_whitespace || ngram.chars().all(is_cjk);
+
+            // automaton of synonyms of the ngrams
+            let normalized = normalize_str(&ngram);
+            let lev = if not_prefix_dfa {
+                build_dfa(&normalized)
+            } else {
+                build_prefix_dfa(&normalized)
+            };
+
+            let mut stream = synonyms.search(&lev).into_stream();
+            while let Some(base) = stream.next() {
+                // only trigger alternatives when the last word has been typed
+                // i.e. "new " does not, but "new yo" triggers alternatives to "new york"
+                let base = std::str::from_utf8(base).unwrap();
+                let base_nb_words = split_query_string(base).count();
+                if ngram_nb_words != base_nb_words {
+                    continue;
+                }
+
+                if let Some(synonyms) = synonym_store.synonyms(reader, base.as_bytes())? {
+                    let mut stream = synonyms.into_stream();
+                    while let Some(synonyms) = stream.next() {
+                        let synonyms = std::str::from_utf8(synonyms).unwrap();
+                        let synonyms_words: Vec<_> = split_query_string(synonyms).collect();
+                        let nb_synonym_words = synonyms_words.len();
+
+                        // the declaration points to the automaton of the first word of the synonym
+                        let real_query_index = automaton_index;
+                        enhancer_builder.declare(query_range.clone(), real_query_index, &synonyms_words);
+
+                        // the word of a one word synonym is exact,
+                        // the words of a longer synonym are not
+                        for synonym in synonyms_words {
+                            let automaton = if nb_synonym_words == 1 {
+                                QueryWordAutomaton::exact(synonym)
+                            } else {
+                                QueryWordAutomaton::non_exact(synonym)
+                            };
+                            automaton_index += 1;
+                            automatons.push(automaton);
+                        }
+                    }
+                }
+            }
+
+            if n == 1 {
+                // automatons for split words: both halves are declared
+                // separately and form a phrase query of two words
+                if let Some((left, right)) = split_best_frequency(reader, &normalized, postings_lists_store)? {
+                    let mut left_automaton = QueryWordAutomaton::exact(left);
+                    left_automaton.phrase_query = Some((0, 2));
+                    enhancer_builder.declare(query_range.clone(), automaton_index, &[left]);
+                    automaton_index += 1;
+                    automatons.push(left_automaton);
+
+                    let mut right_automaton = QueryWordAutomaton::exact(right);
+                    right_automaton.phrase_query = Some((1, 2));
+                    enhancer_builder.declare(query_range.clone(), automaton_index, &[right]);
+                    automaton_index += 1;
+                    automatons.push(right_automaton);
+                }
+            } else {
+                // automaton of concatenation of query words
+                let concat = ngram_slice.concat();
+                let normalized = normalize_str(&concat);
+
+                let real_query_index = automaton_index;
+                enhancer_builder.declare(query_range.clone(), real_query_index, &[&normalized]);
+
+                let automaton = QueryWordAutomaton::exact(&normalized);
+                automaton_index += 1;
+                automatons.push(automaton);
+            }
+        }
+    }
+
+    Ok((automatons, enhancer_builder.build()))
+}
--- a/meilisearch-core/src/criterion/attribute.rs
+++ b/meilisearch-core/src/criterion/attribute.rs
@ -0,0 +1,37 @@
+use std::cmp::Ordering;
+use slice_group_by::GroupBy;
+use crate::{RawDocument, MResult};
+use crate::bucket_sort::SimpleMatch;
+use super::{Criterion, Context, ContextMut, prepare_bare_matches};
+
+/// Criterion that compares documents on the attributes of their processed matches.
+pub struct Attribute;
+
+impl Criterion for Attribute {
+    fn name(&self) -> &str {
+        "attribute"
+    }
+
+    /// Runs `prepare_bare_matches` on the documents before they get compared.
+    fn prepare<'h, 'p, 'tag, 'txn, 'q, 'a, 'r>(
+        &self,
+        ctx: ContextMut<'h, 'p, 'tag, 'txn, 'q, 'a>,
+        documents: &mut [RawDocument<'r, 'tag>],
+    ) -> MResult<()>
+    {
+        let ContextMut { postings_lists, query_enhancer, .. } = ctx;
+        prepare_bare_matches(documents, postings_lists, query_enhancer);
+
+        Ok(())
+    }
+
+    /// Compares the documents on the sum of the attributes of their processed
+    /// matches, the smallest sum being ordered first.
+    fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
+        /// Sums the attribute of the first match of every group
+        /// of consecutive matches that share the same query index.
+        #[inline]
+        fn attributes_sum(matches: &[SimpleMatch]) -> usize {
+            matches
+                .linear_group_by_key(|sm| sm.query_index)
+                .map(|group| group[0].attribute as usize)
+                .sum()
+        }
+
+        let lhs_sum = attributes_sum(&lhs.processed_matches);
+        let rhs_sum = attributes_sum(&rhs.processed_matches);
+
+        lhs_sum.cmp(&rhs_sum)
+    }
+}
--- a/meilisearch-core/src/criterion/document_id.rs
+++ b/meilisearch-core/src/criterion/document_id.rs
@ -1,16 +1,16 @@
-use crate::criterion::Criterion;
-use crate::RawDocument;
 use std::cmp::Ordering;
+use crate::RawDocument;
+use super::{Criterion, Context};

-#[derive(Debug, Clone, Copy)]
 pub struct DocumentId;

 impl Criterion for DocumentId {
-    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
-        lhs.id.cmp(&rhs.id)
-    }
+    fn name(&self) -> &str { "stable document id" }

-    fn name(&self) -> &str {
-        "DocumentId"
+    fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
+        let lhs = &lhs.id;
+        let rhs = &rhs.id;
+
+        lhs.cmp(rhs)
    }
 }
--- a/meilisearch-core/src/criterion/exact.rs
+++ b/meilisearch-core/src/criterion/exact.rs
@ -1,132 +1,78 @@
-use std::cmp::Ordering;
-
+use std::cmp::{Ordering, Reverse};
+use std::collections::hash_map::{HashMap, Entry};
 use meilisearch_schema::SchemaAttr;
-use sdset::Set;
 use slice_group_by::GroupBy;
+use crate::{RawDocument, MResult};
+use crate::bucket_sort::BareMatch;
+use super::{Criterion, Context, ContextMut};

-use crate::criterion::Criterion;
-use crate::RawDocument;
+pub struct Exact;

-#[inline]
-fn number_exact_matches(
-    query_index: &[u32],
-    attribute: &[u16],
-    is_exact: &[bool],
-    fields_counts: &Set<(SchemaAttr, u64)>,
-) -> usize {
-    let mut count = 0;
-    let mut index = 0;
+impl Criterion for Exact {
+    fn name(&self) -> &str { "exact" }

-    for group in query_index.linear_group() {
-        let len = group.len();
+    fn prepare<'h, 'p, 'tag, 'txn, 'q, 'a, 'r>(
+        &self,
+        ctx: ContextMut<'h, 'p, 'tag, 'txn, 'q, 'a>,
+        documents: &mut [RawDocument<'r, 'tag>],
+    ) -> MResult<()>
+    {
+        let store = ctx.documents_fields_counts_store;
+        let reader = ctx.reader;

-        let mut found_exact = false;
-        for (pos, is_exact) in is_exact[index..index + len].iter().enumerate() {
-            if *is_exact {
-                found_exact = true;
-                let attr = &attribute[index + pos];
-                if let Ok(pos) = fields_counts.binary_search_by_key(attr, |(a, _)| a.0) {
-                    let (_, count) = fields_counts[pos];
-                    if count == 1 {
-                        return usize::max_value();
+        'documents: for doc in documents {
+            doc.bare_matches.sort_unstable_by_key(|bm| (bm.query_index, Reverse(bm.is_exact)));
+
+            // mark the document if we find a "one word field" that matches
+            let mut fields_counts = HashMap::new();
+            for group in doc.bare_matches.linear_group_by_key(|bm| bm.query_index) {
+                for group in group.linear_group_by_key(|bm| bm.is_exact) {
+                    if !group[0].is_exact { break }
+
+                    for bm in group {
+                        for di in ctx.postings_lists[bm.postings_list].as_ref() {
+
+                            let attr = SchemaAttr(di.attribute);
+                            let count = match fields_counts.entry(attr) {
+                                Entry::Occupied(entry) => *entry.get(),
+                                Entry::Vacant(entry) => {
+                                    let count = store.document_field_count(reader, doc.id, attr)?;
+                                    *entry.insert(count)
+                                },
+                            };
+
+                            if count == Some(1) {
+                                doc.contains_one_word_field = true;
+                                continue 'documents
+                            }
+                        }
                    }
                }
            }
        }

-        count += found_exact as usize;
-        index += len;
+        Ok(())
    }

-    count
-}
+    fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
+        #[inline]
+        fn sum_exact_query_words(matches: &[BareMatch]) -> usize {
+            let mut sum_exact_query_words = 0;

-#[derive(Debug, Clone, Copy)]
-pub struct Exact;
+            for group in matches.linear_group_by_key(|bm| bm.query_index) {
+                sum_exact_query_words += group[0].is_exact as usize;
+            }

-impl Criterion for Exact {
-    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
-        let lhs = {
-            let query_index = lhs.query_index();
-            let is_exact = lhs.is_exact();
-            let attribute = lhs.attribute();
-            let fields_counts = &lhs.fields_counts;
+            sum_exact_query_words
+        }

-            number_exact_matches(query_index, attribute, is_exact, fields_counts)
-        };
-
-        let rhs = {
-            let query_index = rhs.query_index();
-            let is_exact = rhs.is_exact();
-            let attribute = rhs.attribute();
-            let fields_counts = &rhs.fields_counts;
-
-            number_exact_matches(query_index, attribute, is_exact, fields_counts)
-        };
-
-        lhs.cmp(&rhs).reverse()
-    }
-
-    fn name(&self) -> &str {
-        "Exact"
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    // typing: "soulier"
-    //
-    // doc0: "Soulier bleu"
-    // doc1: "souliereres rouge"
-    #[test]
-    fn easy_case() {
-        let doc0 = {
-            let query_index = &[0];
-            let attribute = &[0];
-            let is_exact = &[true];
-            let fields_counts = Set::new(&[(SchemaAttr(0), 2)]).unwrap();
-
-            number_exact_matches(query_index, attribute, is_exact, fields_counts)
-        };
-
-        let doc1 = {
-            let query_index = &[0];
-            let attribute = &[0];
-            let is_exact = &[false];
-            let fields_counts = Set::new(&[(SchemaAttr(0), 2)]).unwrap();
-
-            number_exact_matches(query_index, attribute, is_exact, fields_counts)
-        };
-
-        assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
-    }
-
-    // typing: "soulier"
-    //
-    // doc0: { 0. "soulier" }
-    // doc1: { 0. "soulier bleu et blanc" }
-    #[test]
-    fn basic() {
-        let doc0 = {
-            let query_index = &[0];
-            let attribute = &[0];
-            let is_exact = &[true];
-            let fields_counts = Set::new(&[(SchemaAttr(0), 1)]).unwrap();
-
-            number_exact_matches(query_index, attribute, is_exact, fields_counts)
-        };
-
-        let doc1 = {
-            let query_index = &[0];
-            let attribute = &[0];
-            let is_exact = &[true];
-            let fields_counts = Set::new(&[(SchemaAttr(0), 4)]).unwrap();
-
-            number_exact_matches(query_index, attribute, is_exact, fields_counts)
-        };
-
-        assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
+        // does it contain a "one word field"?
+        lhs.contains_one_word_field.cmp(&rhs.contains_one_word_field).reverse()
+        // if not, which document contains the most exact words?
+        .then_with(|| {
+            let lhs = sum_exact_query_words(&lhs.bare_matches);
+            let rhs = sum_exact_query_words(&rhs.bare_matches);
+            lhs.cmp(&rhs).reverse()
+        })
    }
 }
--- a/meilisearch-core/src/criterion/mod.rs
+++ b/meilisearch-core/src/criterion/mod.rs
@ -1,59 +1,75 @@
-mod document_id;
+use std::cmp::{self, Ordering};
+
+use compact_arena::SmallArena;
+use sdset::SetBuf;
+use slice_group_by::GroupBy;
+
+use crate::{store, RawDocument, MResult};
+use crate::automaton::QueryEnhancer;
+use crate::bucket_sort::{SimpleMatch, PostingsListView, QueryWordAutomaton};
+use crate::database::MainT;
+
+mod typo;
+mod words;
+mod proximity;
+mod attribute;
+mod words_position;
 mod exact;
-mod number_of_words;
+mod document_id;
 mod sort_by_attr;
-mod sum_of_typos;
-mod sum_of_words_attribute;
-mod sum_of_words_position;
-mod words_proximity;

-use crate::RawDocument;
-use std::cmp::Ordering;
-
-pub use self::{
-    document_id::DocumentId, exact::Exact, number_of_words::NumberOfWords,
-    sort_by_attr::SortByAttr, sum_of_typos::SumOfTypos,
-    sum_of_words_attribute::SumOfWordsAttribute, sum_of_words_position::SumOfWordsPosition,
-    words_proximity::WordsProximity,
-};
-
-pub trait Criterion: Send + Sync {
-    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering;
+pub use self::typo::Typo;
+pub use self::words::Words;
+pub use self::proximity::Proximity;
+pub use self::attribute::Attribute;
+pub use self::words_position::WordsPosition;
+pub use self::exact::Exact;
+pub use self::document_id::DocumentId;
+pub use self::sort_by_attr::SortByAttr;

+pub trait Criterion {
    fn name(&self) -> &str;

+    fn prepare<'h, 'p, 'tag, 'txn, 'q, 'a, 'r>(
+        &self,
+        _ctx: ContextMut<'h, 'p, 'tag, 'txn, 'q, 'a>,
+        _documents: &mut [RawDocument<'r, 'tag>],
+    ) -> MResult<()>
+    {
+        Ok(())
+    }
+
+    fn evaluate<'p, 'tag, 'txn, 'q, 'a, 'r>(
+        &self,
+        ctx: &Context<'p, 'tag, 'txn, 'q, 'a>,
+        lhs: &RawDocument<'r, 'tag>,
+        rhs: &RawDocument<'r, 'tag>,
+    ) -> Ordering;
+
    #[inline]
-    fn eq(&self, lhs: &RawDocument, rhs: &RawDocument) -> bool {
-        self.evaluate(lhs, rhs) == Ordering::Equal
+    fn eq<'p, 'tag, 'txn, 'q, 'a, 'r>(
+        &self,
+        ctx: &Context<'p, 'tag, 'txn, 'q, 'a>,
+        lhs: &RawDocument<'r, 'tag>,
+        rhs: &RawDocument<'r, 'tag>,
+    ) -> bool
+    {
+        self.evaluate(ctx, lhs, rhs) == Ordering::Equal
    }
 }

-impl<'a, T: Criterion + ?Sized + Send + Sync> Criterion for &'a T {
-    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
-        (**self).evaluate(lhs, rhs)
-    }
-
-    fn name(&self) -> &str {
-        (**self).name()
-    }
-
-    fn eq(&self, lhs: &RawDocument, rhs: &RawDocument) -> bool {
-        (**self).eq(lhs, rhs)
-    }
+/// Context given by value to `Criterion::prepare`: it bundles the read
+/// transaction, the fields counts store and mutable access to the postings
+/// lists arena, the query enhancer and the automatons.
+pub struct ContextMut<'h, 'p, 'tag, 'txn, 'q, 'a> {
+    pub reader: &'h heed::RoTxn<MainT>,
+    pub postings_lists: &'p mut SmallArena<'tag, PostingsListView<'txn>>,
+    pub query_enhancer: &'q mut QueryEnhancer,
+    pub automatons: &'a mut [QueryWordAutomaton],
+    pub documents_fields_counts_store: store::DocumentsFieldsCounts,
 }

-impl<T: Criterion + ?Sized> Criterion for Box<T> {
-    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
-        (**self).evaluate(lhs, rhs)
-    }
-
-    fn name(&self) -> &str {
-        (**self).name()
-    }
-
-    fn eq(&self, lhs: &RawDocument, rhs: &RawDocument) -> bool {
-        (**self).eq(lhs, rhs)
-    }
+/// Read-only context given to `Criterion::evaluate` and `Criterion::eq`:
+/// shared access to the postings lists arena, the query enhancer and the automatons.
+pub struct Context<'p, 'tag, 'txn, 'q, 'a> {
+    pub postings_lists: &'p SmallArena<'tag, PostingsListView<'txn>>,
+    pub query_enhancer: &'q QueryEnhancer,
+    pub automatons: &'a [QueryWordAutomaton],
 }

 #[derive(Default)]
@ -103,11 +119,11 @@ pub struct Criteria<'a> {
 impl<'a> Default for Criteria<'a> {
    fn default() -> Self {
        CriteriaBuilder::with_capacity(7)
-            .add(SumOfTypos)
-            .add(NumberOfWords)
-            .add(WordsProximity)
-            .add(SumOfWordsAttribute)
-            .add(SumOfWordsPosition)
+            .add(Typo)
+            .add(Words)
+            .add(Proximity)
+            .add(Attribute)
+            .add(WordsPosition)
            .add(Exact)
            .add(DocumentId)
            .build()
@ -119,3 +135,162 @@ impl<'a> AsRef<[Box<dyn Criterion + 'a>]> for Criteria<'a> {
        &self.inner
    }
 }
+
+/// Fills `processed_distances` of every document that does not have it yet.
+///
+/// Slot `i` of the resulting vector holds the smallest distance among the
+/// bare matches whose query index is rewritten (see
+/// `QueryEnhancer::replacement`) to a range containing `i`, or `None` when
+/// no bare match covers `i`. Bare matches pointing to an empty postings
+/// list are ignored.
+fn prepare_query_distances<'a, 'tag, 'txn>(
+    documents: &mut [RawDocument<'a, 'tag>],
+    query_enhancer: &QueryEnhancer,
+    postings_lists: &SmallArena<'tag, PostingsListView<'txn>>,
+) {
+    for document in documents {
+        // a previous criterion already prepared this document
+        if !document.processed_distances.is_empty() { continue }
+
+        let mut distances = Vec::new();
+        let usable_matches = document.bare_matches.iter()
+            .filter(|bm| !postings_lists[bm.postings_list].is_empty());
+
+        for bm in usable_matches {
+            let range = query_enhancer.replacement(bm.query_index as u32);
+
+            // grow (never shrink) so that every index of the range exists
+            if distances.len() < range.end as usize {
+                distances.resize(range.end as usize, None);
+            }
+
+            for index in range {
+                let index = index as usize;
+                distances[index] = match distances[index] {
+                    // the distance already stored is at least as good
+                    Some(best) if best <= bm.distance => Some(best),
+                    _ => Some(bm.distance),
+                };
+            }
+        }
+
+        document.processed_distances = distances;
+    }
+}
+
+/// Fills `processed_matches` of every document that does not have it yet.
+///
+/// Every bare match is expanded into one `SimpleMatch` per entry of its
+/// postings list; the expanded matches then go through
+/// `multiword_rewrite_matches` before being stored on the document.
+fn prepare_bare_matches<'a, 'tag, 'txn>(
+    documents: &mut [RawDocument<'a, 'tag>],
+    postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
+    query_enhancer: &QueryEnhancer,
+) {
+    for document in documents {
+        // a previous criterion already prepared this document
+        if !document.processed_matches.is_empty() { continue }
+
+        let mut expanded = Vec::new();
+        for bare in document.bare_matches.iter() {
+            let entries = &postings_lists[bare.postings_list];
+            expanded.reserve(entries.len());
+
+            for di in entries.as_ref() {
+                expanded.push(SimpleMatch {
+                    query_index: bare.query_index,
+                    distance: bare.distance,
+                    attribute: di.attribute,
+                    word_index: di.word_index,
+                    is_exact: bare.is_exact,
+                });
+            }
+        }
+
+        let rewritten = multiword_rewrite_matches(&mut expanded, query_enhancer);
+        document.processed_matches = rewritten.into_vec();
+    }
+}
+
+/// Rewrites the query indices of `matches` according to `query_enhancer`
+/// and inserts "padding" matches for query words replaced by several words.
+///
+/// `matches` is sorted in place by `(attribute, word_index)`. Each match is
+/// then re-emitted with the first query index of its replacement range, and
+/// one extra match is emitted for every other index of that range, at the
+/// following word positions. Inside one attribute, the word indices of later
+/// matches are shifted by the padding accumulated so far.
+///
+/// The result is built with `SetBuf::from_dirty`.
+///
+/// # Panics
+///
+/// Panics if the replacement range of a looked-ahead match is empty
+/// (`rep.next().unwrap()`).
+fn multiword_rewrite_matches(
+    matches: &mut [SimpleMatch],
+    query_enhancer: &QueryEnhancer,
+) -> SetBuf<SimpleMatch>
+{
+    // the grouping below is linear: it relies on this ordering
+    matches.sort_unstable_by_key(|m| (m.attribute, m.word_index));
+
+    let mut padded_matches = Vec::with_capacity(matches.len());
+
+    // let before_padding = Instant::now();
+    // for each attribute of each document
+    for same_document_attribute in matches.linear_group_by_key(|m| m.attribute) {
+        // padding will only be applied
+        // to word indices in the same attribute
+        let mut padding = 0;
+        let mut iter = same_document_attribute.linear_group_by_key(|m| m.word_index);
+
+        // for each match at the same position
+        // in this document attribute
+        while let Some(same_word_index) = iter.next() {
+            // find the biggest padding
+            let mut biggest = 0;
+            for match_ in same_word_index {
+                let mut replacement = query_enhancer.replacement(match_.query_index as u32);
+                let replacement_len = replacement.len();
+                // groups of matches located after the current word index
+                let nexts = iter.remainder().linear_group_by_key(|m| m.word_index);
+
+                // the match itself takes the first query index of its
+                // replacement range; `replacement` then only yields the
+                // remaining (padding) indices
+                if let Some(query_index) = replacement.next() {
+                    let word_index = match_.word_index + padding as u16;
+                    let query_index = query_index as u16;
+                    let match_ = SimpleMatch { query_index, word_index, ..*match_ };
+                    padded_matches.push(match_);
+                }
+
+                let mut found = false;
+
+                // look ahead and if there already is a match
+                // corresponding to this padding word, abort the padding
+                'padding: for (x, next_group) in nexts.enumerate() {
+                    // the x-th following group is only compared with the
+                    // padding words from position x onwards
+                    for (i, query_index) in replacement.clone().enumerate().skip(x) {
+                        let word_index = match_.word_index + padding as u16 + (i + 1) as u16;
+                        let query_index = query_index as u16;
+                        let padmatch = SimpleMatch { query_index, word_index, ..*match_ };
+
+                        for nmatch_ in next_group {
+                            let mut rep = query_enhancer.replacement(nmatch_.query_index as u32);
+                            // panics when the replacement range is empty
+                            let query_index = rep.next().unwrap() as u16;
+                            if query_index == padmatch.query_index {
+                                if !found {
+                                    // if we find a corresponding padding for the
+                                    // first time we must push preceding paddings
+                                    for (i, query_index) in replacement.clone().enumerate().take(i)
+                                    {
+                                        let word_index = match_.word_index + padding as u16 + (i + 1) as u16;
+                                        let query_index = query_index as u16;
+                                        let match_ = SimpleMatch { query_index, word_index, ..*match_ };
+                                        padded_matches.push(match_);
+                                        biggest = biggest.max(i + 1);
+                                    }
+                                }
+
+                                padded_matches.push(padmatch);
+                                found = true;
+                                continue 'padding;
+                            }
+                        }
+                    }
+
+                    // if we do not find a corresponding padding in the
+                    // next groups so stop here and pad what was found
+                    break;
+                }
+
+                if !found {
+                    // if no padding was found in the following matches
+                    // we must insert the entire padding
+                    for (i, query_index) in replacement.enumerate() {
+                        let word_index = match_.word_index + padding as u16 + (i + 1) as u16;
+                        let query_index = query_index as u16;
+                        let match_ = SimpleMatch { query_index, word_index, ..*match_ };
+                        padded_matches.push(match_);
+                    }
+
+                    // NOTE(review): `replacement_len - 1` underflows if the
+                    // replacement range is empty; presumably the enhancer
+                    // never returns an empty range, to be confirmed.
+                    biggest = biggest.max(replacement_len - 1);
+                }
+            }
+
+            // shift the following word indices of this attribute
+            padding += biggest;
+        }
+    }
+
+    // debug!("padding matches took {:.02?}", before_padding.elapsed());
+
+    // With this check we can see that the loop above takes something
+    // like 43% of the search time even when no rewrite is needed.
+    // assert_eq!(before_matches, padded_matches);
+
+    SetBuf::from_dirty(padded_matches)
+}
--- a/meilisearch-core/src/criterion/number_of_words.rs
+++ b/meilisearch-core/src/criterion/number_of_words.rs
@ -1,31 +0,0 @@
-use crate::criterion::Criterion;
-use crate::RawDocument;
-use slice_group_by::GroupBy;
-use std::cmp::Ordering;
-
-#[inline]
-fn number_of_query_words(query_index: &[u32]) -> usize {
-    query_index.linear_group().count()
-}
-
-#[derive(Debug, Clone, Copy)]
-pub struct NumberOfWords;
-
-impl Criterion for NumberOfWords {
-    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
-        let lhs = {
-            let query_index = lhs.query_index();
-            number_of_query_words(query_index)
-        };
-        let rhs = {
-            let query_index = rhs.query_index();
-            number_of_query_words(query_index)
-        };
-
-        lhs.cmp(&rhs).reverse()
-    }
-
-    fn name(&self) -> &str {
-        "NumberOfWords"
-    }
-}
--- a/meilisearch-core/src/criterion/proximity.rs
+++ b/meilisearch-core/src/criterion/proximity.rs
@ -0,0 +1,68 @@
+use std::cmp::{self, Ordering};
+use slice_group_by::GroupBy;
+use crate::bucket_sort::{SimpleMatch};
+use crate::{RawDocument, MResult};
+use super::{Criterion, Context, ContextMut, prepare_bare_matches};
+
+const MAX_DISTANCE: u16 = 8;
+
+pub struct Proximity;
+
+impl Criterion for Proximity {
+    fn name(&self) -> &str { "proximity" }
+
+    /// Makes sure `processed_matches` is filled on every document.
+    fn prepare<'h, 'p, 'tag, 'txn, 'q, 'a, 'r>(
+        &self,
+        ctx: ContextMut<'h, 'p, 'tag, 'txn, 'q, 'a>,
+        documents: &mut [RawDocument<'r, 'tag>],
+    ) -> MResult<()>
+    {
+        prepare_bare_matches(documents, ctx.postings_lists, ctx.query_enhancer);
+        Ok(())
+    }
+
+    /// Orders documents by ascending sum of the proximities between
+    /// consecutive query-index groups of their processed matches.
+    fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
+        // Distance between two word positions, capped to `MAX_DISTANCE`;
+        // an equal or reversed order costs one more.
+        fn index_proximity(lhs: u16, rhs: u16) -> u16 {
+            match lhs.cmp(&rhs) {
+                Ordering::Less => cmp::min(rhs - lhs, MAX_DISTANCE),
+                Ordering::Equal | Ordering::Greater => cmp::min(lhs - rhs, MAX_DISTANCE) + 1,
+            }
+        }
+
+        // Matches in different attributes are always `MAX_DISTANCE` apart.
+        fn attribute_proximity(lhs: SimpleMatch, rhs: SimpleMatch) -> u16 {
+            if lhs.attribute == rhs.attribute {
+                index_proximity(lhs.word_index, rhs.word_index)
+            } else {
+                MAX_DISTANCE
+            }
+        }
+
+        // Smallest proximity over every pair made of one match of each
+        // group, `u16::max_value()` when there is no pair at all.
+        fn min_proximity(lhs: &[SimpleMatch], rhs: &[SimpleMatch]) -> u16 {
+            lhs.iter()
+                .flat_map(|a| rhs.iter().map(move |b| attribute_proximity(*a, *b)))
+                .min()
+                .unwrap_or(u16::max_value())
+        }
+
+        fn matches_proximity(matches: &[SimpleMatch],) -> u16 {
+            let mut groups = matches.linear_group_by_key(|m| m.query_index);
+
+            let mut previous = match groups.next() {
+                Some(group) => group,
+                None => return 0,
+            };
+
+            // walk the groups pairwise: (g0, g1), (g1, g2), ...
+            let mut total = 0;
+            for current in groups {
+                total += min_proximity(previous, current);
+                previous = current;
+            }
+
+            total
+        }
+
+        let left = matches_proximity(&lhs.processed_matches);
+        let right = matches_proximity(&rhs.processed_matches);
+
+        left.cmp(&right)
+    }
+}
--- a/meilisearch-core/src/criterion/sort_by_attr.rs
+++ b/meilisearch-core/src/criterion/sort_by_attr.rs
@ -1,10 +1,9 @@
 use std::cmp::Ordering;
 use std::error::Error;
 use std::fmt;
-
-use crate::criterion::Criterion;
-use crate::{RankedMap, RawDocument};
 use meilisearch_schema::{Schema, SchemaAttr};
+use crate::{RankedMap, RawDocument};
+use super::{Criterion, Context};

 /// An helper struct that permit to sort documents by
 /// some of their stored attributes.
@ -28,11 +27,11 @@ use meilisearch_schema::{Schema, SchemaAttr};
 /// let custom_ranking = SortByAttr::lower_is_better(&ranked_map, &schema, "published_at")?;
 ///
 /// let builder = CriteriaBuilder::with_capacity(8)
-///        .add(SumOfTypos)
-///        .add(NumberOfWords)
-///        .add(WordsProximity)
-///        .add(SumOfWordsAttribute)
-///        .add(SumOfWordsPosition)
+///        .add(Typo)
+///        .add(Words)
+///        .add(Proximity)
+///        .add(Attribute)
+///        .add(WordsPosition)
 ///        .add(Exact)
 ///        .add(custom_ranking)
 ///        .add(DocumentId);
@ -86,8 +85,12 @@ impl<'a> SortByAttr<'a> {
    }
 }

-impl<'a> Criterion for SortByAttr<'a> {
-    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
+impl Criterion for SortByAttr<'_> {
+    fn name(&self) -> &str {
+        "sort by attribute"
+    }
+
+    fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
        let lhs = self.ranked_map.get(lhs.id, self.attr);
        let rhs = self.ranked_map.get(rhs.id, self.attr);

@ -105,10 +108,6 @@ impl<'a> Criterion for SortByAttr<'a> {
            (None, None) => Ordering::Equal,
        }
    }
-
-    fn name(&self) -> &str {
-        "SortByAttr"
-    }
 }

 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
--- a/meilisearch-core/src/criterion/sum_of_typos.rs
+++ b/meilisearch-core/src/criterion/sum_of_typos.rs
@ -1,116 +0,0 @@
-use std::cmp::Ordering;
-
-use slice_group_by::GroupBy;
-
-use crate::criterion::Criterion;
-use crate::RawDocument;
-
-// This function is a wrong logarithmic 10 function.
-// It is safe to panic on input number higher than 3,
-// the number of typos is never bigger than that.
-#[inline]
-fn custom_log10(n: u8) -> f32 {
-    match n {
-        0 => 0.0,     // log(1)
-        1 => 0.30102, // log(2)
-        2 => 0.47712, // log(3)
-        3 => 0.60205, // log(4)
-        _ => panic!("invalid number"),
-    }
-}
-
-#[inline]
-fn sum_matches_typos(query_index: &[u32], distance: &[u8]) -> usize {
-    let mut number_words: usize = 0;
-    let mut sum_typos = 0.0;
-    let mut index = 0;
-
-    for group in query_index.linear_group() {
-        sum_typos += custom_log10(distance[index]);
-        number_words += 1;
-        index += group.len();
-    }
-
-    (number_words as f32 / (sum_typos + 1.0) * 1000.0) as usize
-}
-
-#[derive(Debug, Clone, Copy)]
-pub struct SumOfTypos;
-
-impl Criterion for SumOfTypos {
-    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
-        let lhs = {
-            let query_index = lhs.query_index();
-            let distance = lhs.distance();
-            sum_matches_typos(query_index, distance)
-        };
-
-        let rhs = {
-            let query_index = rhs.query_index();
-            let distance = rhs.distance();
-            sum_matches_typos(query_index, distance)
-        };
-
-        lhs.cmp(&rhs).reverse()
-    }
-
-    fn name(&self) -> &str {
-        "SumOfTypos"
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    // typing: "Geox CEO"
-    //
-    // doc0: "Geox SpA: CEO and Executive"
-    // doc1: "Mt. Gox CEO Resigns From Bitcoin Foundation"
-    #[test]
-    fn one_typo_reference() {
-        let query_index0 = &[0, 1];
-        let distance0 = &[0, 0];
-
-        let query_index1 = &[0, 1];
-        let distance1 = &[1, 0];
-
-        let doc0 = sum_matches_typos(query_index0, distance0);
-        let doc1 = sum_matches_typos(query_index1, distance1);
-        assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
-    }
-
-    // typing: "bouton manchette"
-    //
-    // doc0: "bouton manchette"
-    // doc1: "bouton"
-    #[test]
-    fn no_typo() {
-        let query_index0 = &[0, 1];
-        let distance0 = &[0, 0];
-
-        let query_index1 = &[0];
-        let distance1 = &[0];
-
-        let doc0 = sum_matches_typos(query_index0, distance0);
-        let doc1 = sum_matches_typos(query_index1, distance1);
-        assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
-    }
-
-    // typing: "bouton manchztte"
-    //
-    // doc0: "bouton manchette"
-    // doc1: "bouton"
-    #[test]
-    fn one_typo() {
-        let query_index0 = &[0, 1];
-        let distance0 = &[0, 1];
-
-        let query_index1 = &[0];
-        let distance1 = &[0];
-
-        let doc0 = sum_matches_typos(query_index0, distance0);
-        let doc1 = sum_matches_typos(query_index1, distance1);
-        assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
-    }
-}
--- a/meilisearch-core/src/criterion/sum_of_words_attribute.rs
+++ b/meilisearch-core/src/criterion/sum_of_words_attribute.rs
@ -1,64 +0,0 @@
-use crate::criterion::Criterion;
-use crate::RawDocument;
-use slice_group_by::GroupBy;
-use std::cmp::Ordering;
-
-#[inline]
-fn sum_matches_attributes(query_index: &[u32], attribute: &[u16]) -> usize {
-    let mut sum_attributes = 0;
-    let mut index = 0;
-
-    for group in query_index.linear_group() {
-        sum_attributes += attribute[index] as usize;
-        index += group.len();
-    }
-
-    sum_attributes
-}
-
-#[derive(Debug, Clone, Copy)]
-pub struct SumOfWordsAttribute;
-
-impl Criterion for SumOfWordsAttribute {
-    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
-        let lhs = {
-            let query_index = lhs.query_index();
-            let attribute = lhs.attribute();
-            sum_matches_attributes(query_index, attribute)
-        };
-
-        let rhs = {
-            let query_index = rhs.query_index();
-            let attribute = rhs.attribute();
-            sum_matches_attributes(query_index, attribute)
-        };
-
-        lhs.cmp(&rhs)
-    }
-
-    fn name(&self) -> &str {
-        "SumOfWordsAttribute"
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    // typing: "soulier"
-    //
-    // doc0: { 0. "Soulier bleu", 1. "bla bla bla" }
-    // doc1: { 0. "Botte rouge", 1. "Soulier en cuir" }
-    #[test]
-    fn title_vs_description() {
-        let query_index0 = &[0];
-        let attribute0 = &[0];
-
-        let query_index1 = &[0];
-        let attribute1 = &[1];
-
-        let doc0 = sum_matches_attributes(query_index0, attribute0);
-        let doc1 = sum_matches_attributes(query_index1, attribute1);
-        assert_eq!(doc0.cmp(&doc1), Ordering::Less);
-    }
-}
--- a/meilisearch-core/src/criterion/sum_of_words_position.rs
+++ b/meilisearch-core/src/criterion/sum_of_words_position.rs
@ -1,64 +0,0 @@
-use crate::criterion::Criterion;
-use crate::RawDocument;
-use slice_group_by::GroupBy;
-use std::cmp::Ordering;
-
-#[inline]
-fn sum_matches_attribute_index(query_index: &[u32], word_index: &[u16]) -> usize {
-    let mut sum_word_index = 0;
-    let mut index = 0;
-
-    for group in query_index.linear_group() {
-        sum_word_index += word_index[index] as usize;
-        index += group.len();
-    }
-
-    sum_word_index
-}
-
-#[derive(Debug, Clone, Copy)]
-pub struct SumOfWordsPosition;
-
-impl Criterion for SumOfWordsPosition {
-    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
-        let lhs = {
-            let query_index = lhs.query_index();
-            let word_index = lhs.word_index();
-            sum_matches_attribute_index(query_index, word_index)
-        };
-
-        let rhs = {
-            let query_index = rhs.query_index();
-            let word_index = rhs.word_index();
-            sum_matches_attribute_index(query_index, word_index)
-        };
-
-        lhs.cmp(&rhs)
-    }
-
-    fn name(&self) -> &str {
-        "SumOfWordsPosition"
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    // typing: "soulier"
-    //
-    // doc0: "Soulier bleu"
-    // doc1: "Botte rouge et soulier noir"
-    #[test]
-    fn easy_case() {
-        let query_index0 = &[0];
-        let word_index0 = &[0];
-
-        let query_index1 = &[0];
-        let word_index1 = &[3];
-
-        let doc0 = sum_matches_attribute_index(query_index0, word_index0);
-        let doc1 = sum_matches_attribute_index(query_index1, word_index1);
-        assert_eq!(doc0.cmp(&doc1), Ordering::Less);
-    }
-}
--- a/meilisearch-core/src/criterion/typo.rs
+++ b/meilisearch-core/src/criterion/typo.rs
@ -0,0 +1,55 @@
+use std::cmp::Ordering;
+use crate::{RawDocument, MResult};
+use super::{Criterion, Context, ContextMut, prepare_query_distances};
+
+pub struct Typo;
+
+impl Criterion for Typo {
+    fn name(&self) -> &str { "typo" }
+
+    /// Makes sure `processed_distances` is filled on every document.
+    fn prepare<'h, 'p, 'tag, 'txn, 'q, 'a, 'r>(
+        &self,
+        ctx: ContextMut<'h, 'p, 'tag, 'txn, 'q, 'a>,
+        documents: &mut [RawDocument<'r, 'tag>],
+    ) -> MResult<()>
+    {
+        prepare_query_distances(documents, ctx.query_enhancer, ctx.postings_lists);
+        Ok(())
+    }
+
+    /// Orders documents by descending typo score: more matched query words
+    /// and fewer typos come first.
+    fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
+        // Table-based stand-in for a base 10 logarithm of `n + 1`.
+        // Only 0 to 3 typos are supported, any other input panics.
+        #[inline]
+        fn custom_log10(n: u8) -> f32 {
+            // log(1), log(2), log(3), log(4)
+            const TABLE: [f32; 4] = [0.0, 0.30102, 0.47712, 0.60205];
+
+            match TABLE.get(n as usize) {
+                Some(value) => *value,
+                None => panic!("invalid number"),
+            }
+        }
+
+        #[inline]
+        fn compute_typos(distances: &[Option<u8>]) -> usize {
+            let mut number_words: usize = 0;
+            let mut sum_typos = 0.0;
+
+            // only the query indices that matched something are counted
+            for distance in distances.iter().flatten() {
+                sum_typos += custom_log10(*distance);
+                number_words += 1;
+            }
+
+            let score = number_words as f32 / (sum_typos + 1.0) * 1000.0;
+            score as usize
+        }
+
+        let left = compute_typos(&lhs.processed_distances);
+        let right = compute_typos(&rhs.processed_distances);
+
+        // a bigger score is better: it must sort first
+        right.cmp(&left)
+    }
+}
--- a/meilisearch-core/src/criterion/words.rs
+++ b/meilisearch-core/src/criterion/words.rs
@ -0,0 +1,31 @@
+use std::cmp::Ordering;
+use crate::{RawDocument, MResult};
+use super::{Criterion, Context, ContextMut, prepare_query_distances};
+
+pub struct Words;
+
+impl Criterion for Words {
+    fn name(&self) -> &str { "words" }
+
+    /// Makes sure `processed_distances` is filled on every document.
+    fn prepare<'h, 'p, 'tag, 'txn, 'q, 'a, 'r>(
+        &self,
+        ctx: ContextMut<'h, 'p, 'tag, 'txn, 'q, 'a>,
+        documents: &mut [RawDocument<'r, 'tag>],
+    ) -> MResult<()>
+    {
+        prepare_query_distances(documents, ctx.query_enhancer, ctx.postings_lists);
+        Ok(())
+    }
+
+    /// Orders documents by descending number of matched query indices.
+    fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
+        // Number of slots of `distances` that hold a distance.
+        #[inline]
+        fn number_of_query_words(distances: &[Option<u8>]) -> usize {
+            distances.iter().filter(|distance| distance.is_some()).count()
+        }
+
+        let left = number_of_query_words(&lhs.processed_distances);
+        let right = number_of_query_words(&rhs.processed_distances);
+
+        // more matched words is better: it must sort first
+        right.cmp(&left)
+    }
+}
--- a/meilisearch-core/src/criterion/words_position.rs
+++ b/meilisearch-core/src/criterion/words_position.rs
@ -0,0 +1,37 @@
+use std::cmp::Ordering;
+use slice_group_by::GroupBy;
+use crate::bucket_sort::SimpleMatch;
+use crate::{RawDocument, MResult};
+use super::{Criterion, Context, ContextMut, prepare_bare_matches};
+
+pub struct WordsPosition;
+
+impl Criterion for WordsPosition {
+    fn name(&self) -> &str { "words position" }
+
+    /// Makes sure `processed_matches` is filled on every document.
+    fn prepare<'h, 'p, 'tag, 'txn, 'q, 'a, 'r>(
+        &self,
+        ctx: ContextMut<'h, 'p, 'tag, 'txn, 'q, 'a>,
+        documents: &mut [RawDocument<'r, 'tag>],
+    ) -> MResult<()>
+    {
+        prepare_bare_matches(documents, ctx.postings_lists, ctx.query_enhancer);
+        Ok(())
+    }
+
+    /// Orders documents by ascending sum of the word index of the first
+    /// match of each query-index group.
+    fn evaluate(&self, _ctx: &Context, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
+        #[inline]
+        fn sum_words_position(matches: &[SimpleMatch]) -> usize {
+            matches
+                .linear_group_by_key(|bm| bm.query_index)
+                .map(|group| group[0].word_index as usize)
+                .sum()
+        }
+
+        let left = sum_words_position(&lhs.processed_matches);
+        let right = sum_words_position(&rhs.processed_matches);
+
+        left.cmp(&right)
+    }
+}
--- a/meilisearch-core/src/criterion/words_proximity.rs
+++ b/meilisearch-core/src/criterion/words_proximity.rs
@ -1,164 +0,0 @@
-use crate::criterion::Criterion;
-use crate::RawDocument;
-use slice_group_by::GroupBy;
-use std::cmp::{self, Ordering};
-
-const MAX_DISTANCE: u16 = 8;
-
-#[inline]
-fn clone_tuple<T: Clone, U: Clone>((a, b): (&T, &U)) -> (T, U) {
-    (a.clone(), b.clone())
-}
-
-fn index_proximity(lhs: u16, rhs: u16) -> u16 {
-    if lhs < rhs {
-        cmp::min(rhs - lhs, MAX_DISTANCE)
-    } else {
-        cmp::min(lhs - rhs, MAX_DISTANCE) + 1
-    }
-}
-
-fn attribute_proximity((lattr, lwi): (u16, u16), (rattr, rwi): (u16, u16)) -> u16 {
-    if lattr != rattr {
-        return MAX_DISTANCE;
-    }
-    index_proximity(lwi, rwi)
-}
-
-fn min_proximity((lattr, lwi): (&[u16], &[u16]), (rattr, rwi): (&[u16], &[u16])) -> u16 {
-    let mut min_prox = u16::max_value();
-
-    for a in lattr.iter().zip(lwi) {
-        for b in rattr.iter().zip(rwi) {
-            let a = clone_tuple(a);
-            let b = clone_tuple(b);
-            min_prox = cmp::min(min_prox, attribute_proximity(a, b));
-        }
-    }
-
-    min_prox
-}
-
-fn matches_proximity(
-    query_index: &[u32],
-    distance: &[u8],
-    attribute: &[u16],
-    word_index: &[u16],
-) -> u16 {
-    let mut query_index_groups = query_index.linear_group();
-    let mut proximity = 0;
-    let mut index = 0;
-
-    let get_attr_wi = |index: usize, group_len: usize| {
-        // retrieve the first distance group (with the lowest values)
-        let len = distance[index..index + group_len]
-            .linear_group()
-            .next()
-            .unwrap()
-            .len();
-
-        let rattr = &attribute[index..index + len];
-        let rwi = &word_index[index..index + len];
-
-        (rattr, rwi)
-    };
-
-    let mut last = query_index_groups.next().map(|group| {
-        let attr_wi = get_attr_wi(index, group.len());
-        index += group.len();
-        attr_wi
-    });
-
-    // iter by windows of size 2
-    while let (Some(lhs), Some(rhs)) = (last, query_index_groups.next()) {
-        let attr_wi = get_attr_wi(index, rhs.len());
-        proximity += min_proximity(lhs, attr_wi);
-        last = Some(attr_wi);
-        index += rhs.len();
-    }
-
-    proximity
-}
-
-#[derive(Debug, Clone, Copy)]
-pub struct WordsProximity;
-
-impl Criterion for WordsProximity {
-    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
-        let lhs = {
-            let query_index = lhs.query_index();
-            let distance = lhs.distance();
-            let attribute = lhs.attribute();
-            let word_index = lhs.word_index();
-            matches_proximity(query_index, distance, attribute, word_index)
-        };
-
-        let rhs = {
-            let query_index = rhs.query_index();
-            let distance = rhs.distance();
-            let attribute = rhs.attribute();
-            let word_index = rhs.word_index();
-            matches_proximity(query_index, distance, attribute, word_index)
-        };
-
-        lhs.cmp(&rhs)
-    }
-
-    fn name(&self) -> &str {
-        "WordsProximity"
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn three_different_attributes() {
-        // "soup" "of the" "the day"
-        //
-        // { id: 0, attr: 0, attr_index: 0 }
-        // { id: 1, attr: 1, attr_index: 0 }
-        // { id: 2, attr: 1, attr_index: 1 }
-        // { id: 2, attr: 2, attr_index: 0 }
-        // { id: 3, attr: 3, attr_index: 1 }
-
-        let query_index = &[0, 1, 2, 2, 3];
-        let distance = &[0, 0, 0, 0, 0];
-        let attribute = &[0, 1, 1, 2, 3];
-        let word_index = &[0, 0, 1, 0, 1];
-
-        //   soup -> of = 8
-        // + of -> the  = 1
-        // + the -> day = 8 (not 1)
-        assert_eq!(
-            matches_proximity(query_index, distance, attribute, word_index),
-            17
-        );
-    }
-
-    #[test]
-    fn two_different_attributes() {
-        // "soup day" "soup of the day"
-        //
-        // { id: 0, attr: 0, attr_index: 0 }
-        // { id: 0, attr: 1, attr_index: 0 }
-        // { id: 1, attr: 1, attr_index: 1 }
-        // { id: 2, attr: 1, attr_index: 2 }
-        // { id: 3, attr: 0, attr_index: 1 }
-        // { id: 3, attr: 1, attr_index: 3 }
-
-        let query_index = &[0, 0, 1, 2, 3, 3];
-        let distance = &[0, 0, 0, 0, 0, 0];
-        let attribute = &[0, 1, 1, 1, 0, 1];
-        let word_index = &[0, 0, 1, 2, 1, 3];
-
-        //   soup -> of = 1
-        // + of -> the  = 1
-        // + the -> day = 1
-        assert_eq!(
-            matches_proximity(query_index, distance, attribute, word_index),
-            3
-        );
-    }
-}
--- a/meilisearch-core/src/lib.rs
+++ b/meilisearch-core/src/lib.rs
@ -3,7 +3,7 @@
 extern crate assert_matches;

 mod automaton;
-pub mod criterion;
+mod bucket_sort;
 mod database;
 mod distinct_map;
 mod error;
@ -12,11 +12,12 @@ mod number;
 mod query_builder;
 mod ranked_map;
 mod raw_document;
-pub mod raw_indexer;
 mod reordered_attrs;
+mod update;
+pub mod criterion;
+pub mod raw_indexer;
 pub mod serde;
 pub mod store;
-mod update;

 pub use self::database::{BoxUpdateFn, Database, MainT, UpdateT};
 pub use self::error::{Error, MResult};
@ -27,61 +28,105 @@ pub use self::store::Index;
 pub use self::update::{EnqueuedUpdateResult, ProcessedUpdateResult, UpdateStatus, UpdateType};
 pub use meilisearch_types::{DocIndex, DocumentId, Highlight};

-#[doc(hidden)]
-#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub struct TmpMatch {
-    pub query_index: u32,
-    pub distance: u8,
-    pub attribute: u16,
-    pub word_index: u16,
-    pub is_exact: bool,
-}
+use compact_arena::SmallArena;
+use crate::bucket_sort::{QueryWordAutomaton, PostingsListView};
+use crate::levenshtein::prefix_damerau_levenshtein;
+use crate::reordered_attrs::ReorderedAttrs;

-#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
+#[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord)]
 pub struct Document {
    pub id: DocumentId,
    pub highlights: Vec<Highlight>,

    #[cfg(test)]
-    pub matches: Vec<TmpMatch>,
+    pub matches: Vec<crate::bucket_sort::SimpleMatch>,
+}
+
+/// Builds the highlights of a document from its bare matches.
+///
+/// One `Highlight` is produced for every entry of the postings list of every
+/// bare match. Its length is the whole matched input when the query word is
+/// longer than that input, otherwise the second element of the tuple
+/// returned by `prefix_damerau_levenshtein(query, input)`.
+///
+/// When `searchable_attrs` is given, attributes are mapped back through
+/// `ReorderedAttrs::reverse`; an attribute without a reverse mapping is kept
+/// as is.
+///
+/// # Panics
+///
+/// Panics if the `query_index` of a bare match is out of the bounds of
+/// `automatons`.
+fn highlights_from_raw_document<'a, 'tag, 'txn>(
+    raw_document: &RawDocument<'a, 'tag>,
+    automatons: &[QueryWordAutomaton],
+    arena: &SmallArena<'tag, PostingsListView<'txn>>,
+    searchable_attrs: Option<&ReorderedAttrs>,
+) -> Vec<Highlight>
+{
+    let mut highlights = Vec::new();
+
+    for bm in raw_document.bare_matches.iter() {
+        let postings_list = &arena[bm.postings_list];
+        let input = postings_list.input();
+        let query = &automatons[bm.query_index as usize].query;
+
+        // nothing to highlight: do not pay for the distance computation
+        if postings_list.is_empty() { continue }
+
+        // The covered area only depends on the query word and on the
+        // matched input, never on the position of the match: compute it
+        // once per postings list instead of once per entry.
+        let covered_area = if query.len() > input.len() {
+            input.len()
+        } else {
+            prefix_damerau_levenshtein(query.as_bytes(), input).1
+        };
+        let char_length = covered_area as u16;
+
+        highlights.reserve(postings_list.len());
+        for di in postings_list.iter() {
+            let attribute = searchable_attrs
+                .and_then(|sa| sa.reverse(di.attribute))
+                .unwrap_or(di.attribute);
+
+            highlights.push(Highlight {
+                attribute,
+                char_index: di.char_index,
+                char_length,
+            });
+        }
+    }
+
+    highlights
+}

 impl Document {
    #[cfg(not(test))]
-    fn from_raw(raw: RawDocument) -> Document {
-        Document {
-            id: raw.id,
-            highlights: raw.highlights,
-        }
+    pub fn from_raw<'a, 'tag, 'txn>(
+        raw_document: RawDocument<'a, 'tag>,
+        automatons: &[QueryWordAutomaton],
+        arena: &SmallArena<'tag, PostingsListView<'txn>>,
+        searchable_attrs: Option<&ReorderedAttrs>,
+    ) -> Document
+    {
+        let highlights = highlights_from_raw_document(
+            &raw_document,
+            automatons,
+            arena,
+            searchable_attrs,
+        );
+
+        Document { id: raw_document.id, highlights }
    }

    #[cfg(test)]
-    fn from_raw(raw: RawDocument) -> Document {
-        let len = raw.query_index().len();
-        let mut matches = Vec::with_capacity(len);
+    pub fn from_raw<'a, 'tag, 'txn>(
+        raw_document: RawDocument<'a, 'tag>,
+        automatons: &[QueryWordAutomaton],
+        arena: &SmallArena<'tag, PostingsListView<'txn>>,
+        searchable_attrs: Option<&ReorderedAttrs>,
+    ) -> Document
+    {
+        use crate::bucket_sort::SimpleMatch;

-        let query_index = raw.query_index();
-        let distance = raw.distance();
-        let attribute = raw.attribute();
-        let word_index = raw.word_index();
-        let is_exact = raw.is_exact();
+        let highlights = highlights_from_raw_document(
+            &raw_document,
+            automatons,
+            arena,
+            searchable_attrs,
+        );

-        for i in 0..len {
-            let match_ = TmpMatch {
-                query_index: query_index[i],
-                distance: distance[i],
-                attribute: attribute[i],
-                word_index: word_index[i],
-                is_exact: is_exact[i],
-            };
-            matches.push(match_);
+        let mut matches = Vec::new();
+        for sm in raw_document.processed_matches {
+            let attribute = searchable_attrs
+                .and_then(|sa| sa.reverse(sm.attribute))
+                .unwrap_or(sm.attribute);
+
+            matches.push(SimpleMatch { attribute, ..sm });
        }
+        matches.sort_unstable();

-        Document {
-            id: raw.id,
-            matches,
-            highlights: raw.highlights,
-        }
+        Document { id: raw_document.id, highlights, matches }
    }
 }

--- a/meilisearch-core/src/query_builder.rs
+++ b/meilisearch-core/src/query_builder.rs
--- a/meilisearch-core/src/query_enhancer.rs
+++ b/meilisearch-core/src/query_enhancer.rs
@ -1,398 +0,0 @@
-use std::ops::Range;
-use std::cmp::Ordering::{Less, Greater, Equal};
-
-/// Return `true` if the specified range can accept the given replacements words.
-/// Returns `false` if the replacements words are already present in the original query
-/// or if there is fewer replacement words than the range to replace.
-//
-//
-// ## Ignored because already present in original
-//
-//     new york city subway
-//     -------- ^^^^
-//   /          \
-//  [new york city]
-//
-//
-// ## Ignored because smaller than the original
-//
-//   new york city subway
-//   -------------
-//   \          /
-//    [new york]
-//
-//
-// ## Accepted because bigger than the original
-//
-//        NYC subway
-//        ---
-//       /   \
-//      /     \
-//     /       \
-//    /         \
-//   /           \
-//  [new york city]
-//
-fn rewrite_range_with<S, T>(query: &[S], range: Range<usize>, words: &[T]) -> bool
-where S: AsRef<str>,
-      T: AsRef<str>,
-{
-    if words.len() <= range.len() {
-        // there is fewer or equal replacement words
-        // than there is already in the replaced range
-        return false
-    }
-
-    // retrieve the part to rewrite but with the length
-    // of the replacement part
-    let original = query.iter().skip(range.start).take(words.len());
-
-    // check if the original query doesn't already contain
-    // the replacement words
-    !original.map(AsRef::as_ref).eq(words.iter().map(AsRef::as_ref))
-}
-
-type Origin = usize;
-type RealLength = usize;
-
-struct FakeIntervalTree {
-    intervals: Vec<(Range<usize>, (Origin, RealLength))>,
-}
-
-impl FakeIntervalTree {
-    fn new(mut intervals: Vec<(Range<usize>, (Origin, RealLength))>) -> FakeIntervalTree {
-        intervals.sort_unstable_by_key(|(r, _)| (r.start, r.end));
-        FakeIntervalTree { intervals }
-    }
-
-    fn query(&self, point: usize) -> Option<(Range<usize>, (Origin, RealLength))> {
-        let element = self.intervals.binary_search_by(|(r, _)| {
-            if point >= r.start {
-                if point < r.end { Equal } else { Less }
-            } else { Greater }
-        });
-
-        let n = match element { Ok(n) => n, Err(n) => n };
-
-        match self.intervals.get(n) {
-            Some((range, value)) if range.contains(&point) => Some((range.clone(), *value)),
-            _otherwise => None,
-        }
-    }
-}
-
-pub struct QueryEnhancerBuilder<'a, S> {
-    query: &'a [S],
-    origins: Vec<usize>,
-    real_to_origin: Vec<(Range<usize>, (Origin, RealLength))>,
-}
-
-impl<S: AsRef<str>> QueryEnhancerBuilder<'_, S> {
-    pub fn new(query: &[S]) -> QueryEnhancerBuilder<S> {
-        // we initialize origins query indices based on their positions
-        let origins: Vec<_> = (0..query.len() + 1).collect();
-        let real_to_origin = origins.iter().map(|&o| (o..o+1, (o, 1))).collect();
-
-        QueryEnhancerBuilder { query, origins, real_to_origin }
-    }
-
-    /// Update the final real to origin query indices mapping.
-    ///
-    /// `range` is the original words range that this `replacement` words replace
-    /// and `real` is the first real query index of these replacement words.
-    pub fn declare<T>(&mut self, range: Range<usize>, real: usize, replacement: &[T])
-    where T: AsRef<str>,
-    {
-        // check if the range of original words
-        // can be rewritten with the replacement words
-        if rewrite_range_with(self.query, range.clone(), replacement) {
-
-            // this range can be replaced so we need to
-            // modify the origins accordingly
-            let offset = replacement.len() - range.len();
-
-            let previous_padding = self.origins[range.end - 1];
-            let current_offset = (self.origins[range.end] - 1) - previous_padding;
-            let diff = offset.saturating_sub(current_offset);
-            self.origins[range.end] += diff;
-
-            for r in &mut self.origins[range.end + 1..] {
-                *r += diff;
-            }
-        }
-
-        // we need to store the real number and origins relations
-        // this way it will be possible to know by how many
-        // we need to pad real query indices
-        let real_range = real..real + replacement.len().max(range.len());
-        let real_length = replacement.len();
-        self.real_to_origin.push((real_range, (range.start, real_length)));
-    }
-
-    pub fn build(self) -> QueryEnhancer {
-        QueryEnhancer {
-            origins: self.origins,
-            real_to_origin: FakeIntervalTree::new(self.real_to_origin),
-        }
-    }
-}
-
-pub struct QueryEnhancer {
-    origins: Vec<usize>,
-    real_to_origin: FakeIntervalTree,
-}
-
-impl QueryEnhancer {
-    /// Returns the query indices to use to replace this real query index.
-    pub fn replacement(&self, real: u32) -> Range<u32> {
-        let real = real as usize;
-
-        // query the fake interval tree with the real query index
-        let (range, (origin, real_length)) =
-            self.real_to_origin
-                .query(real)
-                .expect("real has never been declared");
-
-        // if `real` is the end bound of the range
-        if (range.start + real_length - 1) == real {
-            let mut count = range.len();
-            let mut new_origin = origin;
-            for (i, slice) in self.origins[new_origin..].windows(2).enumerate() {
-                let len = slice[1] - slice[0];
-                count = count.saturating_sub(len);
-                if count == 0 { new_origin = origin + i; break }
-            }
-
-            let n = real - range.start;
-            let start = self.origins[origin];
-            let end = self.origins[new_origin + 1];
-            let remaining = (end - start) - n;
-
-            Range { start: (start + n) as u32, end: (start + n + remaining) as u32 }
-
-        } else {
-            // just return the origin along with
-            // the real position of the word
-            let n = real as usize - range.start;
-            let origin = self.origins[origin];
-
-            Range { start: (origin + n) as u32, end: (origin + n + 1) as u32 }
-        }
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn original_unmodified() {
-        let query = ["new", "york", "city", "subway"];
-        //             0       1       2        3
-        let mut builder = QueryEnhancerBuilder::new(&query);
-
-        // new york = new york city
-        builder.declare(0..2, 4, &["new", "york", "city"]);
-        //                    ^      4       5       6
-
-        let enhancer = builder.build();
-
-        assert_eq!(enhancer.replacement(0), 0..1); // new
-        assert_eq!(enhancer.replacement(1), 1..2); // york
-        assert_eq!(enhancer.replacement(2), 2..3); // city
-        assert_eq!(enhancer.replacement(3), 3..4); // subway
-        assert_eq!(enhancer.replacement(4), 0..1); // new
-        assert_eq!(enhancer.replacement(5), 1..2); // york
-        assert_eq!(enhancer.replacement(6), 2..3); // city
-    }
-
-    #[test]
-    fn simple_growing() {
-        let query = ["new", "york", "subway"];
-        //             0       1        2
-        let mut builder = QueryEnhancerBuilder::new(&query);
-
-        // new york = new york city
-        builder.declare(0..2, 3, &["new", "york", "city"]);
-        //                    ^      3       4       5
-
-        let enhancer = builder.build();
-
-        assert_eq!(enhancer.replacement(0), 0..1); // new
-        assert_eq!(enhancer.replacement(1), 1..3); // york
-        assert_eq!(enhancer.replacement(2), 3..4); // subway
-        assert_eq!(enhancer.replacement(3), 0..1); // new
-        assert_eq!(enhancer.replacement(4), 1..2); // york
-        assert_eq!(enhancer.replacement(5), 2..3); // city
-    }
-
-    #[test]
-    fn same_place_growings() {
-        let query = ["NY", "subway"];
-        //             0       1
-        let mut builder = QueryEnhancerBuilder::new(&query);
-
-        // NY = new york
-        builder.declare(0..1, 2, &["new", "york"]);
-        //                    ^      2       3
-
-        // NY = new york city
-        builder.declare(0..1, 4, &["new", "york", "city"]);
-        //                    ^      4       5       6
-
-        // NY = NYC
-        builder.declare(0..1, 7, &["NYC"]);
-        //                    ^      7
-
-        // NY = new york city
-        builder.declare(0..1, 8, &["new", "york", "city"]);
-        //                    ^      8       9      10
-
-        // subway = underground train
-        builder.declare(1..2, 11, &["underground", "train"]);
-        //                    ^          11          12
-
-        let enhancer = builder.build();
-
-        assert_eq!(enhancer.replacement(0), 0..3); // NY
-        assert_eq!(enhancer.replacement(1), 3..5); // subway
-        assert_eq!(enhancer.replacement(2), 0..1); // new
-        assert_eq!(enhancer.replacement(3), 1..3); // york
-        assert_eq!(enhancer.replacement(4), 0..1); // new
-        assert_eq!(enhancer.replacement(5), 1..2); // york
-        assert_eq!(enhancer.replacement(6), 2..3); // city
-        assert_eq!(enhancer.replacement(7), 0..3); // NYC
-        assert_eq!(enhancer.replacement(8), 0..1); // new
-        assert_eq!(enhancer.replacement(9), 1..2); // york
-        assert_eq!(enhancer.replacement(10), 2..3); // city
-        assert_eq!(enhancer.replacement(11), 3..4); // underground
-        assert_eq!(enhancer.replacement(12), 4..5); // train
-    }
-
-    #[test]
-    fn bigger_growing() {
-        let query = ["NYC", "subway"];
-        //             0        1
-        let mut builder = QueryEnhancerBuilder::new(&query);
-
-        // NYC = new york city
-        builder.declare(0..1, 2, &["new", "york", "city"]);
-        //                    ^      2       3       4
-
-        let enhancer = builder.build();
-
-        assert_eq!(enhancer.replacement(0), 0..3); // NYC
-        assert_eq!(enhancer.replacement(1), 3..4); // subway
-        assert_eq!(enhancer.replacement(2), 0..1); // new
-        assert_eq!(enhancer.replacement(3), 1..2); // york
-        assert_eq!(enhancer.replacement(4), 2..3); // city
-    }
-
-    #[test]
-    fn middle_query_growing() {
-        let query = ["great", "awesome", "NYC", "subway"];
-        //              0         1        2        3
-        let mut builder = QueryEnhancerBuilder::new(&query);
-
-        // NYC = new york city
-        builder.declare(2..3, 4, &["new", "york", "city"]);
-        //                    ^      4       5       6
-
-        let enhancer = builder.build();
-
-        assert_eq!(enhancer.replacement(0), 0..1); // great
-        assert_eq!(enhancer.replacement(1), 1..2); // awesome
-        assert_eq!(enhancer.replacement(2), 2..5); // NYC
-        assert_eq!(enhancer.replacement(3), 5..6); // subway
-        assert_eq!(enhancer.replacement(4), 2..3); // new
-        assert_eq!(enhancer.replacement(5), 3..4); // york
-        assert_eq!(enhancer.replacement(6), 4..5); // city
-    }
-
-    #[test]
-    fn end_query_growing() {
-        let query = ["NYC", "subway"];
-        //             0        1
-        let mut builder = QueryEnhancerBuilder::new(&query);
-
-        // NYC = new york city
-        builder.declare(1..2, 2, &["underground", "train"]);
-        //                    ^         2            3
-
-        let enhancer = builder.build();
-
-        assert_eq!(enhancer.replacement(0), 0..1); // NYC
-        assert_eq!(enhancer.replacement(1), 1..3); // subway
-        assert_eq!(enhancer.replacement(2), 1..2); // underground
-        assert_eq!(enhancer.replacement(3), 2..3); // train
-    }
-
-    #[test]
-    fn multiple_growings() {
-        let query = ["great", "awesome", "NYC", "subway"];
-        //              0         1        2        3
-        let mut builder = QueryEnhancerBuilder::new(&query);
-
-        // NYC = new york city
-        builder.declare(2..3, 4, &["new", "york", "city"]);
-        //                    ^      4       5       6
-
-        // subway = underground train
-        builder.declare(3..4, 7, &["underground", "train"]);
-        //                    ^          7           8
-
-        let enhancer = builder.build();
-
-        assert_eq!(enhancer.replacement(0), 0..1); // great
-        assert_eq!(enhancer.replacement(1), 1..2); // awesome
-        assert_eq!(enhancer.replacement(2), 2..5); // NYC
-        assert_eq!(enhancer.replacement(3), 5..7); // subway
-        assert_eq!(enhancer.replacement(4), 2..3); // new
-        assert_eq!(enhancer.replacement(5), 3..4); // york
-        assert_eq!(enhancer.replacement(6), 4..5); // city
-        assert_eq!(enhancer.replacement(7), 5..6); // underground
-        assert_eq!(enhancer.replacement(8), 6..7); // train
-    }
-
-    #[test]
-    fn multiple_probable_growings() {
-        let query = ["great", "awesome", "NYC", "subway"];
-        //              0         1        2        3
-        let mut builder = QueryEnhancerBuilder::new(&query);
-
-        // NYC = new york city
-        builder.declare(2..3, 4, &["new", "york", "city"]);
-        //                    ^      4       5       6
-
-        // subway = underground train
-        builder.declare(3..4, 7, &["underground", "train"]);
-        //                    ^          7           8
-
-        // great awesome = good
-        builder.declare(0..2, 9, &["good"]);
-        //                    ^       9
-
-        // awesome NYC = NY
-        builder.declare(1..3, 10, &["NY"]);
-        //                    ^^     10
-
-        // NYC subway = metro
-        builder.declare(2..4, 11, &["metro"]);
-        //                    ^^      11
-
-        let enhancer = builder.build();
-
-        assert_eq!(enhancer.replacement(0),  0..1); // great
-        assert_eq!(enhancer.replacement(1),  1..2); // awesome
-        assert_eq!(enhancer.replacement(2),  2..5); // NYC
-        assert_eq!(enhancer.replacement(3),  5..7); // subway
-        assert_eq!(enhancer.replacement(4),  2..3); // new
-        assert_eq!(enhancer.replacement(5),  3..4); // york
-        assert_eq!(enhancer.replacement(6),  4..5); // city
-        assert_eq!(enhancer.replacement(7),  5..6); // underground
-        assert_eq!(enhancer.replacement(8),  6..7); // train
-        assert_eq!(enhancer.replacement(9),  0..2); // good
-        assert_eq!(enhancer.replacement(10), 1..5); // NY
-        assert_eq!(enhancer.replacement(11), 2..5); // metro
-    }
-}
--- a/meilisearch-core/src/raw_document.rs
+++ b/meilisearch-core/src/raw_document.rs
@ -1,186 +1,111 @@
-use std::fmt;
-use std::sync::Arc;
-
-use meilisearch_schema::SchemaAttr;
+use compact_arena::SmallArena;
+use itertools::EitherOrBoth;
 use sdset::SetBuf;
-use slice_group_by::GroupBy;
+use crate::DocIndex;
+use crate::bucket_sort::{SimpleMatch, BareMatch, QueryWordAutomaton, PostingsListView};
+use crate::reordered_attrs::ReorderedAttrs;

-use crate::{DocumentId, Highlight, TmpMatch};
-
-#[derive(Clone)]
-pub struct RawDocument {
-    pub id: DocumentId,
-    pub matches: SharedMatches,
-    pub highlights: Vec<Highlight>,
-    pub fields_counts: SetBuf<(SchemaAttr, u64)>,
+pub struct RawDocument<'a, 'tag> {
+    pub id: crate::DocumentId,
+    pub bare_matches: &'a mut [BareMatch<'tag>],
+    pub processed_matches: Vec<SimpleMatch>,
+    /// The list of minimum `distance` found
+    pub processed_distances: Vec<Option<u8>>,
+    /// Does this document contain a field
+    /// with one word that is exactly matching
+    pub contains_one_word_field: bool,
 }

-impl RawDocument {
-    pub fn query_index(&self) -> &[u32] {
-        let r = self.matches.range;
-        // it is safe because construction/modifications
-        // can only be done in this module
-        unsafe {
-            &self
-                .matches
-                .matches
-                .query_index
-                .get_unchecked(r.start..r.end)
-        }
-    }
+impl<'a, 'tag> RawDocument<'a, 'tag> {
+    pub fn new<'txn>(
+        bare_matches: &'a mut [BareMatch<'tag>],
+        automatons: &[QueryWordAutomaton],
+        postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
+        searchable_attrs: Option<&ReorderedAttrs>,
+    ) -> Option<RawDocument<'a, 'tag>>
+    {
+        if let Some(reordered_attrs) = searchable_attrs {
+            for bm in bare_matches.iter() {
+                let postings_list = &postings_lists[bm.postings_list];

-    pub fn distance(&self) -> &[u8] {
-        let r = self.matches.range;
-        // it is safe because construction/modifications
-        // can only be done in this module
-        unsafe { &self.matches.matches.distance.get_unchecked(r.start..r.end) }
-    }
+                let mut rewritten = Vec::new();
+                for di in postings_list.iter() {
+                    if let Some(attribute) = reordered_attrs.get(di.attribute) {
+                        rewritten.push(DocIndex { attribute, ..*di });
+                    }
+                }

-    pub fn attribute(&self) -> &[u16] {
-        let r = self.matches.range;
-        // it is safe because construction/modifications
-        // can only be done in this module
-        unsafe { &self.matches.matches.attribute.get_unchecked(r.start..r.end) }
-    }
-
-    pub fn word_index(&self) -> &[u16] {
-        let r = self.matches.range;
-        // it is safe because construction/modifications
-        // can only be done in this module
-        unsafe {
-            &self
-                .matches
-                .matches
-                .word_index
-                .get_unchecked(r.start..r.end)
-        }
-    }
-
-    pub fn is_exact(&self) -> &[bool] {
-        let r = self.matches.range;
-        // it is safe because construction/modifications
-        // can only be done in this module
-        unsafe { &self.matches.matches.is_exact.get_unchecked(r.start..r.end) }
-    }
-}
-
-impl fmt::Debug for RawDocument {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        f.write_str("RawDocument {\r\n")?;
-        f.write_fmt(format_args!("{:>15}: {:?},\r\n", "id", self.id))?;
-        f.write_fmt(format_args!(
-            "{:>15}: {:^5?},\r\n",
-            "query_index",
-            self.query_index()
-        ))?;
-        f.write_fmt(format_args!(
-            "{:>15}: {:^5?},\r\n",
-            "distance",
-            self.distance()
-        ))?;
-        f.write_fmt(format_args!(
-            "{:>15}: {:^5?},\r\n",
-            "attribute",
-            self.attribute()
-        ))?;
-        f.write_fmt(format_args!(
-            "{:>15}: {:^5?},\r\n",
-            "word_index",
-            self.word_index()
-        ))?;
-        f.write_fmt(format_args!(
-            "{:>15}: {:^5?},\r\n",
-            "is_exact",
-            self.is_exact()
-        ))?;
-        f.write_str("}")?;
-        Ok(())
-    }
-}
-
-pub fn raw_documents_from(
-    matches: SetBuf<(DocumentId, TmpMatch)>,
-    highlights: SetBuf<(DocumentId, Highlight)>,
-    fields_counts: SetBuf<(DocumentId, SchemaAttr, u64)>,
-) -> Vec<RawDocument> {
-    let mut docs_ranges: Vec<(_, Range, _, _)> = Vec::new();
-    let mut matches2 = Matches::with_capacity(matches.len());
-
-    let matches = matches.linear_group_by_key(|(id, _)| *id);
-    let highlights = highlights.linear_group_by_key(|(id, _)| *id);
-    let fields_counts = fields_counts.linear_group_by_key(|(id, _, _)| *id);
-
-    for ((mgroup, hgroup), fgroup) in matches.zip(highlights).zip(fields_counts) {
-        debug_assert_eq!(mgroup[0].0, hgroup[0].0);
-        debug_assert_eq!(mgroup[0].0, fgroup[0].0);
-
-        let document_id = mgroup[0].0;
-        let start = docs_ranges.last().map(|(_, r, _, _)| r.end).unwrap_or(0);
-        let end = start + mgroup.len();
-        let highlights = hgroup.iter().map(|(_, h)| *h).collect();
-        let fields_counts = SetBuf::new(fgroup.iter().map(|(_, a, c)| (*a, *c)).collect()).unwrap();
-
-        docs_ranges.push((document_id, Range { start, end }, highlights, fields_counts));
-        matches2.extend_from_slice(mgroup);
-    }
-
-    let matches = Arc::new(matches2);
-    docs_ranges
-        .into_iter()
-        .map(|(id, range, highlights, fields_counts)| {
-            let matches = SharedMatches {
-                range,
-                matches: matches.clone(),
-            };
-            RawDocument {
-                id,
-                matches,
-                highlights,
-                fields_counts,
+                let new_postings = SetBuf::from_dirty(rewritten);
+                postings_lists[bm.postings_list].rewrite_with(new_postings);
            }
+        }
+
+        bare_matches.sort_unstable_by_key(|m| m.query_index);
+
+        let mut previous_word = None;
+        for i in 0..bare_matches.len() {
+            let a = &bare_matches[i];
+            let auta = &automatons[a.query_index as usize];
+
+            match auta.phrase_query {
+                Some((0, _)) => {
+                    let b = match bare_matches.get(i + 1) {
+                        Some(b) => b,
+                        None => {
+                            postings_lists[a.postings_list].rewrite_with(SetBuf::default());
+                            continue;
+                        }
+                    };
+
+                    if a.query_index + 1 != b.query_index {
+                        postings_lists[a.postings_list].rewrite_with(SetBuf::default());
+                        continue
+                    }
+
+                    let pla = &postings_lists[a.postings_list];
+                    let plb = &postings_lists[b.postings_list];
+
+                    let iter = itertools::merge_join_by(pla.iter(), plb.iter(), |a, b| {
+                        a.attribute.cmp(&b.attribute).then((a.word_index + 1).cmp(&b.word_index))
+                    });
+
+                    let mut newa = Vec::new();
+                    let mut newb = Vec::new();
+
+                    for eb in iter {
+                        if let EitherOrBoth::Both(a, b) = eb {
+                            newa.push(*a);
+                            newb.push(*b);
+                        }
+                    }
+
+                    if !newa.is_empty() {
+                        previous_word = Some(a.query_index);
+                    }
+
+                    postings_lists[a.postings_list].rewrite_with(SetBuf::new_unchecked(newa));
+                    postings_lists[b.postings_list].rewrite_with(SetBuf::new_unchecked(newb));
+                },
+                Some((1, _)) => {
+                    if previous_word.take() != Some(a.query_index - 1) {
+                        postings_lists[a.postings_list].rewrite_with(SetBuf::default());
+                    }
+                },
+                Some((_, _)) => unreachable!(),
+                None => (),
+            }
+        }
+
+        if bare_matches.iter().all(|rm| postings_lists[rm.postings_list].is_empty()) {
+            return None
+        }
+
+        Some(RawDocument {
+            id: bare_matches[0].document_id,
+            bare_matches,
+            processed_matches: Vec::new(),
+            processed_distances: Vec::new(),
+            contains_one_word_field: false,
        })
-        .collect()
-}
-
-#[derive(Debug, Copy, Clone)]
-struct Range {
-    start: usize,
-    end: usize,
-}
-
-#[derive(Clone)]
-pub struct SharedMatches {
-    range: Range,
-    matches: Arc<Matches>,
-}
-
-#[derive(Clone)]
-struct Matches {
-    query_index: Vec<u32>,
-    distance: Vec<u8>,
-    attribute: Vec<u16>,
-    word_index: Vec<u16>,
-    is_exact: Vec<bool>,
-}
-
-impl Matches {
-    fn with_capacity(cap: usize) -> Matches {
-        Matches {
-            query_index: Vec::with_capacity(cap),
-            distance: Vec::with_capacity(cap),
-            attribute: Vec::with_capacity(cap),
-            word_index: Vec::with_capacity(cap),
-            is_exact: Vec::with_capacity(cap),
-        }
-    }
-
-    fn extend_from_slice(&mut self, matches: &[(DocumentId, TmpMatch)]) {
-        for (_, match_) in matches {
-            self.query_index.push(match_.query_index);
-            self.distance.push(match_.distance);
-            self.attribute.push(match_.attribute);
-            self.word_index.push(match_.word_index);
-            self.is_exact.push(match_.is_exact);
-        }
    }
 }
--- a/meilisearch-core/src/reordered_attrs.rs
+++ b/meilisearch-core/src/reordered_attrs.rs
@ -1,27 +1,31 @@
+use std::cmp;
+
 #[derive(Default, Clone)]
 pub struct ReorderedAttrs {
-    count: usize,
    reorders: Vec<Option<u16>>,
+    reverse: Vec<u16>,
 }

 impl ReorderedAttrs {
    pub fn new() -> ReorderedAttrs {
-        ReorderedAttrs {
-            count: 0,
-            reorders: Vec::new(),
-        }
+        ReorderedAttrs { reorders: Vec::new(), reverse: Vec::new() }
    }

    pub fn insert_attribute(&mut self, attribute: u16) {
-        self.reorders.resize(attribute as usize + 1, None);
-        self.reorders[attribute as usize] = Some(self.count as u16);
-        self.count += 1;
+        let new_len = cmp::max(attribute as usize + 1, self.reorders.len());
+        self.reorders.resize(new_len, None);
+        self.reorders[attribute as usize] = Some(self.reverse.len() as u16);
+        self.reverse.push(attribute);
    }

    pub fn get(&self, attribute: u16) -> Option<u16> {
-        match self.reorders.get(attribute as usize) {
-            Some(Some(attribute)) => Some(*attribute),
-            _ => None,
+        match self.reorders.get(attribute as usize)? {
+            Some(attribute) => Some(*attribute),
+            None => None,
        }
    }
+
+    pub fn reverse(&self, attribute: u16) -> Option<u16> {
+        self.reverse.get(attribute as usize).copied()
+    }
 }
--- a/meilisearch-core/src/serde/serializer.rs
+++ b/meilisearch-core/src/serde/serializer.rs
@ -325,7 +325,7 @@ where
                txn,
                document_id,
                attribute,
-                number_of_words as u64,
+                number_of_words as u16,
            )?;
        }
    }
--- a/meilisearch-core/src/store/documents_fields_counts.rs
+++ b/meilisearch-core/src/store/documents_fields_counts.rs
@ -7,7 +7,7 @@ use meilisearch_schema::SchemaAttr;

 #[derive(Copy, Clone)]
 pub struct DocumentsFieldsCounts {
-    pub(crate) documents_fields_counts: heed::Database<OwnedType<DocumentAttrKey>, OwnedType<u64>>,
+    pub(crate) documents_fields_counts: heed::Database<OwnedType<DocumentAttrKey>, OwnedType<u16>>,
 }

 impl DocumentsFieldsCounts {
@ -16,7 +16,7 @@ impl DocumentsFieldsCounts {
        writer: &mut heed::RwTxn<MainT>,
        document_id: DocumentId,
        attribute: SchemaAttr,
-        value: u64,
+        value: u16,
    ) -> ZResult<()> {
        let key = DocumentAttrKey::new(document_id, attribute);
        self.documents_fields_counts.put(writer, &key, &value)
@ -42,7 +42,7 @@ impl DocumentsFieldsCounts {
        reader: &heed::RoTxn<MainT>,
        document_id: DocumentId,
        attribute: SchemaAttr,
-    ) -> ZResult<Option<u64>> {
+    ) -> ZResult<Option<u16>> {
        let key = DocumentAttrKey::new(document_id, attribute);
        match self.documents_fields_counts.get(reader, &key)? {
            Some(count) => Ok(Some(count)),
@ -79,11 +79,11 @@ impl DocumentsFieldsCounts {
 }

 pub struct DocumentFieldsCountsIter<'txn> {
-    iter: heed::RoRange<'txn, OwnedType<DocumentAttrKey>, OwnedType<u64>>,
+    iter: heed::RoRange<'txn, OwnedType<DocumentAttrKey>, OwnedType<u16>>,
 }

 impl Iterator for DocumentFieldsCountsIter<'_> {
-    type Item = ZResult<(SchemaAttr, u64)>;
+    type Item = ZResult<(SchemaAttr, u16)>;

    fn next(&mut self) -> Option<Self::Item> {
        match self.iter.next() {
@ -99,7 +99,7 @@ impl Iterator for DocumentFieldsCountsIter<'_> {

 pub struct DocumentsIdsIter<'txn> {
    last_seen_id: Option<DocumentId>,
-    iter: heed::RoIter<'txn, OwnedType<DocumentAttrKey>, OwnedType<u64>>,
+    iter: heed::RoIter<'txn, OwnedType<DocumentAttrKey>, OwnedType<u16>>,
 }

 impl Iterator for DocumentsIdsIter<'_> {
@ -123,11 +123,11 @@ impl Iterator for DocumentsIdsIter<'_> {
 }

 pub struct AllDocumentsFieldsCountsIter<'txn> {
-    iter: heed::RoIter<'txn, OwnedType<DocumentAttrKey>, OwnedType<u64>>,
+    iter: heed::RoIter<'txn, OwnedType<DocumentAttrKey>, OwnedType<u16>>,
 }

 impl Iterator for AllDocumentsFieldsCountsIter<'_> {
-    type Item = ZResult<(DocumentId, SchemaAttr, u64)>;
+    type Item = ZResult<(DocumentId, SchemaAttr, u16)>;

    fn next(&mut self) -> Option<Self::Item> {
        match self.iter.next() {
--- a/meilisearch-http/src/helpers/meilisearch.rs
+++ b/meilisearch-http/src/helpers/meilisearch.rs
@ -310,11 +310,11 @@ impl<'a> SearchBuilder<'a> {
            if let Some(ranking_rules_order) = ranking_order {
                for rule in ranking_rules_order {
                    match rule.as_str() {
-                        "_sum_of_typos" => builder.push(SumOfTypos),
-                        "_number_of_words" => builder.push(NumberOfWords),
-                        "_word_proximity" => builder.push(WordsProximity),
-                        "_sum_of_words_attribute" => builder.push(SumOfWordsAttribute),
-                        "_sum_of_words_position" => builder.push(SumOfWordsPosition),
+                        "_typo" => builder.push(Typo),
+                        "_words" => builder.push(Words),
+                        "_proximity" => builder.push(Proximity),
+                        "_attribute" => builder.push(Attribute),
+                        "_words_position" => builder.push(WordsPosition),
                        "_exact" => builder.push(Exact),
                        _ => {
                            let order = match ranking_rules.get(rule.as_str()) {
@ -340,11 +340,11 @@ impl<'a> SearchBuilder<'a> {
                builder.push(DocumentId);
                return Ok(Some(builder.build()));
            } else {
-                builder.push(SumOfTypos);
-                builder.push(NumberOfWords);
-                builder.push(WordsProximity);
-                builder.push(SumOfWordsAttribute);
-                builder.push(SumOfWordsPosition);
+                builder.push(Typo);
+                builder.push(Words);
+                builder.push(Proximity);
+                builder.push(Attribute);
+                builder.push(WordsPosition);
                builder.push(Exact);
                for (rule, order) in ranking_rules.iter() {
                    let custom_ranking = match order {