Update the criteria to the new ones

2025-06-17 20:27:41 +02:00 · 2019-12-11 17:02:10 +01:00 · 2019-12-11 17:02:10 +01:00 · 248ccfc0d8
commit 248ccfc0d8
parent ea148575cf
20 changed files with 693 additions and 1775 deletions
--- a/meilisearch-core/src/bucket_sort.rs
+++ b/meilisearch-core/src/bucket_sort.rs
@ -1,9 +1,6 @@
 use std::ops::Deref;
 use std::fmt;
 use std::borrow::Cow;
-use std::cmp::Ordering;
-use std::collections::HashSet;
-use std::io::Write;
 use std::mem;
 use std::ops::Range;
 use std::rc::Rc;
@ -17,15 +14,15 @@ use meilisearch_tokenizer::{is_cjk, split_query_string};
 use meilisearch_types::{DocIndex, Highlight};
 use sdset::{Set, SetBuf};
 use slice_group_by::{GroupBy, GroupByMut};
-use itertools::EitherOrBoth;

 use crate::automaton::NGRAMS;
 use crate::automaton::{QueryEnhancer, QueryEnhancerBuilder};
 use crate::automaton::{build_dfa, build_prefix_dfa, build_exact_dfa};
 use crate::automaton::{normalize_str, split_best_frequency};

-use crate::criterion2::*;
+use crate::criterion::Criteria;
 use crate::levenshtein::prefix_damerau_levenshtein;
+use crate::raw_document::RawDocument;
 use crate::{database::MainT, reordered_attrs::ReorderedAttrs};
 use crate::{store, Document, DocumentId, MResult};

@ -33,6 +30,7 @@ pub fn bucket_sort<'c>(
    reader: &heed::RoTxn<MainT>,
    query: &str,
    range: Range<usize>,
+    criteria: Criteria<'c>,
    main_store: store::Main,
    postings_lists_store: store::PostingsLists,
    documents_fields_counts_store: store::DocumentsFieldsCounts,
@ -76,17 +74,7 @@ pub fn bucket_sort<'c>(

    let mut groups = vec![raw_documents.as_mut_slice()];

-    let criteria = [
-        Box::new(Typo) as Box<dyn Criterion>,
-        Box::new(Words),
-        Box::new(Proximity),
-        Box::new(Attribute),
-        Box::new(WordsPosition),
-        Box::new(Exact),
-        Box::new(StableDocId),
-    ];
-
-    'criteria: for criterion in &criteria {
+    'criteria: for criterion in criteria.as_ref() {
        let tmp_groups = mem::replace(&mut groups, Vec::new());
        let mut documents_seen = 0;

@ -131,7 +119,7 @@ pub fn bucket_sort<'c>(
        }).collect();

        Document {
-            id: d.raw_matches[0].document_id,
+            id: d.id,
            highlights,
            #[cfg(test)] matches: Vec::new(),
        }
@ -140,88 +128,6 @@ pub fn bucket_sort<'c>(
    Ok(iter.collect())
 }

-pub struct RawDocument<'a, 'tag> {
-    pub raw_matches: &'a mut [BareMatch<'tag>],
-    pub processed_matches: Vec<SimpleMatch>,
-    /// The list of minimum `distance` found
-    pub processed_distances: Vec<Option<u8>>,
-}
-
-impl<'a, 'tag> RawDocument<'a, 'tag> {
-    fn new<'txn>(
-        raw_matches: &'a mut [BareMatch<'tag>],
-        automatons: &[QueryWordAutomaton],
-        postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
-    ) -> Option<RawDocument<'a, 'tag>>
-    {
-        raw_matches.sort_unstable_by_key(|m| m.query_index);
-
-        let mut previous_word = None;
-        for i in 0..raw_matches.len() {
-            let a = &raw_matches[i];
-            let auta = &automatons[a.query_index as usize];
-
-            match auta.phrase_query {
-                Some((0, _)) => {
-                    let b = match raw_matches.get(i + 1) {
-                        Some(b) => b,
-                        None => {
-                            postings_lists[a.postings_list].rewrite_with(SetBuf::default());
-                            continue;
-                        }
-                    };
-
-                    if a.query_index + 1 != b.query_index {
-                        postings_lists[a.postings_list].rewrite_with(SetBuf::default());
-                        continue
-                    }
-
-                    let pla = &postings_lists[a.postings_list];
-                    let plb = &postings_lists[b.postings_list];
-
-                    let mut iter = itertools::merge_join_by(pla.iter(), plb.iter(), |a, b| {
-                        a.attribute.cmp(&b.attribute).then((a.word_index + 1).cmp(&b.word_index))
-                    });
-
-                    let mut newa = Vec::new();
-                    let mut newb = Vec::new();
-
-                    for eb in iter {
-                        if let EitherOrBoth::Both(a, b) = eb {
-                            newa.push(*a);
-                            newb.push(*b);
-                        }
-                    }
-
-                    if !newa.is_empty() {
-                        previous_word = Some(a.query_index);
-                    }
-
-                    postings_lists[a.postings_list].rewrite_with(SetBuf::new_unchecked(newa));
-                    postings_lists[b.postings_list].rewrite_with(SetBuf::new_unchecked(newb));
-                },
-                Some((1, _)) => {
-                    if previous_word.take() != Some(a.query_index - 1) {
-                        postings_lists[a.postings_list].rewrite_with(SetBuf::default());
-                    }
-                },
-                Some((_, _)) => unreachable!(),
-                None => (),
-            }
-        }
-
-        if raw_matches.iter().all(|rm| postings_lists[rm.postings_list].is_empty()) {
-            return None
-        }
-
-        Some(RawDocument {
-            raw_matches,
-            processed_matches: Vec::new(),
-            processed_distances: Vec::new(),
-        })
-    }
-}
-
 pub struct BareMatch<'tag> {
    pub document_id: DocumentId,
    pub query_index: u16,
--- a/meilisearch-core/src/criterion/attribute.rs
+++ b/meilisearch-core/src/criterion/attribute.rs
@ -0,0 +1,48 @@
+use std::cmp::{self, Ordering};
+
+use compact_arena::SmallArena;
+use slice_group_by::GroupBy;
+
+use crate::automaton::QueryEnhancer;
+use crate::bucket_sort::{SimpleMatch, PostingsListView, QueryWordAutomaton};
+use crate::RawDocument;
+
+use super::{Criterion, prepare_raw_matches};
+
+/// Criterion ordering documents by the smallest attribute value found
+/// among their processed matches (a smaller value sorts first).
+pub struct Attribute;
+
+impl Criterion for Attribute {
+    fn name(&self) -> &str { "attribute" }
+
+    /// Delegates to `prepare_raw_matches` so that `processed_matches`
+    /// is filled for every document before `evaluate` is called.
+    fn prepare<'a, 'tag, 'txn>(
+        &self,
+        documents: &mut [RawDocument<'a, 'tag>],
+        postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
+        query_enhancer: &QueryEnhancer,
+        automatons: &[QueryWordAutomaton],
+    ) {
+        prepare_raw_matches(documents, postings_lists, query_enhancer, automatons);
+    }
+
+    /// Compares the best attribute of `lhs` against the one of `rhs`.
+    /// `postings_lists` is not consulted by this criterion.
+    fn evaluate<'a, 'tag, 'txn>(
+        &self,
+        lhs: &RawDocument<'a, 'tag>,
+        rhs: &RawDocument<'a, 'tag>,
+        postings_lists: &SmallArena<'tag, PostingsListView<'txn>>,
+    ) -> Ordering
+    {
+        // Minimum, over every run of matches sharing a query index, of the
+        // attribute of the first match of that run; `u16::max_value()`
+        // when there is no match at all.
+        #[inline]
+        fn best_attribute(matches: &[SimpleMatch]) -> u16 {
+            matches
+                .linear_group_by_key(|m| m.query_index)
+                .fold(u16::max_value(), |best, group| cmp::min(best, group[0].attribute))
+        }
+
+        let left = best_attribute(&lhs.processed_matches);
+        let right = best_attribute(&rhs.processed_matches);
+
+        left.cmp(&right)
+    }
+}
--- a/meilisearch-core/src/criterion/document_id.rs
+++ b/meilisearch-core/src/criterion/document_id.rs
@ -1,16 +1,37 @@
-use crate::criterion::Criterion;
-use crate::RawDocument;
 use std::cmp::Ordering;

-#[derive(Debug, Clone, Copy)]
+use compact_arena::SmallArena;
+
+use crate::automaton::QueryEnhancer;
+use crate::bucket_sort::{PostingsListView, QueryWordAutomaton};
+use crate::RawDocument;
+use super::Criterion;
+
 pub struct DocumentId;

 impl Criterion for DocumentId {
-    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
-        lhs.id.cmp(&rhs.id)
+    fn name(&self) -> &str { "stable document id" }
+
+    fn prepare(
+        &self,
+        documents: &mut [RawDocument],
+        postings_lists: &mut SmallArena<PostingsListView>,
+        query_enhancer: &QueryEnhancer,
+        automatons: &[QueryWordAutomaton],
+    ) {
+        // ...
    }

-    fn name(&self) -> &str {
-        "DocumentId"
+    fn evaluate(
+        &self,
+        lhs: &RawDocument,
+        rhs: &RawDocument,
+        postings_lists: &SmallArena<PostingsListView>,
+    ) -> Ordering
+    {
+        let lhs = &lhs.id;
+        let rhs = &rhs.id;
+
+        lhs.cmp(rhs)
    }
 }
--- a/meilisearch-core/src/criterion/exact.rs
+++ b/meilisearch-core/src/criterion/exact.rs
@ -1,131 +1,51 @@
-use std::cmp::Ordering;
+use std::cmp::{Ordering, Reverse};

-use sdset::Set;
+use compact_arena::SmallArena;
 use slice_group_by::GroupBy;

-use crate::criterion::Criterion;
-use crate::{AttrCount, RawDocument};
+use crate::automaton::QueryEnhancer;
+use crate::bucket_sort::{PostingsListView, BareMatch, QueryWordAutomaton};
+use crate::RawDocument;
+use super::Criterion;

-#[inline]
-fn number_exact_matches(
-    query_index: &[u32],
-    attribute: &[u16],
-    is_exact: &[bool],
-    fields_counts: &Set<AttrCount>,
-) -> usize {
-    let mut count = 0;
-    let mut index = 0;
-
-    for group in query_index.linear_group() {
-        let len = group.len();
-
-        let mut found_exact = false;
-        for (pos, is_exact) in is_exact[index..index + len].iter().enumerate() {
-            if *is_exact {
-                found_exact = true;
-                let attr = &attribute[index + pos];
-                if let Ok(pos) = fields_counts.binary_search_by_key(attr, |ac| ac.attr) {
-                    let AttrCount { count, .. } = fields_counts[pos];
-                    if count == 1 {
-                        return usize::max_value();
-                    }
-                }
-            }
-        }
-
-        count += found_exact as usize;
-        index += len;
-    }
-
-    count
-}
-
-#[derive(Debug, Clone, Copy)]
 pub struct Exact;

 impl Criterion for Exact {
-    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
-        let lhs = {
-            let query_index = lhs.query_index();
-            let is_exact = lhs.is_exact();
-            let attribute = lhs.attribute();
-            let fields_counts = lhs.fields_counts.as_ref().unwrap();
+    fn name(&self) -> &str { "exact" }

-            number_exact_matches(query_index, attribute, is_exact, fields_counts)
-        };
+    fn prepare(
+        &self,
+        documents: &mut [RawDocument],
+        postings_lists: &mut SmallArena<PostingsListView>,
+        query_enhancer: &QueryEnhancer,
+        automatons: &[QueryWordAutomaton],
+    ) {
+        for document in documents {
+            document.raw_matches.sort_unstable_by_key(|bm| (bm.query_index, Reverse(bm.is_exact)));
+        }
+    }

-        let rhs = {
-            let query_index = rhs.query_index();
-            let is_exact = rhs.is_exact();
-            let attribute = rhs.attribute();
-            let fields_counts = rhs.fields_counts.as_ref().unwrap();
+    fn evaluate(
+        &self,
+        lhs: &RawDocument,
+        rhs: &RawDocument,
+        postings_lists: &SmallArena<PostingsListView>,
+    ) -> Ordering
+    {
+        #[inline]
+        fn sum_exact_query_words(matches: &[BareMatch]) -> usize {
+            let mut sum_exact_query_words = 0;

-            number_exact_matches(query_index, attribute, is_exact, fields_counts)
-        };
+            for group in matches.linear_group_by_key(|bm| bm.query_index) {
+                sum_exact_query_words += group[0].is_exact as usize;
+            }
+
+            sum_exact_query_words
+        }
+
+        let lhs = sum_exact_query_words(&lhs.raw_matches);
+        let rhs = sum_exact_query_words(&rhs.raw_matches);

        lhs.cmp(&rhs).reverse()
    }
-
-    fn name(&self) -> &str {
-        "Exact"
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    // typing: "soulier"
-    //
-    // doc0: "Soulier bleu"
-    // doc1: "souliereres rouge"
-    #[test]
-    fn easy_case() {
-        let doc0 = {
-            let query_index = &[0];
-            let attribute = &[0];
-            let is_exact = &[true];
-            let fields_counts = Set::new(&[AttrCount { attr: 0, count: 2 }]).unwrap();
-
-            number_exact_matches(query_index, attribute, is_exact, fields_counts)
-        };
-
-        let doc1 = {
-            let query_index = &[0];
-            let attribute = &[0];
-            let is_exact = &[false];
-            let fields_counts = Set::new(&[AttrCount { attr: 0, count: 2 }]).unwrap();
-
-            number_exact_matches(query_index, attribute, is_exact, fields_counts)
-        };
-
-        assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
-    }
-
-    // typing: "soulier"
-    //
-    // doc0: { 0. "soulier" }
-    // doc1: { 0. "soulier bleu et blanc" }
-    #[test]
-    fn basic() {
-        let doc0 = {
-            let query_index = &[0];
-            let attribute = &[0];
-            let is_exact = &[true];
-            let fields_counts = Set::new(&[AttrCount { attr: 0, count: 1 }]).unwrap();
-
-            number_exact_matches(query_index, attribute, is_exact, fields_counts)
-        };
-
-        let doc1 = {
-            let query_index = &[0];
-            let attribute = &[0];
-            let is_exact = &[true];
-            let fields_counts = Set::new(&[AttrCount { attr: 0, count: 4 }]).unwrap();
-
-            number_exact_matches(query_index, attribute, is_exact, fields_counts)
-        };
-
-        assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
-    }
 }
--- a/meilisearch-core/src/criterion/mod.rs
+++ b/meilisearch-core/src/criterion/mod.rs
@ -1,58 +1,58 @@
-mod document_id;
-mod exact;
-mod number_of_words;
-mod sort_by_attr;
-mod sum_of_typos;
-mod sum_of_words_attribute;
-mod sum_of_words_position;
-mod words_proximity;
+use std::cmp::{self, Ordering};

+use compact_arena::SmallArena;
+use sdset::SetBuf;
+use slice_group_by::GroupBy;
+
+use crate::automaton::QueryEnhancer;
+use crate::bucket_sort::{SimpleMatch, PostingsListView, QueryWordAutomaton};
 use crate::RawDocument;
-use std::cmp::Ordering;

-pub use self::{
-    document_id::DocumentId, exact::Exact, number_of_words::NumberOfWords,
-    sort_by_attr::SortByAttr, sum_of_typos::SumOfTypos,
-    sum_of_words_attribute::SumOfWordsAttribute, sum_of_words_position::SumOfWordsPosition,
-    words_proximity::WordsProximity,
-};
+mod typo;
+mod words;
+mod proximity;
+mod attribute;
+mod words_position;
+mod exact;
+mod document_id;
+mod sort_by_attr;

-pub trait Criterion: Send + Sync {
-    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering;
+pub use self::typo::Typo;
+pub use self::words::Words;
+pub use self::proximity::Proximity;
+pub use self::attribute::Attribute;
+pub use self::words_position::WordsPosition;
+pub use self::exact::Exact;
+pub use self::document_id::DocumentId;
+pub use self::sort_by_attr::SortByAttr;

+pub trait Criterion {
    fn name(&self) -> &str;

+    fn prepare<'a, 'tag, 'txn>(
+        &self,
+        documents: &mut [RawDocument<'a, 'tag>],
+        postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
+        query_enhancer: &QueryEnhancer,
+        automatons: &[QueryWordAutomaton],
+    );
+
+    fn evaluate<'a, 'tag, 'txn>(
+        &self,
+        lhs: &RawDocument<'a, 'tag>,
+        rhs: &RawDocument<'a, 'tag>,
+        postings_lists: &SmallArena<'tag, PostingsListView<'txn>>,
+    ) -> Ordering;
+
    #[inline]
-    fn eq(&self, lhs: &RawDocument, rhs: &RawDocument) -> bool {
-        self.evaluate(lhs, rhs) == Ordering::Equal
-    }
-}
-
-impl<'a, T: Criterion + ?Sized + Send + Sync> Criterion for &'a T {
-    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
-        (**self).evaluate(lhs, rhs)
-    }
-
-    fn name(&self) -> &str {
-        (**self).name()
-    }
-
-    fn eq(&self, lhs: &RawDocument, rhs: &RawDocument) -> bool {
-        (**self).eq(lhs, rhs)
-    }
-}
-
-impl<T: Criterion + ?Sized> Criterion for Box<T> {
-    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
-        (**self).evaluate(lhs, rhs)
-    }
-
-    fn name(&self) -> &str {
-        (**self).name()
-    }
-
-    fn eq(&self, lhs: &RawDocument, rhs: &RawDocument) -> bool {
-        (**self).eq(lhs, rhs)
+    fn eq<'a, 'tag, 'txn>(
+        &self,
+        lhs: &RawDocument<'a, 'tag>,
+        rhs: &RawDocument<'a, 'tag>,
+        postings_lists: &SmallArena<'tag, PostingsListView<'txn>>,
+    ) -> bool
+    {
+        self.evaluate(lhs, rhs, postings_lists) == Ordering::Equal
    }
 }

@ -103,11 +103,11 @@ pub struct Criteria<'a> {
 impl<'a> Default for Criteria<'a> {
    fn default() -> Self {
        CriteriaBuilder::with_capacity(7)
-            .add(SumOfTypos)
-            .add(NumberOfWords)
-            .add(WordsProximity)
-            .add(SumOfWordsAttribute)
-            .add(SumOfWordsPosition)
+            .add(Typo)
+            .add(Words)
+            .add(Proximity)
+            .add(Attribute)
+            .add(WordsPosition)
            .add(Exact)
            .add(DocumentId)
            .build()
@ -119,3 +119,165 @@ impl<'a> AsRef<[Box<dyn Criterion + 'a>]> for Criteria<'a> {
        &self.inner
    }
 }
+
+/// Fills `processed_distances` of every document that does not have it yet.
+///
+/// For each raw match whose postings list is not empty, every index of the
+/// range returned by `query_enhancer.replacement` receives the smallest
+/// `distance` seen so far for that index. Indices never reached stay `None`.
+/// `automatons` is accepted but not read here.
+fn prepare_query_distances<'a, 'tag, 'txn>(
+    documents: &mut [RawDocument<'a, 'tag>],
+    query_enhancer: &QueryEnhancer,
+    automatons: &[QueryWordAutomaton],
+    postings_lists: &SmallArena<'tag, PostingsListView<'txn>>,
+) {
+    // documents with a non-empty `processed_distances` are left untouched
+    let pending = documents.iter_mut().filter(|d| d.processed_distances.is_empty());
+
+    for document in pending {
+        let mut distances = Vec::new();
+
+        for raw_match in document.raw_matches.iter() {
+            if postings_lists[raw_match.postings_list].is_empty() {
+                continue;
+            }
+
+            let range = query_enhancer.replacement(raw_match.query_index as u32);
+
+            // grow (never shrink) so that every index of the range is addressable
+            let wanted_len = range.end as usize;
+            if distances.len() < wanted_len {
+                distances.resize(wanted_len, None);
+            }
+
+            for index in range {
+                let slot = &mut distances[index as usize];
+                let smallest = slot.map_or(raw_match.distance, |seen| cmp::min(seen, raw_match.distance));
+                *slot = Some(smallest);
+            }
+        }
+
+        document.processed_distances = distances;
+    }
+}
+
+/// Fills `processed_matches` of every document that does not have it yet.
+///
+/// Each raw match is expanded into one `SimpleMatch` per entry of its
+/// postings list, then the whole list goes through
+/// `multiword_rewrite_matches` before being stored on the document.
+fn prepare_raw_matches<'a, 'tag, 'txn>(
+    documents: &mut [RawDocument<'a, 'tag>],
+    postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
+    query_enhancer: &QueryEnhancer,
+    automatons: &[QueryWordAutomaton],
+) {
+    // documents with a non-empty `processed_matches` are left untouched
+    let pending = documents.iter_mut().filter(|d| d.processed_matches.is_empty());
+
+    for document in pending {
+        let mut simple_matches = Vec::new();
+
+        for raw_match in document.raw_matches.iter() {
+            let postings_list = &postings_lists[raw_match.postings_list];
+            simple_matches.reserve(postings_list.len());
+
+            for di in postings_list.as_ref() {
+                simple_matches.push(SimpleMatch {
+                    query_index: raw_match.query_index,
+                    distance: raw_match.distance,
+                    attribute: di.attribute,
+                    word_index: di.word_index,
+                    is_exact: raw_match.is_exact,
+                });
+            }
+        }
+
+        let rewritten = multiword_rewrite_matches(&mut simple_matches, query_enhancer, automatons);
+        document.processed_matches = rewritten.into_vec();
+    }
+}
+
+/// Rewrites `matches` so that each match is expanded over the range of query
+/// indices returned by `query_enhancer.replacement`, shifting ("padding") the
+/// word indices of the following matches of the same attribute accordingly.
+///
+/// `matches` is sorted in place by `(attribute, word_index)` as a side effect.
+/// The rewritten matches are returned as a `SetBuf` built with
+/// `SetBuf::from_dirty`, so no ordering of the intermediate vector is relied on.
+///
+/// `automatons` is accepted but not read by this function.
+///
+/// NOTE(review): `rep.next().unwrap()` and `replacement_len - 1` below assume
+/// that `replacement` never returns an empty range; confirm against
+/// `QueryEnhancer`.
+fn multiword_rewrite_matches(
+    matches: &mut [SimpleMatch],
+    query_enhancer: &QueryEnhancer,
+    automatons: &[QueryWordAutomaton],
+) -> SetBuf<SimpleMatch>
+{
+    // the grouping below is linear, so equal attributes (and then equal
+    // word indices) must be adjacent
+    matches.sort_unstable_by_key(|m| (m.attribute, m.word_index));
+
+    let mut padded_matches = Vec::with_capacity(matches.len());
+
+    // let before_padding = Instant::now();
+    // for each attribute of each document
+    for same_document_attribute in matches.linear_group_by_key(|m| m.attribute) {
+        // padding will only be applied
+        // to word indices in the same attribute
+        let mut padding = 0;
+        let mut iter = same_document_attribute.linear_group_by_key(|m| m.word_index);
+
+        // for each match at the same position
+        // in this document attribute
+        // (a `while let` is required here: `iter.remainder()` is read inside
+        // the loop body to look ahead at the groups not yet consumed)
+        while let Some(same_word_index) = iter.next() {
+            // find the biggest padding
+            let mut biggest = 0;
+            for match_ in same_word_index {
+                let mut replacement = query_enhancer.replacement(match_.query_index as u32);
+                let replacement_len = replacement.len();
+                let nexts = iter.remainder().linear_group_by_key(|m| m.word_index);
+
+                // the first query index of the replacement takes the place
+                // of the original match, shifted by the current padding;
+                // `replacement` then only yields the remaining indices
+                if let Some(query_index) = replacement.next() {
+                    let word_index = match_.word_index + padding as u16;
+                    let query_index = query_index as u16;
+                    let match_ = SimpleMatch { query_index, word_index, ..*match_ };
+                    padded_matches.push(match_);
+                }
+
+                let mut found = false;
+
+                // look ahead and if there already is a match
+                // corresponding to this padding word, abort the padding
+                'padding: for (x, next_group) in nexts.enumerate() {
+                    for (i, query_index) in replacement.clone().enumerate().skip(x) {
+                        let word_index = match_.word_index + padding as u16 + (i + 1) as u16;
+                        let query_index = query_index as u16;
+                        let padmatch = SimpleMatch { query_index, word_index, ..*match_ };
+
+                        for nmatch_ in next_group {
+                            let mut rep = query_enhancer.replacement(nmatch_.query_index as u32);
+                            let query_index = rep.next().unwrap() as u16;
+                            if query_index == padmatch.query_index {
+                                if !found {
+                                    // if we find a corresponding padding for the
+                                    // first time we must push preceding paddings
+                                    for (i, query_index) in replacement.clone().enumerate().take(i)
+                                    {
+                                        let word_index = match_.word_index + padding as u16 + (i + 1) as u16;
+                                        let query_index = query_index as u16;
+                                        let match_ = SimpleMatch { query_index, word_index, ..*match_ };
+                                        padded_matches.push(match_);
+                                        biggest = biggest.max(i + 1);
+                                    }
+                                }
+
+                                padded_matches.push(padmatch);
+                                found = true;
+                                continue 'padding;
+                            }
+                        }
+                    }
+
+                    // if we do not find a corresponding padding in the
+                    // next groups, stop here and pad what was found
+                    break;
+                }
+
+                if !found {
+                    // if no padding was found in the following matches
+                    // we must insert the entire padding
+                    for (i, query_index) in replacement.enumerate() {
+                        let word_index = match_.word_index + padding as u16 + (i + 1) as u16;
+                        let query_index = query_index as u16;
+                        let match_ = SimpleMatch { query_index, word_index, ..*match_ };
+                        padded_matches.push(match_);
+                    }
+
+                    // NOTE(review): underflows if `replacement_len` is 0
+                    biggest = biggest.max(replacement_len - 1);
+                }
+            }
+
+            // the widest expansion at this position shifts every
+            // following word index of the same attribute
+            padding += biggest;
+        }
+    }
+
+    // debug!("padding matches took {:.02?}", before_padding.elapsed());
+
+    // With this check we can see that the loop above takes something
+    // like 43% of the search time even when no rewrite is needed.
+    // assert_eq!(before_matches, padded_matches);
+
+    SetBuf::from_dirty(padded_matches)
+}
--- a/meilisearch-core/src/criterion/number_of_words.rs
+++ b/meilisearch-core/src/criterion/number_of_words.rs
@ -1,31 +0,0 @@
-use crate::criterion::Criterion;
-use crate::RawDocument;
-use slice_group_by::GroupBy;
-use std::cmp::Ordering;
-
-#[inline]
-fn number_of_query_words(query_index: &[u32]) -> usize {
-    query_index.linear_group().count()
-}
-
-#[derive(Debug, Clone, Copy)]
-pub struct NumberOfWords;
-
-impl Criterion for NumberOfWords {
-    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
-        let lhs = {
-            let query_index = lhs.query_index();
-            number_of_query_words(query_index)
-        };
-        let rhs = {
-            let query_index = rhs.query_index();
-            number_of_query_words(query_index)
-        };
-
-        lhs.cmp(&rhs).reverse()
-    }
-
-    fn name(&self) -> &str {
-        "NumberOfWords"
-    }
-}
--- a/meilisearch-core/src/criterion/proximity.rs
+++ b/meilisearch-core/src/criterion/proximity.rs
@ -0,0 +1,79 @@
+use std::cmp::{self, Ordering};
+
+use compact_arena::SmallArena;
+use slice_group_by::GroupBy;
+
+use crate::automaton::QueryEnhancer;
+use crate::bucket_sort::{PostingsListView, SimpleMatch, QueryWordAutomaton};
+use crate::RawDocument;
+
+use super::{Criterion, prepare_raw_matches};
+
+/// Criterion ordering documents by the summed proximity between the
+/// matches of consecutive query-index groups (a smaller sum sorts first).
+pub struct Proximity;
+
+impl Criterion for Proximity {
+    fn name(&self) -> &str { "proximity" }
+
+    /// Delegates to `prepare_raw_matches` so that `processed_matches`
+    /// is filled for every document before `evaluate` is called.
+    fn prepare<'a, 'tag, 'txn>(
+        &self,
+        documents: &mut [RawDocument<'a, 'tag>],
+        postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
+        query_enhancer: &QueryEnhancer,
+        automatons: &[QueryWordAutomaton],
+    ) {
+        prepare_raw_matches(documents, postings_lists, query_enhancer, automatons);
+    }
+
+    /// Compares the total proximity of `lhs` against the one of `rhs`.
+    /// `postings_lists` is not consulted by this criterion.
+    fn evaluate<'a, 'tag, 'txn>(
+        &self,
+        lhs: &RawDocument<'a, 'tag>,
+        rhs: &RawDocument<'a, 'tag>,
+        postings_lists: &SmallArena<'tag, PostingsListView<'txn>>,
+    ) -> Ordering
+    {
+        const MAX_DISTANCE: u16 = 8;
+
+        // Distance between two word indices, capped at `MAX_DISTANCE`;
+        // one more is added when `lhs` does not come strictly before `rhs`.
+        fn index_proximity(lhs: u16, rhs: u16) -> u16 {
+            match lhs.cmp(&rhs) {
+                Ordering::Less => cmp::min(rhs - lhs, MAX_DISTANCE),
+                _ => cmp::min(lhs - rhs, MAX_DISTANCE) + 1,
+            }
+        }
+
+        // Matches located in different attributes are always `MAX_DISTANCE` apart.
+        fn attribute_proximity(lhs: SimpleMatch, rhs: SimpleMatch) -> u16 {
+            if lhs.attribute == rhs.attribute {
+                index_proximity(lhs.word_index, rhs.word_index)
+            } else {
+                MAX_DISTANCE
+            }
+        }
+
+        // Smallest proximity over every pair made of one match of each
+        // slice; `u16::max_value()` when either slice is empty.
+        fn min_proximity(lhs: &[SimpleMatch], rhs: &[SimpleMatch]) -> u16 {
+            lhs.iter()
+                .flat_map(|a| rhs.iter().map(move |b| attribute_proximity(*a, *b)))
+                .fold(u16::max_value(), cmp::min)
+        }
+
+        // Sum of the minimal proximities between each pair of
+        // neighbouring query-index groups.
+        fn matches_proximity(matches: &[SimpleMatch],) -> u16 {
+            let mut groups = matches.linear_group_by_key(|m| m.query_index);
+
+            let mut previous = match groups.next() {
+                Some(group) => group,
+                None => return 0,
+            };
+
+            let mut total = 0;
+            for current in groups {
+                total += min_proximity(previous, current);
+                previous = current;
+            }
+
+            total
+        }
+
+        let left = matches_proximity(&lhs.processed_matches);
+        let right = matches_proximity(&rhs.processed_matches);
+
+        left.cmp(&right)
+    }
+}
--- a/meilisearch-core/src/criterion/sort_by_attr.rs
+++ b/meilisearch-core/src/criterion/sort_by_attr.rs
@ -2,9 +2,13 @@ use std::cmp::Ordering;
 use std::error::Error;
 use std::fmt;

+use compact_arena::SmallArena;
+use meilisearch_schema::{Schema, SchemaAttr};
+
+use crate::automaton::QueryEnhancer;
+use crate::bucket_sort::{PostingsListView, QueryWordAutomaton};
 use crate::criterion::Criterion;
 use crate::{RankedMap, RawDocument};
-use meilisearch_schema::{Schema, SchemaAttr};

 /// An helper struct that permit to sort documents by
 /// some of their stored attributes.
@ -28,11 +32,11 @@ use meilisearch_schema::{Schema, SchemaAttr};
 /// let custom_ranking = SortByAttr::lower_is_better(&ranked_map, &schema, "published_at")?;
 ///
 /// let builder = CriteriaBuilder::with_capacity(8)
-///        .add(SumOfTypos)
-///        .add(NumberOfWords)
-///        .add(WordsProximity)
-///        .add(SumOfWordsAttribute)
-///        .add(SumOfWordsPosition)
+///        .add(Typo)
+///        .add(Words)
+///        .add(Proximity)
+///        .add(Attribute)
+///        .add(WordsPosition)
 ///        .add(Exact)
 ///        .add(custom_ranking)
 ///        .add(DocumentId);
@ -86,8 +90,28 @@ impl<'a> SortByAttr<'a> {
    }
 }

-impl<'a> Criterion for SortByAttr<'a> {
-    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
+impl Criterion for SortByAttr<'_> {
+    fn name(&self) -> &str {
+        "sort by attribute"
+    }
+
+    fn prepare<'a, 'tag, 'txn>(
+        &self,
+        documents: &mut [RawDocument<'a, 'tag>],
+        postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
+        query_enhancer: &QueryEnhancer,
+        automatons: &[QueryWordAutomaton],
+    ) {
+        // ...
+    }
+
+    fn evaluate<'a, 'tag, 'txn>(
+        &self,
+        lhs: &RawDocument<'a, 'tag>,
+        rhs: &RawDocument<'a, 'tag>,
+        postings_lists: &SmallArena<'tag, PostingsListView<'txn>>,
+    ) -> Ordering
+    {
        let lhs = self.ranked_map.get(lhs.id, self.attr);
        let rhs = self.ranked_map.get(rhs.id, self.attr);

@ -105,10 +129,6 @@ impl<'a> Criterion for SortByAttr<'a> {
            (None, None) => Ordering::Equal,
        }
    }
-
-    fn name(&self) -> &str {
-        "SortByAttr"
-    }
 }

 #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
--- a/meilisearch-core/src/criterion/sum_of_typos.rs
+++ b/meilisearch-core/src/criterion/sum_of_typos.rs
@ -1,116 +0,0 @@
-use std::cmp::Ordering;
-
-use slice_group_by::GroupBy;
-
-use crate::criterion::Criterion;
-use crate::RawDocument;
-
-// This function is a wrong logarithmic 10 function.
-// It is safe to panic on input number higher than 3,
-// the number of typos is never bigger than that.
-#[inline]
-fn custom_log10(n: u8) -> f32 {
-    match n {
-        0 => 0.0,     // log(1)
-        1 => 0.30102, // log(2)
-        2 => 0.47712, // log(3)
-        3 => 0.60205, // log(4)
-        _ => panic!("invalid number"),
-    }
-}
-
-#[inline]
-fn sum_matches_typos(query_index: &[u32], distance: &[u8]) -> usize {
-    let mut number_words: usize = 0;
-    let mut sum_typos = 0.0;
-    let mut index = 0;
-
-    for group in query_index.linear_group() {
-        sum_typos += custom_log10(distance[index]);
-        number_words += 1;
-        index += group.len();
-    }
-
-    (number_words as f32 / (sum_typos + 1.0) * 1000.0) as usize
-}
-
-#[derive(Debug, Clone, Copy)]
-pub struct SumOfTypos;
-
-impl Criterion for SumOfTypos {
-    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
-        let lhs = {
-            let query_index = lhs.query_index();
-            let distance = lhs.distance();
-            sum_matches_typos(query_index, distance)
-        };
-
-        let rhs = {
-            let query_index = rhs.query_index();
-            let distance = rhs.distance();
-            sum_matches_typos(query_index, distance)
-        };
-
-        lhs.cmp(&rhs).reverse()
-    }
-
-    fn name(&self) -> &str {
-        "SumOfTypos"
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    // typing: "Geox CEO"
-    //
-    // doc0: "Geox SpA: CEO and Executive"
-    // doc1: "Mt. Gox CEO Resigns From Bitcoin Foundation"
-    #[test]
-    fn one_typo_reference() {
-        let query_index0 = &[0, 1];
-        let distance0 = &[0, 0];
-
-        let query_index1 = &[0, 1];
-        let distance1 = &[1, 0];
-
-        let doc0 = sum_matches_typos(query_index0, distance0);
-        let doc1 = sum_matches_typos(query_index1, distance1);
-        assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
-    }
-
-    // typing: "bouton manchette"
-    //
-    // doc0: "bouton manchette"
-    // doc1: "bouton"
-    #[test]
-    fn no_typo() {
-        let query_index0 = &[0, 1];
-        let distance0 = &[0, 0];
-
-        let query_index1 = &[0];
-        let distance1 = &[0];
-
-        let doc0 = sum_matches_typos(query_index0, distance0);
-        let doc1 = sum_matches_typos(query_index1, distance1);
-        assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
-    }
-
-    // typing: "bouton manchztte"
-    //
-    // doc0: "bouton manchette"
-    // doc1: "bouton"
-    #[test]
-    fn one_typo() {
-        let query_index0 = &[0, 1];
-        let distance0 = &[0, 1];
-
-        let query_index1 = &[0];
-        let distance1 = &[0];
-
-        let doc0 = sum_matches_typos(query_index0, distance0);
-        let doc1 = sum_matches_typos(query_index1, distance1);
-        assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
-    }
-}
--- a/meilisearch-core/src/criterion/sum_of_words_attribute.rs
+++ b/meilisearch-core/src/criterion/sum_of_words_attribute.rs
@ -1,64 +0,0 @@
-use crate::criterion::Criterion;
-use crate::RawDocument;
-use slice_group_by::GroupBy;
-use std::cmp::Ordering;
-
-#[inline]
-fn sum_matches_attributes(query_index: &[u32], attribute: &[u16]) -> usize {
-    let mut sum_attributes = 0;
-    let mut index = 0;
-
-    for group in query_index.linear_group() {
-        sum_attributes += attribute[index] as usize;
-        index += group.len();
-    }
-
-    sum_attributes
-}
-
-#[derive(Debug, Clone, Copy)]
-pub struct SumOfWordsAttribute;
-
-impl Criterion for SumOfWordsAttribute {
-    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
-        let lhs = {
-            let query_index = lhs.query_index();
-            let attribute = lhs.attribute();
-            sum_matches_attributes(query_index, attribute)
-        };
-
-        let rhs = {
-            let query_index = rhs.query_index();
-            let attribute = rhs.attribute();
-            sum_matches_attributes(query_index, attribute)
-        };
-
-        lhs.cmp(&rhs)
-    }
-
-    fn name(&self) -> &str {
-        "SumOfWordsAttribute"
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    // typing: "soulier"
-    //
-    // doc0: { 0. "Soulier bleu", 1. "bla bla bla" }
-    // doc1: { 0. "Botte rouge", 1. "Soulier en cuir" }
-    #[test]
-    fn title_vs_description() {
-        let query_index0 = &[0];
-        let attribute0 = &[0];
-
-        let query_index1 = &[0];
-        let attribute1 = &[1];
-
-        let doc0 = sum_matches_attributes(query_index0, attribute0);
-        let doc1 = sum_matches_attributes(query_index1, attribute1);
-        assert_eq!(doc0.cmp(&doc1), Ordering::Less);
-    }
-}
--- a/meilisearch-core/src/criterion/sum_of_words_position.rs
+++ b/meilisearch-core/src/criterion/sum_of_words_position.rs
@ -1,64 +0,0 @@
-use crate::criterion::Criterion;
-use crate::RawDocument;
-use slice_group_by::GroupBy;
-use std::cmp::Ordering;
-
-#[inline]
-fn sum_matches_attribute_index(query_index: &[u32], word_index: &[u16]) -> usize {
-    let mut sum_word_index = 0;
-    let mut index = 0;
-
-    for group in query_index.linear_group() {
-        sum_word_index += word_index[index] as usize;
-        index += group.len();
-    }
-
-    sum_word_index
-}
-
-#[derive(Debug, Clone, Copy)]
-pub struct SumOfWordsPosition;
-
-impl Criterion for SumOfWordsPosition {
-    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
-        let lhs = {
-            let query_index = lhs.query_index();
-            let word_index = lhs.word_index();
-            sum_matches_attribute_index(query_index, word_index)
-        };
-
-        let rhs = {
-            let query_index = rhs.query_index();
-            let word_index = rhs.word_index();
-            sum_matches_attribute_index(query_index, word_index)
-        };
-
-        lhs.cmp(&rhs)
-    }
-
-    fn name(&self) -> &str {
-        "SumOfWordsPosition"
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    // typing: "soulier"
-    //
-    // doc0: "Soulier bleu"
-    // doc1: "Botte rouge et soulier noir"
-    #[test]
-    fn easy_case() {
-        let query_index0 = &[0];
-        let word_index0 = &[0];
-
-        let query_index1 = &[0];
-        let word_index1 = &[3];
-
-        let doc0 = sum_matches_attribute_index(query_index0, word_index0);
-        let doc1 = sum_matches_attribute_index(query_index1, word_index1);
-        assert_eq!(doc0.cmp(&doc1), Ordering::Less);
-    }
-}
--- a/meilisearch-core/src/criterion/typo.rs
+++ b/meilisearch-core/src/criterion/typo.rs
@ -0,0 +1,67 @@
+use std::cmp::Ordering;
+
+use compact_arena::SmallArena;
+
+use crate::automaton::QueryEnhancer;
+use crate::bucket_sort::{PostingsListView, QueryWordAutomaton};
+use crate::RawDocument;
+
+use super::{Criterion, prepare_query_distances};
+
+/// Criterion that orders documents using the typo distances stored in
+/// `RawDocument::processed_distances`.
+pub struct Typo;
+
+impl Criterion for Typo {
+    /// Name reported for this criterion.
+    fn name(&self) -> &str {
+        "typo"
+    }
+
+    /// Forwards the documents, query enhancer, automatons and postings lists
+    /// to `prepare_query_distances`.
+    fn prepare<'a, 'tag, 'txn>(
+        &self,
+        documents: &mut [RawDocument<'a, 'tag>],
+        postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
+        query_enhancer: &QueryEnhancer,
+        automatons: &[QueryWordAutomaton],
+    ) {
+        prepare_query_distances(documents, query_enhancer, automatons, postings_lists);
+    }
+
+    /// Computes a typo score for each document and compares the two scores
+    /// in reverse: the document with the larger score yields `Ordering::Less`.
+    fn evaluate(
+        &self,
+        lhs: &RawDocument,
+        rhs: &RawDocument,
+        postings_lists: &SmallArena<PostingsListView>,
+    ) -> Ordering
+    {
+        // Fixed lookup table standing in for a base-10 logarithm of
+        // `typos + 1`; it is not a real log10 implementation.
+        // The original author notes that the number of typos is never
+        // bigger than 3, so the panicking arm is not expected to be reached.
+        #[inline]
+        fn typo_weight(typos: u8) -> f32 {
+            match typos {
+                0 => 0.0,     // log(1)
+                1 => 0.30102, // log(2)
+                2 => 0.47712, // log(3)
+                3 => 0.60205, // log(4)
+                _ => panic!("invalid number"),
+            }
+        }
+
+        // Score = (number of `Some` entries) / (sum of their weights + 1) * 1000,
+        // truncated to an integer so that scores can be totally ordered.
+        #[inline]
+        fn typo_score(distances: &[Option<u8>]) -> usize {
+            let (matched_words, weight_sum) = distances
+                .iter()
+                .flatten()
+                .fold((0_usize, 0.0_f32), |(count, sum), typos| {
+                    (count + 1, sum + typo_weight(*typos))
+                });
+
+            (matched_words as f32 / (weight_sum + 1.0) * 1000.0) as usize
+        }
+
+        let lhs_score = typo_score(&lhs.processed_distances);
+        let rhs_score = typo_score(&rhs.processed_distances);
+
+        // Swapping the operands is the same as reversing the comparison.
+        rhs_score.cmp(&lhs_score)
+    }
+}
--- a/meilisearch-core/src/criterion/words.rs
+++ b/meilisearch-core/src/criterion/words.rs
@ -0,0 +1,43 @@
+use std::cmp::Ordering;
+
+use compact_arena::SmallArena;
+
+use crate::automaton::QueryEnhancer;
+use crate::bucket_sort::{PostingsListView, QueryWordAutomaton};
+use crate::RawDocument;
+
+use super::{Criterion, prepare_query_distances};
+
+/// Criterion that orders documents by the number of `Some` entries found in
+/// `RawDocument::processed_distances`.
+pub struct Words;
+
+impl Criterion for Words {
+    /// Name reported for this criterion.
+    fn name(&self) -> &str {
+        "words"
+    }
+
+    /// Forwards the documents, query enhancer, automatons and postings lists
+    /// to `prepare_query_distances`.
+    fn prepare<'a, 'tag, 'txn>(
+        &self,
+        documents: &mut [RawDocument<'a, 'tag>],
+        postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
+        query_enhancer: &QueryEnhancer,
+        automatons: &[QueryWordAutomaton],
+    ) {
+        prepare_query_distances(documents, query_enhancer, automatons, postings_lists);
+    }
+
+    /// Counts the populated distance slots of each document and compares the
+    /// counts in reverse: the document with more of them yields `Ordering::Less`.
+    fn evaluate(
+        &self,
+        lhs: &RawDocument,
+        rhs: &RawDocument,
+        postings_lists: &SmallArena<PostingsListView>,
+    ) -> Ordering
+    {
+        // Number of slots that hold a distance (i.e. are not `None`).
+        #[inline]
+        fn matched_query_words(distances: &[Option<u8>]) -> usize {
+            distances.iter().filter(|distance| distance.is_some()).count()
+        }
+
+        let lhs_count = matched_query_words(&lhs.processed_distances);
+        let rhs_count = matched_query_words(&rhs.processed_distances);
+
+        // Swapping the operands is the same as reversing the comparison.
+        rhs_count.cmp(&lhs_count)
+    }
+}
--- a/meilisearch-core/src/criterion/words_position.rs
+++ b/meilisearch-core/src/criterion/words_position.rs
@ -0,0 +1,48 @@
+use std::cmp::Ordering;
+
+use compact_arena::SmallArena;
+use slice_group_by::GroupBy;
+
+use crate::automaton::QueryEnhancer;
+use crate::bucket_sort::{PostingsListView, SimpleMatch, QueryWordAutomaton};
+use crate::RawDocument;
+
+use super::{Criterion, prepare_raw_matches};
+
+/// Criterion that orders documents by the summed word positions taken from
+/// `RawDocument::processed_matches`.
+pub struct WordsPosition;
+
+impl Criterion for WordsPosition {
+    /// Name reported for this criterion.
+    fn name(&self) -> &str {
+        "words position"
+    }
+
+    /// Forwards the documents, postings lists, query enhancer and automatons
+    /// to `prepare_raw_matches`.
+    fn prepare<'a, 'tag, 'txn>(
+        &self,
+        documents: &mut [RawDocument<'a, 'tag>],
+        postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
+        query_enhancer: &QueryEnhancer,
+        automatons: &[QueryWordAutomaton],
+    ) {
+        prepare_raw_matches(documents, postings_lists, query_enhancer, automatons);
+    }
+
+    /// Compares the two documents by their position totals; the smaller
+    /// total yields `Ordering::Less`.
+    fn evaluate<'a, 'tag, 'txn>(
+        &self,
+        lhs: &RawDocument<'a, 'tag>,
+        rhs: &RawDocument<'a, 'tag>,
+        postings_lists: &SmallArena<'tag, PostingsListView<'txn>>,
+    ) -> Ordering
+    {
+        // Splits the matches into consecutive runs sharing a `query_index`
+        // and adds up the `word_index` of the first match of every run.
+        #[inline]
+        fn first_positions_total(matches: &[SimpleMatch]) -> usize {
+            matches
+                .linear_group_by_key(|m| m.query_index)
+                .map(|group| group[0].word_index as usize)
+                .sum()
+        }
+
+        let lhs_total = first_positions_total(&lhs.processed_matches);
+        let rhs_total = first_positions_total(&rhs.processed_matches);
+
+        lhs_total.cmp(&rhs_total)
+    }
+}
--- a/meilisearch-core/src/criterion/words_proximity.rs
+++ b/meilisearch-core/src/criterion/words_proximity.rs
@ -1,164 +0,0 @@
-use crate::criterion::Criterion;
-use crate::RawDocument;
-use slice_group_by::GroupBy;
-use std::cmp::{self, Ordering};
-
-const MAX_DISTANCE: u16 = 8;
-
-#[inline]
-fn clone_tuple<T: Clone, U: Clone>((a, b): (&T, &U)) -> (T, U) {
-    (a.clone(), b.clone())
-}
-
-fn index_proximity(lhs: u16, rhs: u16) -> u16 {
-    if lhs < rhs {
-        cmp::min(rhs - lhs, MAX_DISTANCE)
-    } else {
-        cmp::min(lhs - rhs, MAX_DISTANCE) + 1
-    }
-}
-
-fn attribute_proximity((lattr, lwi): (u16, u16), (rattr, rwi): (u16, u16)) -> u16 {
-    if lattr != rattr {
-        return MAX_DISTANCE;
-    }
-    index_proximity(lwi, rwi)
-}
-
-fn min_proximity((lattr, lwi): (&[u16], &[u16]), (rattr, rwi): (&[u16], &[u16])) -> u16 {
-    let mut min_prox = u16::max_value();
-
-    for a in lattr.iter().zip(lwi) {
-        for b in rattr.iter().zip(rwi) {
-            let a = clone_tuple(a);
-            let b = clone_tuple(b);
-            min_prox = cmp::min(min_prox, attribute_proximity(a, b));
-        }
-    }
-
-    min_prox
-}
-
-fn matches_proximity(
-    query_index: &[u32],
-    distance: &[u8],
-    attribute: &[u16],
-    word_index: &[u16],
-) -> u16 {
-    let mut query_index_groups = query_index.linear_group();
-    let mut proximity = 0;
-    let mut index = 0;
-
-    let get_attr_wi = |index: usize, group_len: usize| {
-        // retrieve the first distance group (with the lowest values)
-        let len = distance[index..index + group_len]
-            .linear_group()
-            .next()
-            .unwrap()
-            .len();
-
-        let rattr = &attribute[index..index + len];
-        let rwi = &word_index[index..index + len];
-
-        (rattr, rwi)
-    };
-
-    let mut last = query_index_groups.next().map(|group| {
-        let attr_wi = get_attr_wi(index, group.len());
-        index += group.len();
-        attr_wi
-    });
-
-    // iter by windows of size 2
-    while let (Some(lhs), Some(rhs)) = (last, query_index_groups.next()) {
-        let attr_wi = get_attr_wi(index, rhs.len());
-        proximity += min_proximity(lhs, attr_wi);
-        last = Some(attr_wi);
-        index += rhs.len();
-    }
-
-    proximity
-}
-
-#[derive(Debug, Clone, Copy)]
-pub struct WordsProximity;
-
-impl Criterion for WordsProximity {
-    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
-        let lhs = {
-            let query_index = lhs.query_index();
-            let distance = lhs.distance();
-            let attribute = lhs.attribute();
-            let word_index = lhs.word_index();
-            matches_proximity(query_index, distance, attribute, word_index)
-        };
-
-        let rhs = {
-            let query_index = rhs.query_index();
-            let distance = rhs.distance();
-            let attribute = rhs.attribute();
-            let word_index = rhs.word_index();
-            matches_proximity(query_index, distance, attribute, word_index)
-        };
-
-        lhs.cmp(&rhs)
-    }
-
-    fn name(&self) -> &str {
-        "WordsProximity"
-    }
-}
-
-#[cfg(test)]
-mod tests {
-    use super::*;
-
-    #[test]
-    fn three_different_attributes() {
-        // "soup" "of the" "the day"
-        //
-        // { id: 0, attr: 0, attr_index: 0 }
-        // { id: 1, attr: 1, attr_index: 0 }
-        // { id: 2, attr: 1, attr_index: 1 }
-        // { id: 2, attr: 2, attr_index: 0 }
-        // { id: 3, attr: 3, attr_index: 1 }
-
-        let query_index = &[0, 1, 2, 2, 3];
-        let distance = &[0, 0, 0, 0, 0];
-        let attribute = &[0, 1, 1, 2, 3];
-        let word_index = &[0, 0, 1, 0, 1];
-
-        //   soup -> of = 8
-        // + of -> the  = 1
-        // + the -> day = 8 (not 1)
-        assert_eq!(
-            matches_proximity(query_index, distance, attribute, word_index),
-            17
-        );
-    }
-
-    #[test]
-    fn two_different_attributes() {
-        // "soup day" "soup of the day"
-        //
-        // { id: 0, attr: 0, attr_index: 0 }
-        // { id: 0, attr: 1, attr_index: 0 }
-        // { id: 1, attr: 1, attr_index: 1 }
-        // { id: 2, attr: 1, attr_index: 2 }
-        // { id: 3, attr: 0, attr_index: 1 }
-        // { id: 3, attr: 1, attr_index: 3 }
-
-        let query_index = &[0, 0, 1, 2, 3, 3];
-        let distance = &[0, 0, 0, 0, 0, 0];
-        let attribute = &[0, 1, 1, 1, 0, 1];
-        let word_index = &[0, 0, 1, 2, 1, 3];
-
-        //   soup -> of = 1
-        // + of -> the  = 1
-        // + the -> day = 1
-        assert_eq!(
-            matches_proximity(query_index, distance, attribute, word_index),
-            3
-        );
-    }
-}
--- a/meilisearch-core/src/criterion2.rs
+++ b/meilisearch-core/src/criterion2.rs
@ -1,514 +0,0 @@
-use std::cmp::{self, Ordering, Reverse};
-use std::borrow::Cow;
-use std::sync::atomic::{self, AtomicUsize};
-
-use slice_group_by::{GroupBy, GroupByMut};
-use compact_arena::SmallArena;
-use sdset::{Set, SetBuf};
-use log::debug;
-
-use crate::{DocIndex, DocumentId};
-use crate::bucket_sort::{BareMatch, SimpleMatch, RawDocument, PostingsListView, QueryWordAutomaton};
-use crate::automaton::QueryEnhancer;
-
-type PostingsListsArena<'tag, 'txn> = SmallArena<'tag, PostingsListView<'txn>>;
-
-pub trait Criterion {
-    fn name(&self) -> &str;
-
-    fn prepare<'a, 'tag, 'txn>(
-        &self,
-        documents: &mut [RawDocument<'a, 'tag>],
-        postings_lists: &mut PostingsListsArena<'tag, 'txn>,
-        query_enhancer: &QueryEnhancer,
-        automatons: &[QueryWordAutomaton],
-    );
-
-    fn evaluate<'a, 'tag, 'txn>(
-        &self,
-        lhs: &RawDocument<'a, 'tag>,
-        rhs: &RawDocument<'a, 'tag>,
-        postings_lists: &PostingsListsArena<'tag, 'txn>,
-    ) -> Ordering;
-
-    #[inline]
-    fn eq<'a, 'tag, 'txn>(
-        &self,
-        lhs: &RawDocument<'a, 'tag>,
-        rhs: &RawDocument<'a, 'tag>,
-        postings_lists: &PostingsListsArena<'tag, 'txn>,
-    ) -> bool
-    {
-        self.evaluate(lhs, rhs, postings_lists) == Ordering::Equal
-    }
-}
-
-fn prepare_query_distances<'a, 'tag, 'txn>(
-    documents: &mut [RawDocument<'a, 'tag>],
-    query_enhancer: &QueryEnhancer,
-    automatons: &[QueryWordAutomaton],
-    postings_lists: &PostingsListsArena<'tag, 'txn>,
-) {
-    for document in documents {
-        if !document.processed_distances.is_empty() { continue }
-
-        let mut processed = Vec::new();
-        for m in document.raw_matches.iter() {
-            if postings_lists[m.postings_list].is_empty() { continue }
-
-            let range = query_enhancer.replacement(m.query_index as u32);
-            let new_len = cmp::max(range.end as usize, processed.len());
-            processed.resize(new_len, None);
-
-            for index in range {
-                let index = index as usize;
-                processed[index] = match processed[index] {
-                    Some(distance) if distance > m.distance => Some(m.distance),
-                    Some(distance) => Some(distance),
-                    None => Some(m.distance),
-                };
-            }
-        }
-
-        document.processed_distances = processed;
-    }
-}
-
-pub struct Typo;
-
-impl Criterion for Typo {
-    fn name(&self) -> &str { "typo" }
-
-    fn prepare<'a, 'tag, 'txn>(
-        &self,
-        documents: &mut [RawDocument<'a, 'tag>],
-        postings_lists: &mut PostingsListsArena<'tag, 'txn>,
-        query_enhancer: &QueryEnhancer,
-        automatons: &[QueryWordAutomaton],
-    ) {
-        prepare_query_distances(documents, query_enhancer, automatons, postings_lists);
-    }
-
-    fn evaluate(
-        &self,
-        lhs: &RawDocument,
-        rhs: &RawDocument,
-        postings_lists: &PostingsListsArena,
-    ) -> Ordering
-    {
-        // This function is a wrong logarithmic 10 function.
-        // It is safe to panic on input number higher than 3,
-        // the number of typos is never bigger than that.
-        #[inline]
-        fn custom_log10(n: u8) -> f32 {
-            match n {
-                0 => 0.0,     // log(1)
-                1 => 0.30102, // log(2)
-                2 => 0.47712, // log(3)
-                3 => 0.60205, // log(4)
-                _ => panic!("invalid number"),
-            }
-        }
-
-        #[inline]
-        fn compute_typos(distances: &[Option<u8>]) -> usize {
-            let mut number_words: usize = 0;
-            let mut sum_typos = 0.0;
-
-            for distance in distances {
-                if let Some(distance) = distance {
-                    sum_typos += custom_log10(*distance);
-                    number_words += 1;
-                }
-            }
-
-            (number_words as f32 / (sum_typos + 1.0) * 1000.0) as usize
-        }
-
-        let lhs = compute_typos(&lhs.processed_distances);
-        let rhs = compute_typos(&rhs.processed_distances);
-
-        lhs.cmp(&rhs).reverse()
-    }
-}
-
-pub struct Words;
-
-impl Criterion for Words {
-    fn name(&self) -> &str { "words" }
-
-    fn prepare<'a, 'tag, 'txn>(
-        &self,
-        documents: &mut [RawDocument<'a, 'tag>],
-        postings_lists: &mut PostingsListsArena<'tag, 'txn>,
-        query_enhancer: &QueryEnhancer,
-        automatons: &[QueryWordAutomaton],
-    ) {
-        prepare_query_distances(documents, query_enhancer, automatons, postings_lists);
-    }
-
-    fn evaluate(
-        &self,
-        lhs: &RawDocument,
-        rhs: &RawDocument,
-        postings_lists: &PostingsListsArena,
-    ) -> Ordering
-    {
-        #[inline]
-        fn number_of_query_words(distances: &[Option<u8>]) -> usize {
-            distances.iter().cloned().filter(Option::is_some).count()
-        }
-
-        let lhs = number_of_query_words(&lhs.processed_distances);
-        let rhs = number_of_query_words(&rhs.processed_distances);
-
-        lhs.cmp(&rhs).reverse()
-    }
-}
-
-fn prepare_raw_matches<'a, 'tag, 'txn>(
-    documents: &mut [RawDocument<'a, 'tag>],
-    postings_lists: &mut PostingsListsArena<'tag, 'txn>,
-    query_enhancer: &QueryEnhancer,
-    automatons: &[QueryWordAutomaton],
-) {
-    for document in documents {
-        if !document.processed_matches.is_empty() { continue }
-
-        let mut processed = Vec::new();
-        for m in document.raw_matches.iter() {
-            let postings_list = &postings_lists[m.postings_list];
-            processed.reserve(postings_list.len());
-            for di in postings_list.as_ref() {
-                let simple_match = SimpleMatch {
-                    query_index: m.query_index,
-                    distance: m.distance,
-                    attribute: di.attribute,
-                    word_index: di.word_index,
-                    is_exact: m.is_exact,
-                };
-                processed.push(simple_match);
-            }
-        }
-
-        let processed = multiword_rewrite_matches(&mut processed, query_enhancer, automatons);
-        document.processed_matches = processed.into_vec();
-    }
-}
-
-pub struct Proximity;
-
-impl Criterion for Proximity {
-    fn name(&self) -> &str { "proximity" }
-
-    fn prepare<'a, 'tag, 'txn>(
-        &self,
-        documents: &mut [RawDocument<'a, 'tag>],
-        postings_lists: &mut PostingsListsArena<'tag, 'txn>,
-        query_enhancer: &QueryEnhancer,
-        automatons: &[QueryWordAutomaton],
-    ) {
-        prepare_raw_matches(documents, postings_lists, query_enhancer, automatons);
-    }
-
-    fn evaluate<'a, 'tag, 'txn>(
-        &self,
-        lhs: &RawDocument<'a, 'tag>,
-        rhs: &RawDocument<'a, 'tag>,
-        postings_lists: &PostingsListsArena<'tag, 'txn>,
-    ) -> Ordering
-    {
-        const MAX_DISTANCE: u16 = 8;
-
-        fn index_proximity(lhs: u16, rhs: u16) -> u16 {
-            if lhs < rhs {
-                cmp::min(rhs - lhs, MAX_DISTANCE)
-            } else {
-                cmp::min(lhs - rhs, MAX_DISTANCE) + 1
-            }
-        }
-
-        fn attribute_proximity(lhs: SimpleMatch, rhs: SimpleMatch) -> u16 {
-            if lhs.attribute != rhs.attribute { MAX_DISTANCE }
-            else { index_proximity(lhs.word_index, rhs.word_index) }
-        }
-
-        fn min_proximity(lhs: &[SimpleMatch], rhs: &[SimpleMatch]) -> u16 {
-            let mut min_prox = u16::max_value();
-            for a in lhs {
-                for b in rhs {
-                    let prox = attribute_proximity(*a, *b);
-                    min_prox = cmp::min(min_prox, prox);
-                }
-            }
-            min_prox
-        }
-
-        fn matches_proximity(matches: &[SimpleMatch],) -> u16 {
-            let mut proximity = 0;
-            let mut iter = matches.linear_group_by_key(|m| m.query_index);
-
-            // iterate over groups by windows of size 2
-            let mut last = iter.next();
-            while let (Some(lhs), Some(rhs)) = (last, iter.next()) {
-                proximity += min_proximity(lhs, rhs);
-                last = Some(rhs);
-            }
-
-            proximity
-        }
-
-        let lhs = matches_proximity(&lhs.processed_matches);
-        let rhs = matches_proximity(&rhs.processed_matches);
-
-        lhs.cmp(&rhs)
-    }
-}
-
-pub struct Attribute;
-
-impl Criterion for Attribute {
-    fn name(&self) -> &str { "attribute" }
-
-    fn prepare<'a, 'tag, 'txn>(
-        &self,
-        documents: &mut [RawDocument<'a, 'tag>],
-        postings_lists: &mut PostingsListsArena<'tag, 'txn>,
-        query_enhancer: &QueryEnhancer,
-        automatons: &[QueryWordAutomaton],
-    ) {
-        prepare_raw_matches(documents, postings_lists, query_enhancer, automatons);
-    }
-
-    fn evaluate<'a, 'tag, 'txn>(
-        &self,
-        lhs: &RawDocument<'a, 'tag>,
-        rhs: &RawDocument<'a, 'tag>,
-        postings_lists: &PostingsListsArena<'tag, 'txn>,
-    ) -> Ordering
-    {
-        #[inline]
-        fn best_attribute(matches: &[SimpleMatch]) -> u16 {
-            let mut best_attribute = u16::max_value();
-            for group in matches.linear_group_by_key(|bm| bm.query_index) {
-                best_attribute = cmp::min(best_attribute, group[0].attribute);
-            }
-            best_attribute
-        }
-
-        let lhs = best_attribute(&lhs.processed_matches);
-        let rhs = best_attribute(&rhs.processed_matches);
-
-        lhs.cmp(&rhs)
-    }
-}
-
-pub struct WordsPosition;
-
-impl Criterion for WordsPosition {
-    fn name(&self) -> &str { "words position" }
-
-    fn prepare<'a, 'tag, 'txn>(
-        &self,
-        documents: &mut [RawDocument<'a, 'tag>],
-        postings_lists: &mut PostingsListsArena<'tag, 'txn>,
-        query_enhancer: &QueryEnhancer,
-        automatons: &[QueryWordAutomaton],
-    ) {
-        prepare_raw_matches(documents, postings_lists, query_enhancer, automatons);
-    }
-
-    fn evaluate<'a, 'tag, 'txn>(
-        &self,
-        lhs: &RawDocument<'a, 'tag>,
-        rhs: &RawDocument<'a, 'tag>,
-        postings_lists: &PostingsListsArena<'tag, 'txn>,
-    ) -> Ordering
-    {
-        #[inline]
-        fn sum_words_position(matches: &[SimpleMatch]) -> usize {
-            let mut sum_words_position = 0;
-            for group in matches.linear_group_by_key(|bm| bm.query_index) {
-                sum_words_position += group[0].word_index as usize;
-            }
-            sum_words_position
-        }
-
-        let lhs = sum_words_position(&lhs.processed_matches);
-        let rhs = sum_words_position(&rhs.processed_matches);
-
-        lhs.cmp(&rhs)
-    }
-}
-
-pub struct Exact;
-
-impl Criterion for Exact {
-    fn name(&self) -> &str { "exact" }
-
-    fn prepare(
-        &self,
-        documents: &mut [RawDocument],
-        postings_lists: &mut PostingsListsArena,
-        query_enhancer: &QueryEnhancer,
-        automatons: &[QueryWordAutomaton],
-    ) {
-        for document in documents {
-            document.raw_matches.sort_unstable_by_key(|bm| (bm.query_index, Reverse(bm.is_exact)));
-        }
-    }
-
-    fn evaluate(
-        &self,
-        lhs: &RawDocument,
-        rhs: &RawDocument,
-        postings_lists: &PostingsListsArena,
-    ) -> Ordering
-    {
-        #[inline]
-        fn sum_exact_query_words(matches: &[BareMatch]) -> usize {
-            let mut sum_exact_query_words = 0;
-
-            for group in matches.linear_group_by_key(|bm| bm.query_index) {
-                sum_exact_query_words += group[0].is_exact as usize;
-            }
-
-            sum_exact_query_words
-        }
-
-        let lhs = sum_exact_query_words(&lhs.raw_matches);
-        let rhs = sum_exact_query_words(&rhs.raw_matches);
-
-        lhs.cmp(&rhs).reverse()
-    }
-}
-
-pub struct StableDocId;
-
-impl Criterion for StableDocId {
-    fn name(&self) -> &str { "stable document id" }
-
-    fn prepare(
-        &self,
-        documents: &mut [RawDocument],
-        postings_lists: &mut PostingsListsArena,
-        query_enhancer: &QueryEnhancer,
-        automatons: &[QueryWordAutomaton],
-    ) {
-        // ...
-    }
-
-    fn evaluate(
-        &self,
-        lhs: &RawDocument,
-        rhs: &RawDocument,
-        postings_lists: &PostingsListsArena,
-    ) -> Ordering
-    {
-        let lhs = &lhs.raw_matches[0].document_id;
-        let rhs = &rhs.raw_matches[0].document_id;
-
-        lhs.cmp(rhs)
-    }
-}
-
-pub fn multiword_rewrite_matches(
-    matches: &mut [SimpleMatch],
-    query_enhancer: &QueryEnhancer,
-    automatons: &[QueryWordAutomaton],
-) -> SetBuf<SimpleMatch>
-{
-    matches.sort_unstable_by_key(|m| (m.attribute, m.word_index));
-
-    let mut padded_matches = Vec::with_capacity(matches.len());
-
-    // let before_padding = Instant::now();
-    // for each attribute of each document
-    for same_document_attribute in matches.linear_group_by_key(|m| m.attribute) {
-        // padding will only be applied
-        // to word indices in the same attribute
-        let mut padding = 0;
-        let mut iter = same_document_attribute.linear_group_by_key(|m| m.word_index);
-
-        // for each match at the same position
-        // in this document attribute
-        while let Some(same_word_index) = iter.next() {
-            // find the biggest padding
-            let mut biggest = 0;
-            for match_ in same_word_index {
-                let mut replacement = query_enhancer.replacement(match_.query_index as u32);
-                let replacement_len = replacement.len();
-                let nexts = iter.remainder().linear_group_by_key(|m| m.word_index);
-
-                if let Some(query_index) = replacement.next() {
-                    let word_index = match_.word_index + padding as u16;
-                    let query_index = query_index as u16;
-                    let match_ = SimpleMatch { query_index, word_index, ..*match_ };
-                    padded_matches.push(match_);
-                }
-
-                let mut found = false;
-
-                // look ahead and if there already is a match
-                // corresponding to this padding word, abort the padding
-                'padding: for (x, next_group) in nexts.enumerate() {
-                    for (i, query_index) in replacement.clone().enumerate().skip(x) {
-                        let word_index = match_.word_index + padding as u16 + (i + 1) as u16;
-                        let query_index = query_index as u16;
-                        let padmatch = SimpleMatch { query_index, word_index, ..*match_ };
-
-                        for nmatch_ in next_group {
-                            let mut rep = query_enhancer.replacement(nmatch_.query_index as u32);
-                            let query_index = rep.next().unwrap() as u16;
-                            if query_index == padmatch.query_index {
-                                if !found {
-                                    // if we find a corresponding padding for the
-                                    // first time we must push preceding paddings
-                                    for (i, query_index) in replacement.clone().enumerate().take(i)
-                                    {
-                                        let word_index = match_.word_index + padding as u16 + (i + 1) as u16;
-                                        let query_index = query_index as u16;
-                                        let match_ = SimpleMatch { query_index, word_index, ..*match_ };
-                                        padded_matches.push(match_);
-                                        biggest = biggest.max(i + 1);
-                                    }
-                                }
-
-                                padded_matches.push(padmatch);
-                                found = true;
-                                continue 'padding;
-                            }
-                        }
-                    }
-
-                    // if we do not find a corresponding padding in the
-                    // next groups so stop here and pad what was found
-                    break;
-                }
-
-                if !found {
-                    // if no padding was found in the following matches
-                    // we must insert the entire padding
-                    for (i, query_index) in replacement.enumerate() {
-                        let word_index = match_.word_index + padding as u16 + (i + 1) as u16;
-                        let query_index = query_index as u16;
-                        let match_ = SimpleMatch { query_index, word_index, ..*match_ };
-                        padded_matches.push(match_);
-                    }
-
-                    biggest = biggest.max(replacement_len - 1);
-                }
-            }
-
-            padding += biggest;
-        }
-    }
-
-    // debug!("padding matches took {:.02?}", before_padding.elapsed());
-
-    // With this check we can see that the loop above takes something
-    // like 43% of the search time even when no rewrite is needed.
-    // assert_eq!(before_matches, padded_matches);
-
-    SetBuf::from_dirty(padded_matches)
-}
--- a/meilisearch-core/src/lib.rs
+++ b/meilisearch-core/src/lib.rs
@ -20,7 +20,6 @@ mod update;

 // TODO replace
 mod bucket_sort;
-mod criterion2;

 pub use self::database::{BoxUpdateFn, Database, MainT, UpdateT};
 pub use self::error::{Error, MResult};
@ -31,62 +30,13 @@ pub use self::store::Index;
 pub use self::update::{EnqueuedUpdateResult, ProcessedUpdateResult, UpdateStatus, UpdateType};
 pub use meilisearch_types::{DocIndex, DocumentId, Highlight, AttrCount};

-#[doc(hidden)]
-#[derive(Debug, Copy, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
-pub struct TmpMatch {
-    pub query_index: u32,
-    pub distance: u8,
-    pub attribute: u16,
-    pub word_index: u16,
-    pub is_exact: bool,
-}
-
 #[derive(Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)]
 pub struct Document {
    pub id: DocumentId,
    pub highlights: Vec<Highlight>,

-    #[cfg(test)]
-    pub matches: Vec<TmpMatch>,
-}
-
-impl Document {
-    #[cfg(not(test))]
-    fn from_raw(raw: RawDocument) -> Document {
-        Document {
-            id: raw.id,
-            highlights: raw.highlights,
-        }
-    }
-
-    #[cfg(test)]
-    fn from_raw(raw: RawDocument) -> Document {
-        let len = raw.query_index().len();
-        let mut matches = Vec::with_capacity(len);
-
-        let query_index = raw.query_index();
-        let distance = raw.distance();
-        let attribute = raw.attribute();
-        let word_index = raw.word_index();
-        let is_exact = raw.is_exact();
-
-        for i in 0..len {
-            let match_ = TmpMatch {
-                query_index: query_index[i],
-                distance: distance[i],
-                attribute: attribute[i],
-                word_index: word_index[i],
-                is_exact: is_exact[i],
-            };
-            matches.push(match_);
-        }
-
-        Document {
-            id: raw.id,
-            matches,
-            highlights: raw.highlights,
-        }
-    }
+    // #[cfg(test)]
+    // pub matches: Vec<TmpMatch>,
 }

 #[cfg(test)]
--- a/meilisearch-core/src/query_builder.rs
+++ b/meilisearch-core/src/query_builder.rs
@ -1,21 +1,8 @@
-use hashbrown::HashMap;
-use std::convert::TryFrom;
 use std::ops::Range;
-use std::rc::Rc;
-use std::time::{Duration, Instant};
-use std::{cmp, mem};
-
-use fst::{IntoStreamer, Streamer};
-use log::debug;
-use sdset::SetBuf;
-use slice_group_by::{GroupBy, GroupByMut};
+use std::time::Duration;

 use crate::{bucket_sort::bucket_sort, database::MainT};
-use crate::automaton::{Automaton, AutomatonGroup, AutomatonProducer, QueryEnhancer};
-use crate::distinct_map::{BufferedDistinctMap, DistinctMap};
-use crate::levenshtein::prefix_damerau_levenshtein;
-use crate::raw_document::{raw_documents_from, RawDocument};
-use crate::{criterion::Criteria, Document, DocumentId, Highlight, TmpMatch, AttrCount};
+use crate::{criterion::Criteria, Document, DocumentId};
 use crate::{reordered_attrs::ReorderedAttrs, store, MResult};

 pub struct QueryBuilder<'c, 'f, 'd> {
@ -30,292 +17,6 @@ pub struct QueryBuilder<'c, 'f, 'd> {
    synonyms_store: store::Synonyms,
 }

-fn multiword_rewrite_matches(
-    mut matches: Vec<(DocumentId, TmpMatch)>,
-    query_enhancer: &QueryEnhancer,
-) -> SetBuf<(DocumentId, TmpMatch)> {
-    let mut padded_matches = Vec::with_capacity(matches.len());
-
-    let before_sort = Instant::now();
-    // we sort the matches by word index to make them rewritable
-    matches.sort_unstable_by_key(|(id, match_)| (*id, match_.attribute, match_.word_index));
-    debug!("sorting dirty matches took {:.02?}", before_sort.elapsed());
-
-    let before_padding = Instant::now();
-    // for each attribute of each document
-    for same_document_attribute in matches.linear_group_by_key(|(id, m)| (*id, m.attribute)) {
-        // padding will only be applied
-        // to word indices in the same attribute
-        let mut padding = 0;
-        let mut iter = same_document_attribute.linear_group_by_key(|(_, m)| m.word_index);
-
-        // for each match at the same position
-        // in this document attribute
-        while let Some(same_word_index) = iter.next() {
-            // find the biggest padding
-            let mut biggest = 0;
-            for (id, match_) in same_word_index {
-                let mut replacement = query_enhancer.replacement(match_.query_index);
-                let replacement_len = replacement.len();
-                let nexts = iter.remainder().linear_group_by_key(|(_, m)| m.word_index);
-
-                if let Some(query_index) = replacement.next() {
-                    let word_index = match_.word_index + padding as u16;
-                    let match_ = TmpMatch {
-                        query_index,
-                        word_index,
-                        ..*match_
-                    };
-                    padded_matches.push((*id, match_));
-                }
-
-                let mut found = false;
-
-                // look ahead and if there already is a match
-                // corresponding to this padding word, abort the padding
-                'padding: for (x, next_group) in nexts.enumerate() {
-                    for (i, query_index) in replacement.clone().enumerate().skip(x) {
-                        let word_index = match_.word_index + padding as u16 + (i + 1) as u16;
-                        let padmatch = TmpMatch {
-                            query_index,
-                            word_index,
-                            ..*match_
-                        };
-
-                        for (_, nmatch_) in next_group {
-                            let mut rep = query_enhancer.replacement(nmatch_.query_index);
-                            let query_index = rep.next().unwrap();
-                            if query_index == padmatch.query_index {
-                                if !found {
-                                    // if we find a corresponding padding for the
-                                    // first time we must push preceding paddings
-                                    for (i, query_index) in replacement.clone().enumerate().take(i)
-                                    {
-                                        let word_index =
-                                            match_.word_index + padding as u16 + (i + 1) as u16;
-                                        let match_ = TmpMatch {
-                                            query_index,
-                                            word_index,
-                                            ..*match_
-                                        };
-                                        padded_matches.push((*id, match_));
-                                        biggest = biggest.max(i + 1);
-                                    }
-                                }
-
-                                padded_matches.push((*id, padmatch));
-                                found = true;
-                                continue 'padding;
-                            }
-                        }
-                    }
-
-                    // if we do not find a corresponding padding in the
-                    // next groups so stop here and pad what was found
-                    break;
-                }
-
-                if !found {
-                    // if no padding was found in the following matches
-                    // we must insert the entire padding
-                    for (i, query_index) in replacement.enumerate() {
-                        let word_index = match_.word_index + padding as u16 + (i + 1) as u16;
-                        let match_ = TmpMatch {
-                            query_index,
-                            word_index,
-                            ..*match_
-                        };
-                        padded_matches.push((*id, match_));
-                    }
-
-                    biggest = biggest.max(replacement_len - 1);
-                }
-            }
-
-            padding += biggest;
-        }
-    }
-
-    for document_matches in padded_matches.linear_group_by_key_mut(|(id, _)| *id) {
-        document_matches.sort_unstable();
-    }
-
-    debug!("padding matches took {:.02?}", before_padding.elapsed());
-
-    // With this check we can see that the loop above takes something
-    // like 43% of the search time even when no rewrite is needed.
-    // assert_eq!(before_matches, padded_matches);
-
-    SetBuf::new_unchecked(padded_matches)
-}
-
-fn fetch_raw_documents(
-    reader: &heed::RoTxn<MainT>,
-    automatons_groups: &[AutomatonGroup],
-    query_enhancer: &QueryEnhancer,
-    searchables: Option<&ReorderedAttrs>,
-    main_store: store::Main,
-    postings_lists_store: store::PostingsLists,
-) -> MResult<Vec<RawDocument>> {
-    let mut matches = Vec::new();
-    let mut highlights = Vec::new();
-
-    let words = match main_store.words_fst(reader)? {
-        Some(words) => words,
-        None => return Ok(Vec::new()),
-    };
-
-    let before_automatons_groups_loop = Instant::now();
-    let mut doc_indexes_rewrite = Duration::default();
-    let mut retrieve_postings_lists = Duration::default();
-    let mut stream_reserve = Duration::default();
-    let mut covered_area_time = Duration::default();
-    let mut eval_time = Duration::default();
-
-    for group in automatons_groups {
-        let AutomatonGroup { is_phrase_query, automatons } = group;
-        let phrase_query_len = automatons.len();
-
-        let mut tmp_matches = Vec::new();
-        for (id, automaton) in automatons.into_iter().enumerate() {
-            let Automaton { index, is_exact, query_len, query, .. } = automaton;
-            let dfa = automaton.dfa();
-
-            let before_stream_loop = Instant::now();
-            let mut stream_count = 0;
-
-            let mut stream = words.search(&dfa).into_stream();
-            while let Some(input) = stream.next() {
-                let before_eval_time = Instant::now();
-                let distance = dfa.eval(input).to_u8();
-                eval_time += before_eval_time.elapsed();
-
-                let is_exact = *is_exact && distance == 0 && input.len() == *query_len;
-
-                stream_count += 1;
-
-                let before_covered_area = Instant::now();
-                let covered_area = if *query_len > input.len() {
-                    input.len()
-                } else {
-                    prefix_damerau_levenshtein(query.as_bytes(), input).1
-                };
-                covered_area_time += before_covered_area.elapsed();
-
-                let before_retrieve_postings_lists = Instant::now();
-                let doc_indexes = match postings_lists_store.postings_list(reader, input)? {
-                    Some(doc_indexes) => doc_indexes,
-                    None => continue,
-                };
-                retrieve_postings_lists += before_retrieve_postings_lists.elapsed();
-
-                let before_stream_reserve = Instant::now();
-                tmp_matches.reserve(doc_indexes.len());
-                stream_reserve += before_stream_reserve.elapsed();
-
-                let before_doc_indexes_rewrite = Instant::now();
-                for di in doc_indexes.as_ref() {
-                    let attribute = searchables.map_or(Some(di.attribute), |r| r.get(di.attribute));
-                    if let Some(attribute) = attribute {
-                        let match_ = TmpMatch {
-                            query_index: *index as u32,
-                            distance,
-                            attribute,
-                            word_index: di.word_index,
-                            is_exact,
-                        };
-
-                        let covered_area = u16::try_from(covered_area).unwrap_or(u16::max_value());
-                        let covered_area = cmp::min(covered_area, di.char_length);
-
-                        let highlight = Highlight {
-                            attribute: di.attribute,
-                            char_index: di.char_index,
-                            char_length: covered_area,
-                        };
-
-                        tmp_matches.push((di.document_id, id, match_, highlight));
-                    }
-                }
-                doc_indexes_rewrite += before_doc_indexes_rewrite.elapsed();
-            }
-            debug!("{:?} took {:.02?} ({} words)", query, before_stream_loop.elapsed(), stream_count);
-        }
-
-        if *is_phrase_query {
-            tmp_matches.sort_unstable_by_key(|(id, _, m, _)| (*id, m.attribute, m.word_index));
-            for group in tmp_matches.linear_group_by_key(|(id, _, m, _)| (*id, m.attribute)) {
-                for window in group.windows(2) {
-                    let (ida, ia, ma, ha) = window[0];
-                    let (idb, ib, mb, hb) = window[1];
-
-                    debug_assert_eq!(ida, idb);
-
-                    // if matches must follow and actually follows themselves
-                    if ia + 1 == ib && ma.word_index + 1 == mb.word_index {
-                        // TODO we must make it work for phrase query longer than 2
-                        // if the second match is the last phrase query word
-                        if ib + 1 == phrase_query_len {
-                            // insert first match
-                            matches.push((ida, ma));
-                            highlights.push((ida, ha));
-
-                            // insert second match
-                            matches.push((idb, mb));
-                            highlights.push((idb, hb));
-                        }
-                    }
-                }
-            }
-        } else {
-            let before_rerewrite = Instant::now();
-
-            matches.reserve(tmp_matches.len());
-            highlights.reserve(tmp_matches.len());
-
-            for (id, _, match_, highlight) in tmp_matches {
-                matches.push((id, match_));
-                highlights.push((id, highlight));
-            }
-            debug!("rerewrite took {:.02?}", before_rerewrite.elapsed());
-        }
-    }
-    debug!("automatons_groups_loop took {:.02?}", before_automatons_groups_loop.elapsed());
-    debug!("doc_indexes_rewrite took {:.02?}", doc_indexes_rewrite);
-    debug!("retrieve_postings_lists took {:.02?}", retrieve_postings_lists);
-    debug!("stream reserve took {:.02?}", stream_reserve);
-    debug!("covered area took {:.02?}", covered_area_time);
-    debug!("eval value took {:.02?}", eval_time);
-
-    // {
-    //     let mut cloned = matches.clone();
-    //     let before_sort_test = Instant::now();
-    //     cloned.sort_unstable_by_key(|(id, m)| (*id, m.query_index, m.distance));
-    //     debug!("sorting test took {:.02?}", before_sort_test.elapsed());
-    // }
-
-    let before_multiword_rewrite_matches = Instant::now();
-    debug!("number of matches before rewrite {}", matches.len());
-    debug!("{:?}", query_enhancer);
-    let matches = multiword_rewrite_matches(matches, &query_enhancer);
-    debug!("number of matches after rewrite {}", matches.len());
-    debug!("multiword_rewrite_matches took {:.02?}", before_multiword_rewrite_matches.elapsed());
-
-    let before_highlight_sorting = Instant::now();
-    let highlights = {
-        highlights.sort_unstable_by_key(|(id, _)| *id);
-        SetBuf::new_unchecked(highlights)
-    };
-    debug!("highlight_sorting {:.02?}", before_highlight_sorting.elapsed());
-
-    let before_raw_documents = Instant::now();
-    let raw_documents = raw_documents_from(matches, highlights);
-    debug!("raw_documents took {:.02?}", before_raw_documents.elapsed());
-    debug!("documents to worry about: {}", raw_documents.len());
-
-    Ok(raw_documents)
-}
-
 impl<'c, 'f, 'd> QueryBuilder<'c, 'f, 'd> {
    pub fn new(
        main: store::Main,
@ -389,7 +90,7 @@ impl<'c, 'f, 'd> QueryBuilder<'c, 'f, 'd> {
                reader,
                query,
                range,
-                // self.criteria,
+                self.criteria,
                self.main_store,
                self.postings_lists_store,
                self.documents_fields_counts_store,
--- a/meilisearch-core/src/raw_document.rs
+++ b/meilisearch-core/src/raw_document.rs
@ -1,183 +1,89 @@
-use std::fmt;
-use std::sync::Arc;
-
+use compact_arena::SmallArena;
+use itertools::EitherOrBoth;
 use sdset::SetBuf;
-use slice_group_by::GroupBy;

-use crate::{DocumentId, Highlight, TmpMatch, AttrCount};
+use crate::bucket_sort::{SimpleMatch, BareMatch, QueryWordAutomaton, PostingsListView};

-#[derive(Clone)]
-pub struct RawDocument {
-    pub id: DocumentId,
-    pub matches: SharedMatches,
-    pub highlights: Vec<Highlight>,
-    pub fields_counts: Option<SetBuf<AttrCount>>,
+pub struct RawDocument<'a, 'tag> { // one document's matches, held as a mutably borrowed slice
+    pub id: crate::DocumentId, // `RawDocument::new` copies this from the first raw match
+    pub raw_matches: &'a mut [BareMatch<'tag>], // sorted by `query_index` inside `RawDocument::new`
+    pub processed_matches: Vec<SimpleMatch>, // created empty by `RawDocument::new`; NOTE(review): presumably filled later by the criteria — confirm
+    /// The list of minimum `distance` values found; created empty by `RawDocument::new` (NOTE(review): presumably one slot per query word — confirm)
+    pub processed_distances: Vec<Option<u8>>,
 }

-impl RawDocument {
-    pub fn query_index(&self) -> &[u32] {
-        let r = self.matches.range;
-        // it is safe because construction/modifications
-        // can only be done in this module
-        unsafe {
-            &self
-                .matches
-                .matches
-                .query_index
-                .get_unchecked(r.start..r.end)
+impl<'a, 'tag> RawDocument<'a, 'tag> {
+    pub fn new<'txn>( // yields `None` when every postings list is empty once the phrase filtering below has run
+        raw_matches: &'a mut [BareMatch<'tag>],
+        automatons: &[QueryWordAutomaton],
+        postings_lists: &mut SmallArena<'tag, PostingsListView<'txn>>,
+    ) -> Option<RawDocument<'a, 'tag>>
+    {
+        raw_matches.sort_unstable_by_key(|m| m.query_index); // ascending query index, so `raw_matches[i + 1]` is the next candidate word
+
+        let mut previous_word = None; // query index of the last `Some((0, _))` match that kept at least one entry
+        for i in 0..raw_matches.len() {
+            let a = &raw_matches[i];
+            let auta = &automatons[a.query_index as usize];
+
+            match auta.phrase_query {
+                Some((0, _)) => { // NOTE(review): presumably the first word of a two-word phrase query — confirm
+                    let b = match raw_matches.get(i + 1) {
+                        Some(b) => b,
+                        None => {
+                            postings_lists[a.postings_list].rewrite_with(SetBuf::default()); // no following match: replace with an empty set
+                            continue;
+                        }
+                    };
+
+                    if a.query_index + 1 != b.query_index { // the following match is not the next query word
+                        postings_lists[a.postings_list].rewrite_with(SetBuf::default());
+                        continue
+                    }
+
+                    let pla = &postings_lists[a.postings_list];
+                    let plb = &postings_lists[b.postings_list];
+
+                    let mut iter = itertools::merge_join_by(pla.iter(), plb.iter(), |a, b| { // walk both postings lists in lockstep
+                        a.attribute.cmp(&b.attribute).then((a.word_index + 1).cmp(&b.word_index)) // equal only when `b` sits right after `a` in the same attribute
+                    });
+
+                    let mut newa = Vec::new();
+                    let mut newb = Vec::new();
+
+                    for eb in iter {
+                        if let EitherOrBoth::Both(a, b) = eb { // keep only the entries that found a partner on the other side
+                            newa.push(*a);
+                            newb.push(*b);
+                        }
+                    }
+
+                    if !newa.is_empty() {
+                        previous_word = Some(a.query_index); // lets the `Some((1, _))` arm accept the following word
+                    }
+
+                    postings_lists[a.postings_list].rewrite_with(SetBuf::new_unchecked(newa));
+                    postings_lists[b.postings_list].rewrite_with(SetBuf::new_unchecked(newb));
+                },
+                Some((1, _)) => {
+                    if previous_word.take() != Some(a.query_index - 1) { // `take()` also resets the marker for later iterations
+                        postings_lists[a.postings_list].rewrite_with(SetBuf::default());
+                    }
+                },
+                Some((_, _)) => unreachable!(), // any other first tuple value panics here
+                None => (), // no `phrase_query` on this automaton: its postings list is left as is
+            }
         }
-    }
 
-    pub fn distance(&self) -> &[u8] {
-        let r = self.matches.range;
-        // it is safe because construction/modifications
-        // can only be done in this module
-        unsafe { &self.matches.matches.distance.get_unchecked(r.start..r.end) }
-    }
-
-    pub fn attribute(&self) -> &[u16] {
-        let r = self.matches.range;
-        // it is safe because construction/modifications
-        // can only be done in this module
-        unsafe { &self.matches.matches.attribute.get_unchecked(r.start..r.end) }
-    }
-
-    pub fn word_index(&self) -> &[u16] {
-        let r = self.matches.range;
-        // it is safe because construction/modifications
-        // can only be done in this module
-        unsafe {
-            &self
-                .matches
-                .matches
-                .word_index
-                .get_unchecked(r.start..r.end)
+        if raw_matches.iter().all(|rm| postings_lists[rm.postings_list].is_empty()) { // also true for an empty slice, which keeps `raw_matches[0]` below in bounds
+            return None
         }
-    }
 
-    pub fn is_exact(&self) -> &[bool] {
-        let r = self.matches.range;
-        // it is safe because construction/modifications
-        // can only be done in this module
-        unsafe { &self.matches.matches.is_exact.get_unchecked(r.start..r.end) }
-    }
-}
-
-impl fmt::Debug for RawDocument {
-    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
-        f.write_str("RawDocument {\r\n")?;
-        f.write_fmt(format_args!("{:>15}: {:?},\r\n", "id", self.id))?;
-        f.write_fmt(format_args!(
-            "{:>15}: {:^5?},\r\n",
-            "query_index",
-            self.query_index()
-        ))?;
-        f.write_fmt(format_args!(
-            "{:>15}: {:^5?},\r\n",
-            "distance",
-            self.distance()
-        ))?;
-        f.write_fmt(format_args!(
-            "{:>15}: {:^5?},\r\n",
-            "attribute",
-            self.attribute()
-        ))?;
-        f.write_fmt(format_args!(
-            "{:>15}: {:^5?},\r\n",
-            "word_index",
-            self.word_index()
-        ))?;
-        f.write_fmt(format_args!(
-            "{:>15}: {:^5?},\r\n",
-            "is_exact",
-            self.is_exact()
-        ))?;
-        f.write_str("}")?;
-        Ok(())
-    }
-}
-
-pub fn raw_documents_from(
-    matches: SetBuf<(DocumentId, TmpMatch)>,
-    highlights: SetBuf<(DocumentId, Highlight)>
-) -> Vec<RawDocument> {
-    let mut docs_ranges: Vec<(_, Range, _, _)> = Vec::new();
-    let mut matches2 = Matches::with_capacity(matches.len());
-
-    let matches = matches.linear_group_by_key(|(id, _)| *id);
-    let highlights = highlights.linear_group_by_key(|(id, _)| *id);
-
-    for (mgroup, hgroup) in matches.zip(highlights) {
-        assert_eq!(mgroup[0].0, hgroup[0].0);
-
-        let document_id = mgroup[0].0;
-        let start = docs_ranges.last().map(|(_, r, _, _)| r.end).unwrap_or(0);
-        let end = start + mgroup.len();
-        let highlights = hgroup.iter().map(|(_, h)| *h).collect();
-        let fields_counts = None;
-
-        docs_ranges.push((document_id, Range { start, end }, highlights, fields_counts));
-        // TODO we could try to keep both data
-        //  - the data oriented one and,
-        //  - the raw one, the one that comes from the arguments of this function
-        // This way we would be able to only produce data oriented lazily.
-        //
-        // For example the default first criterion is `SumOfTypos`
-        // and just needs the `query_index` and the `distance` fields.
-        // It would probably be good to avoid wasting time sorting other fields of documents
-        // that will never ever reach the second criterion.
-        matches2.extend_from_slice(mgroup);
-    }
-
-    let matches = Arc::new(matches2);
-    docs_ranges
-        .into_iter()
-        .map(|(id, range, highlights, fields_counts)| {
-            let matches = SharedMatches { range, matches: matches.clone() };
-            RawDocument { id, matches, highlights, fields_counts }
+        Some(RawDocument {
+            id: raw_matches[0].document_id, // NOTE(review): assumes every match in the slice belongs to the same document — confirm against the caller
+            raw_matches,
+            processed_matches: Vec::new(),
+            processed_distances: Vec::new(),
         })
-        .collect()
-}
-
-#[derive(Debug, Copy, Clone)]
-struct Range {
-    start: usize,
-    end: usize,
-}
-
-#[derive(Clone)]
-pub struct SharedMatches {
-    range: Range,
-    matches: Arc<Matches>,
-}
-
-#[derive(Clone)]
-struct Matches {
-    query_index: Vec<u32>,
-    distance: Vec<u8>,
-    attribute: Vec<u16>,
-    word_index: Vec<u16>,
-    is_exact: Vec<bool>,
-}
-
-impl Matches {
-    fn with_capacity(cap: usize) -> Matches {
-        Matches {
-            query_index: Vec::with_capacity(cap),
-            distance: Vec::with_capacity(cap),
-            attribute: Vec::with_capacity(cap),
-            word_index: Vec::with_capacity(cap),
-            is_exact: Vec::with_capacity(cap),
-        }
-    }
-
-    fn extend_from_slice(&mut self, matches: &[(DocumentId, TmpMatch)]) {
-        for (_, match_) in matches {
-            self.query_index.push(match_.query_index);
-            self.distance.push(match_.distance);
-            self.attribute.push(match_.attribute);
-            self.word_index.push(match_.word_index);
-            self.is_exact.push(match_.is_exact);
-        }
     }
 }
--- a/meilisearch-http/src/helpers/meilisearch.rs
+++ b/meilisearch-http/src/helpers/meilisearch.rs
@ -310,11 +310,11 @@ impl<'a> SearchBuilder<'a> {
            if let Some(ranking_rules_order) = ranking_order {
                for rule in ranking_rules_order {
                    match rule.as_str() {
-                        "_sum_of_typos" => builder.push(SumOfTypos),
-                        "_number_of_words" => builder.push(NumberOfWords),
-                        "_word_proximity" => builder.push(WordsProximity),
-                        "_sum_of_words_attribute" => builder.push(SumOfWordsAttribute),
-                        "_sum_of_words_position" => builder.push(SumOfWordsPosition),
+                        "_typo" => builder.push(Typo),
+                        "_words" => builder.push(Words),
+                        "_proximity" => builder.push(Proximity),
+                        "_attribute" => builder.push(Attribute),
+                        "_words_position" => builder.push(WordsPosition),
                        "_exact" => builder.push(Exact),
                        _ => {
                            let order = match ranking_rules.get(rule.as_str()) {
@ -340,11 +340,11 @@ impl<'a> SearchBuilder<'a> {
                builder.push(DocumentId);
                return Ok(Some(builder.build()));
            } else {
-                builder.push(SumOfTypos);
-                builder.push(NumberOfWords);
-                builder.push(WordsProximity);
-                builder.push(SumOfWordsAttribute);
-                builder.push(SumOfWordsPosition);
+                builder.push(Typo);
+                builder.push(Words);
+                builder.push(Proximity);
+                builder.push(Attribute);
+                builder.push(WordsPosition);
                builder.push(Exact);
                for (rule, order) in ranking_rules.iter() {
                    let custom_ranking = match order {