Rename MeiliDB into MeiliSearch

2025-07-04 20:37:15 +02:00 · 2019-11-26 11:06:55 +01:00 · 2019-11-26 11:06:55 +01:00 · 7cc096e0a2
commit 7cc096e0a2
parent 58eaf78dc4
94 changed files with 126 additions and 126 deletions
--- a/meilisearch-core/src/criterion/document_id.rs
+++ b/meilisearch-core/src/criterion/document_id.rs
@ -0,0 +1,16 @@
+use crate::criterion::Criterion;
+use crate::RawDocument;
+use std::cmp::Ordering;
+
+/// Criterion that orders two documents by comparing their `id` fields.
+#[derive(Debug, Clone, Copy)]
+pub struct DocumentId;
+
+impl Criterion for DocumentId {
+    /// Returns the ordering of `lhs.id` relative to `rhs.id`.
+    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
+        let (left_id, right_id) = (&lhs.id, &rhs.id);
+        left_id.cmp(right_id)
+    }
+
+    /// Name of this criterion.
+    fn name(&self) -> &str {
+        "DocumentId"
+    }
+}
--- a/meilisearch-core/src/criterion/exact.rs
+++ b/meilisearch-core/src/criterion/exact.rs
@ -0,0 +1,132 @@
+use std::cmp::Ordering;
+
+use meilisearch_schema::SchemaAttr;
+use sdset::Set;
+use slice_group_by::GroupBy;
+
+use crate::criterion::Criterion;
+use crate::RawDocument;
+
+/// Counts the groups of `query_index` for which at least one match is flagged as exact.
+///
+/// `query_index`, `attribute` and `is_exact` are read as parallel slices; a group is
+/// a run of consecutive equal values in `query_index`.
+///
+/// Returns `usize::max_value()` as soon as an exact match is found in an attribute
+/// whose entry in `fields_counts` is `1` (presumably a field made of a single word,
+/// to be confirmed against the indexing code).
+#[inline]
+fn number_exact_matches(
+    query_index: &[u32],
+    attribute: &[u16],
+    is_exact: &[bool],
+    fields_counts: &Set<(SchemaAttr, u64)>,
+) -> usize {
+    let mut exact_groups = 0;
+    let mut start = 0;
+
+    for group in query_index.linear_group() {
+        let end = start + group.len();
+        let mut group_has_exact = false;
+
+        let exact_offsets = is_exact[start..end]
+            .iter()
+            .enumerate()
+            .filter(|(_, flag)| **flag)
+            .map(|(offset, _)| offset);
+
+        for offset in exact_offsets {
+            group_has_exact = true;
+
+            // the attribute is only read for the matches that are exact
+            let attr = &attribute[start + offset];
+            if let Ok(found) = fields_counts.binary_search_by_key(attr, |(a, _)| a.0) {
+                let (_, field_count) = fields_counts[found];
+                if field_count == 1 {
+                    return usize::max_value();
+                }
+            }
+        }
+
+        if group_has_exact {
+            exact_groups += 1;
+        }
+        start = end;
+    }
+
+    exact_groups
+}
+
+/// Criterion that favors the document with the highest number of exact matches.
+#[derive(Debug, Clone, Copy)]
+pub struct Exact;
+
+impl Criterion for Exact {
+    /// Computes `number_exact_matches` for both documents and returns the reversed
+    /// comparison, so the document with the most exact matches is `Less`.
+    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
+        let exact_matches = |doc: &RawDocument| {
+            let query_index = doc.query_index();
+            let is_exact = doc.is_exact();
+            let attribute = doc.attribute();
+
+            number_exact_matches(query_index, attribute, is_exact, &doc.fields_counts)
+        };
+
+        let left = exact_matches(lhs);
+        let right = exact_matches(rhs);
+
+        // a bigger count must come first
+        left.cmp(&right).reverse()
+    }
+
+    /// Name of this criterion.
+    fn name(&self) -> &str {
+        "Exact"
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Runs `number_exact_matches` for one query word matched once in attribute 0,
+    /// `field_count` being the value stored for that attribute in the fields counts.
+    fn single_match(is_exact: bool, field_count: u64) -> usize {
+        let counts = [(SchemaAttr(0), field_count)];
+        let fields_counts = Set::new(&counts).unwrap();
+
+        number_exact_matches(&[0], &[0], &[is_exact], fields_counts)
+    }
+
+    // typing: "soulier"
+    //
+    // doc0: "Soulier bleu"
+    // doc1: "souliereres rouge"
+    #[test]
+    fn easy_case() {
+        let doc0 = single_match(true, 2);
+        let doc1 = single_match(false, 2);
+
+        assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
+    }
+
+    // typing: "soulier"
+    //
+    // doc0: { 0. "soulier" }
+    // doc1: { 0. "soulier bleu et blanc" }
+    #[test]
+    fn basic() {
+        let doc0 = single_match(true, 1);
+        let doc1 = single_match(true, 4);
+
+        assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
+    }
+}
--- a/meilisearch-core/src/criterion/mod.rs
+++ b/meilisearch-core/src/criterion/mod.rs
@ -0,0 +1,121 @@
+mod document_id;
+mod exact;
+mod number_of_words;
+mod sort_by_attr;
+mod sum_of_typos;
+mod sum_of_words_attribute;
+mod sum_of_words_position;
+mod words_proximity;
+
+use crate::RawDocument;
+use std::cmp::Ordering;
+
+pub use self::{
+    document_id::DocumentId, exact::Exact, number_of_words::NumberOfWords,
+    sort_by_attr::SortByAttr, sum_of_typos::SumOfTypos,
+    sum_of_words_attribute::SumOfWordsAttribute, sum_of_words_position::SumOfWordsPosition,
+    words_proximity::WordsProximity,
+};
+
+/// A ranking rule able to order two documents relative to one another.
+pub trait Criterion: Send + Sync {
+    /// Compares `lhs` with `rhs` according to this criterion.
+    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering;
+
+    /// Name of the criterion.
+    fn name(&self) -> &str;
+
+    /// Tells whether the two documents are equal for this criterion.
+    ///
+    /// The default implementation checks that `evaluate` returns `Ordering::Equal`.
+    #[inline]
+    fn eq(&self, lhs: &RawDocument, rhs: &RawDocument) -> bool {
+        match self.evaluate(lhs, rhs) {
+            Ordering::Equal => true,
+            Ordering::Less | Ordering::Greater => false,
+        }
+    }
+}
+
+/// A shared reference to a criterion is itself a criterion: every method
+/// is forwarded to the referenced value.
+///
+/// `Send + Sync` are supertraits of `Criterion`, so they do not need to be
+/// repeated in the bounds (same bounds as the `Box<T>` implementation).
+impl<'a, T: Criterion + ?Sized> Criterion for &'a T {
+    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
+        (**self).evaluate(lhs, rhs)
+    }
+
+    fn name(&self) -> &str {
+        (**self).name()
+    }
+
+    // forwarded explicitly so that an overridden `eq` of `T` is honored
+    fn eq(&self, lhs: &RawDocument, rhs: &RawDocument) -> bool {
+        (**self).eq(lhs, rhs)
+    }
+}
+
+/// A boxed criterion is itself a criterion: every method is forwarded
+/// to the boxed value.
+impl<T: Criterion + ?Sized> Criterion for Box<T> {
+    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
+        <T as Criterion>::evaluate(&**self, lhs, rhs)
+    }
+
+    fn name(&self) -> &str {
+        <T as Criterion>::name(&**self)
+    }
+
+    // forwarded explicitly so that an overridden `eq` of `T` is honored
+    fn eq(&self, lhs: &RawDocument, rhs: &RawDocument) -> bool {
+        <T as Criterion>::eq(&**self, lhs, rhs)
+    }
+}
+
+/// Accumulates boxed criteria, in insertion order, before turning them
+/// into a `Criteria`.
+#[derive(Default)]
+pub struct CriteriaBuilder<'a> {
+    inner: Vec<Box<dyn Criterion + 'a>>,
+}
+
+impl<'a> CriteriaBuilder<'a> {
+    /// Creates a builder holding no criterion.
+    pub fn new() -> CriteriaBuilder<'a> {
+        let inner = Vec::new();
+        CriteriaBuilder { inner }
+    }
+
+    /// Creates an empty builder with room for `capacity` criteria.
+    pub fn with_capacity(capacity: usize) -> CriteriaBuilder<'a> {
+        let inner = Vec::with_capacity(capacity);
+        CriteriaBuilder { inner }
+    }
+
+    /// Reserves room for at least `additional` more criteria.
+    pub fn reserve(&mut self, additional: usize) {
+        self.inner.reserve(additional)
+    }
+
+    /// Appends `criterion` and gives the builder back, allowing calls to be chained.
+    pub fn add<C: 'a>(mut self, criterion: C) -> CriteriaBuilder<'a>
+    where
+        C: Criterion,
+    {
+        self.inner.push(Box::new(criterion));
+        self
+    }
+
+    /// Appends `criterion` to the builder in place.
+    pub fn push<C: 'a>(&mut self, criterion: C)
+    where
+        C: Criterion,
+    {
+        let boxed: Box<dyn Criterion + 'a> = Box::new(criterion);
+        self.inner.push(boxed);
+    }
+
+    /// Consumes the builder and returns the criteria in the order they were added.
+    pub fn build(self) -> Criteria<'a> {
+        let CriteriaBuilder { inner } = self;
+        Criteria { inner }
+    }
+}
+
+/// An ordered list of boxed criteria, exposed as a slice through `AsRef`.
+pub struct Criteria<'a> {
+    inner: Vec<Box<dyn Criterion + 'a>>,
+}
+
+impl<'a> Default for Criteria<'a> {
+    /// Builds the default list of seven criteria, `DocumentId` being the last one.
+    fn default() -> Self {
+        let mut builder = CriteriaBuilder::with_capacity(7);
+
+        builder.push(SumOfTypos);
+        builder.push(NumberOfWords);
+        builder.push(WordsProximity);
+        builder.push(SumOfWordsAttribute);
+        builder.push(SumOfWordsPosition);
+        builder.push(Exact);
+        builder.push(DocumentId);
+
+        builder.build()
+    }
+}
+
+impl<'a> AsRef<[Box<dyn Criterion + 'a>]> for Criteria<'a> {
+    /// Gives access to the criteria, in the order they were added.
+    fn as_ref(&self) -> &[Box<dyn Criterion + 'a>] {
+        self.inner.as_slice()
+    }
+}
--- a/meilisearch-core/src/criterion/number_of_words.rs
+++ b/meilisearch-core/src/criterion/number_of_words.rs
@ -0,0 +1,31 @@
+use crate::criterion::Criterion;
+use crate::RawDocument;
+use slice_group_by::GroupBy;
+use std::cmp::Ordering;
+
+/// Counts the runs of consecutive equal values found in `query_index`.
+#[inline]
+fn number_of_query_words(query_index: &[u32]) -> usize {
+    query_index.linear_group().fold(0, |words, _group| words + 1)
+}
+
+/// Criterion that favors the document matching the highest number of query words.
+#[derive(Debug, Clone, Copy)]
+pub struct NumberOfWords;
+
+impl Criterion for NumberOfWords {
+    /// Compares the number of query words of both documents, in reverse order,
+    /// so the document with the most words is `Less`.
+    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
+        let words = |doc: &RawDocument| number_of_query_words(doc.query_index());
+
+        let left = words(lhs);
+        let right = words(rhs);
+
+        left.cmp(&right).reverse()
+    }
+
+    /// Name of this criterion.
+    fn name(&self) -> &str {
+        "NumberOfWords"
+    }
+}
--- a/meilisearch-core/src/criterion/sort_by_attr.rs
+++ b/meilisearch-core/src/criterion/sort_by_attr.rs
@ -0,0 +1,130 @@
+use std::cmp::Ordering;
+use std::error::Error;
+use std::fmt;
+
+use crate::criterion::Criterion;
+use crate::{RankedMap, RawDocument};
+use meilisearch_schema::{Schema, SchemaAttr};
+
+/// A helper struct that permits sorting documents by
+/// one of their ranked attributes.
+///
+/// # Note
+///
+/// The values are looked up in the `RankedMap` by document id and attribute.
+///
+/// A document without a value for the attribute is always ordered after
+/// a document that has one, whatever the requested direction, and two
+/// documents without a value are considered equal.
+///
+/// # Example
+///
+/// ```ignore
+/// use serde_derive::Deserialize;
+/// use meilisearch::rank::criterion::*;
+///
+/// let custom_ranking = SortByAttr::lower_is_better(&ranked_map, &schema, "published_at")?;
+///
+/// let builder = CriteriaBuilder::with_capacity(8)
+///        .add(SumOfTypos)
+///        .add(NumberOfWords)
+///        .add(WordsProximity)
+///        .add(SumOfWordsAttribute)
+///        .add(SumOfWordsPosition)
+///        .add(Exact)
+///        .add(custom_ranking)
+///        .add(DocumentId);
+///
+/// let criterion = builder.build();
+///
+/// ```
+pub struct SortByAttr<'a> {
+    ranked_map: &'a RankedMap,
+    attr: SchemaAttr,
+    reversed: bool,
+}
+
+impl<'a> SortByAttr<'a> {
+    /// Creates a criterion that keeps the natural ordering of the attribute values.
+    ///
+    /// # Errors
+    ///
+    /// Returns `SortByAttrError::AttributeNotFound` when `attr_name` is not part of
+    /// `schema` and `SortByAttrError::AttributeNotRegisteredForRanking` when the
+    /// attribute is not ranked.
+    pub fn lower_is_better(
+        ranked_map: &'a RankedMap,
+        schema: &Schema,
+        attr_name: &str,
+    ) -> Result<SortByAttr<'a>, SortByAttrError> {
+        SortByAttr::new(ranked_map, schema, attr_name, false)
+    }
+
+    /// Creates a criterion that reverses the natural ordering of the attribute values.
+    ///
+    /// # Errors
+    ///
+    /// Returns `SortByAttrError::AttributeNotFound` when `attr_name` is not part of
+    /// `schema` and `SortByAttrError::AttributeNotRegisteredForRanking` when the
+    /// attribute is not ranked.
+    pub fn higher_is_better(
+        ranked_map: &'a RankedMap,
+        schema: &Schema,
+        attr_name: &str,
+    ) -> Result<SortByAttr<'a>, SortByAttrError> {
+        SortByAttr::new(ranked_map, schema, attr_name, true)
+    }
+
+    /// Resolves `attr_name` in `schema` and checks that it is ranked
+    /// before building the criterion.
+    fn new(
+        ranked_map: &'a RankedMap,
+        schema: &Schema,
+        attr_name: &str,
+        reversed: bool,
+    ) -> Result<SortByAttr<'a>, SortByAttrError> {
+        let attr = schema
+            .attribute(attr_name)
+            .ok_or(SortByAttrError::AttributeNotFound)?;
+
+        if !schema.props(attr).is_ranked() {
+            return Err(SortByAttrError::AttributeNotRegisteredForRanking);
+        }
+
+        Ok(SortByAttr {
+            ranked_map,
+            attr,
+            reversed,
+        })
+    }
+}
+
+impl<'a> Criterion for SortByAttr<'a> {
+    /// Compares the values stored in the ranked map for both documents.
+    ///
+    /// A missing value is always ordered after a present one; the `reversed`
+    /// flag only applies when both values are present.
+    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
+        let left = self.ranked_map.get(lhs.id, self.attr);
+        let right = self.ranked_map.get(rhs.id, self.attr);
+
+        match (left, right) {
+            (None, None) => Ordering::Equal,
+            (None, Some(_)) => Ordering::Greater,
+            (Some(_), None) => Ordering::Less,
+            (Some(left), Some(right)) => {
+                let natural = left.cmp(&right);
+                match self.reversed {
+                    true => natural.reverse(),
+                    false => natural,
+                }
+            }
+        }
+    }
+
+    /// Name of this criterion.
+    fn name(&self) -> &str {
+        "SortByAttr"
+    }
+}
+
+/// Reasons why a `SortByAttr` criterion cannot be created.
+#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
+pub enum SortByAttrError {
+    /// The schema does not contain the requested attribute.
+    AttributeNotFound,
+    /// The attribute exists but is not ranked in the schema.
+    AttributeNotRegisteredForRanking,
+}
+
+impl fmt::Display for SortByAttrError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        let message = match *self {
+            SortByAttrError::AttributeNotFound => "attribute not found in the schema",
+            SortByAttrError::AttributeNotRegisteredForRanking => {
+                "attribute not registered for ranking"
+            }
+        };
+
+        f.write_str(message)
+    }
+}
+
+impl Error for SortByAttrError {}
--- a/meilisearch-core/src/criterion/sum_of_typos.rs
+++ b/meilisearch-core/src/criterion/sum_of_typos.rs
@ -0,0 +1,116 @@
+use std::cmp::Ordering;
+
+use slice_group_by::GroupBy;
+
+use crate::criterion::Criterion;
+use crate::RawDocument;
+
+// This function is not a real base 10 logarithm:
+// it returns an approximation of log10(n + 1).
+// It is safe to panic on an input number higher than 3,
+// the number of typos is never bigger than that.
+#[inline]
+fn custom_log10(n: u8) -> f32 {
+    // the value at position `n` is the one returned for `n`
+    const VALUES: [f32; 4] = [
+        0.0,     // log(1)
+        0.30102, // log(2)
+        0.47712, // log(3)
+        0.60205, // log(4)
+    ];
+
+    match VALUES.get(usize::from(n)) {
+        Some(value) => *value,
+        None => panic!("invalid number"),
+    }
+}
+
+/// Computes a score from the number of query words and their typos: the higher,
+/// the better.
+///
+/// Only the distance of the first match of every group of `query_index` is read.
+/// The score is the number of groups divided by one plus the sum of the
+/// `custom_log10` of those distances, multiplied by 1000 and truncated.
+#[inline]
+fn sum_matches_typos(query_index: &[u32], distance: &[u8]) -> usize {
+    let (number_words, sum_typos, _) = query_index.linear_group().fold(
+        (0usize, 0.0f32, 0usize),
+        |(words, typos, first_of_group), group| {
+            (
+                words + 1,
+                typos + custom_log10(distance[first_of_group]),
+                first_of_group + group.len(),
+            )
+        },
+    );
+
+    (number_words as f32 / (sum_typos + 1.0) * 1000.0) as usize
+}
+
+/// Criterion that favors the document with the best `sum_matches_typos` score.
+#[derive(Debug, Clone, Copy)]
+pub struct SumOfTypos;
+
+impl Criterion for SumOfTypos {
+    /// Compares the typo scores of both documents, in reverse order,
+    /// so the document with the highest score is `Less`.
+    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
+        let score = |doc: &RawDocument| {
+            let query_index = doc.query_index();
+            let distance = doc.distance();
+            sum_matches_typos(query_index, distance)
+        };
+
+        let left = score(lhs);
+        let right = score(rhs);
+
+        left.cmp(&right).reverse()
+    }
+
+    /// Name of this criterion.
+    fn name(&self) -> &str {
+        "SumOfTypos"
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // typing: "Geox CEO"
+    //
+    // doc0: "Geox SpA: CEO and Executive"
+    // doc1: "Mt. Gox CEO Resigns From Bitcoin Foundation"
+    #[test]
+    fn one_typo_reference() {
+        let doc0 = sum_matches_typos(&[0, 1], &[0, 0]);
+        let doc1 = sum_matches_typos(&[0, 1], &[1, 0]);
+
+        assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
+    }
+
+    // typing: "bouton manchette"
+    //
+    // doc0: "bouton manchette"
+    // doc1: "bouton"
+    #[test]
+    fn no_typo() {
+        let doc0 = sum_matches_typos(&[0, 1], &[0, 0]);
+        let doc1 = sum_matches_typos(&[0], &[0]);
+
+        assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
+    }
+
+    // typing: "bouton manchztte"
+    //
+    // doc0: "bouton manchette"
+    // doc1: "bouton"
+    #[test]
+    fn one_typo() {
+        let doc0 = sum_matches_typos(&[0, 1], &[0, 1]);
+        let doc1 = sum_matches_typos(&[0], &[0]);
+
+        assert_eq!(doc0.cmp(&doc1).reverse(), Ordering::Less);
+    }
+}
--- a/meilisearch-core/src/criterion/sum_of_words_attribute.rs
+++ b/meilisearch-core/src/criterion/sum_of_words_attribute.rs
@ -0,0 +1,64 @@
+use crate::criterion::Criterion;
+use crate::RawDocument;
+use slice_group_by::GroupBy;
+use std::cmp::Ordering;
+
+/// Sums the attribute of the first match of every group of `query_index`,
+/// a group being a run of consecutive equal values.
+#[inline]
+fn sum_matches_attributes(query_index: &[u32], attribute: &[u16]) -> usize {
+    let mut first_of_group = 0;
+
+    query_index
+        .linear_group()
+        .map(|group| {
+            let value = attribute[first_of_group] as usize;
+            first_of_group += group.len();
+            value
+        })
+        .sum()
+}
+
+/// Criterion that favors the document with the lowest sum of matched attributes.
+#[derive(Debug, Clone, Copy)]
+pub struct SumOfWordsAttribute;
+
+impl Criterion for SumOfWordsAttribute {
+    /// Compares the `sum_matches_attributes` of both documents:
+    /// the document with the lowest sum is `Less`.
+    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
+        let attributes_sum = |doc: &RawDocument| {
+            let query_index = doc.query_index();
+            let attribute = doc.attribute();
+            sum_matches_attributes(query_index, attribute)
+        };
+
+        let left = attributes_sum(lhs);
+        let right = attributes_sum(rhs);
+
+        left.cmp(&right)
+    }
+
+    /// Name of this criterion.
+    fn name(&self) -> &str {
+        "SumOfWordsAttribute"
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // typing: "soulier"
+    //
+    // doc0: { 0. "Soulier bleu", 1. "bla bla bla" }
+    // doc1: { 0. "Botte rouge", 1. "Soulier en cuir" }
+    #[test]
+    fn title_vs_description() {
+        let doc0 = sum_matches_attributes(&[0], &[0]);
+        let doc1 = sum_matches_attributes(&[0], &[1]);
+
+        assert_eq!(doc0.cmp(&doc1), Ordering::Less);
+    }
+}
--- a/meilisearch-core/src/criterion/sum_of_words_position.rs
+++ b/meilisearch-core/src/criterion/sum_of_words_position.rs
@ -0,0 +1,64 @@
+use crate::criterion::Criterion;
+use crate::RawDocument;
+use slice_group_by::GroupBy;
+use std::cmp::Ordering;
+
+/// Sums the word index of the first match of every group of `query_index`,
+/// a group being a run of consecutive equal values.
+#[inline]
+fn sum_matches_attribute_index(query_index: &[u32], word_index: &[u16]) -> usize {
+    let (total, _) = query_index.linear_group().fold(
+        (0usize, 0usize),
+        |(total, first_of_group), group| {
+            (
+                total + word_index[first_of_group] as usize,
+                first_of_group + group.len(),
+            )
+        },
+    );
+
+    total
+}
+
+/// Criterion that favors the document with the lowest sum of matched word indexes.
+#[derive(Debug, Clone, Copy)]
+pub struct SumOfWordsPosition;
+
+impl Criterion for SumOfWordsPosition {
+    /// Compares the `sum_matches_attribute_index` of both documents:
+    /// the document with the lowest sum is `Less`.
+    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
+        let positions_sum = |doc: &RawDocument| {
+            let query_index = doc.query_index();
+            let word_index = doc.word_index();
+            sum_matches_attribute_index(query_index, word_index)
+        };
+
+        let left = positions_sum(lhs);
+        let right = positions_sum(rhs);
+
+        left.cmp(&right)
+    }
+
+    /// Name of this criterion.
+    fn name(&self) -> &str {
+        "SumOfWordsPosition"
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    // typing: "soulier"
+    //
+    // doc0: "Soulier bleu"
+    // doc1: "Botte rouge et soulier noir"
+    #[test]
+    fn easy_case() {
+        let doc0 = sum_matches_attribute_index(&[0], &[0]);
+        let doc1 = sum_matches_attribute_index(&[0], &[3]);
+
+        assert_eq!(doc0.cmp(&doc1), Ordering::Less);
+    }
+}
--- a/meilisearch-core/src/criterion/words_proximity.rs
+++ b/meilisearch-core/src/criterion/words_proximity.rs
@ -0,0 +1,164 @@
+use crate::criterion::Criterion;
+use crate::RawDocument;
+use slice_group_by::GroupBy;
+use std::cmp::{self, Ordering};
+
+const MAX_DISTANCE: u16 = 8; // cap applied to word index gaps, also the proximity of words in different attributes
+
+/// Turns a pair of references into a pair of owned values by cloning both sides.
+#[inline]
+fn clone_tuple<T: Clone, U: Clone>(pair: (&T, &U)) -> (T, U) {
+    let (first, second) = pair;
+    (T::clone(first), U::clone(second))
+}
+
+/// Distance between two word indexes, capped to `MAX_DISTANCE`.
+///
+/// When `lhs` is not strictly before `rhs` the capped distance is increased by one.
+fn index_proximity(lhs: u16, rhs: u16) -> u16 {
+    match lhs.cmp(&rhs) {
+        Ordering::Less => (rhs - lhs).min(MAX_DISTANCE),
+        Ordering::Equal | Ordering::Greater => (lhs - rhs).min(MAX_DISTANCE) + 1,
+    }
+}
+
+/// Proximity between two `(attribute, word index)` pairs.
+///
+/// Pairs located in different attributes are `MAX_DISTANCE` apart, otherwise
+/// the proximity is the `index_proximity` of the word indexes.
+fn attribute_proximity((lattr, lwi): (u16, u16), (rattr, rwi): (u16, u16)) -> u16 {
+    if lattr == rattr {
+        index_proximity(lwi, rwi)
+    } else {
+        MAX_DISTANCE
+    }
+}
+
+/// Smallest `attribute_proximity` over every combination of a left match
+/// and a right match.
+///
+/// Each side is a pair of parallel `(attributes, word indexes)` slices.
+/// Returns `u16::max_value()` when there is no combination to evaluate.
+fn min_proximity((lattr, lwi): (&[u16], &[u16]), (rattr, rwi): (&[u16], &[u16])) -> u16 {
+    lattr
+        .iter()
+        .zip(lwi)
+        .flat_map(|left| {
+            rattr
+                .iter()
+                .zip(rwi)
+                .map(move |right| attribute_proximity(clone_tuple(left), clone_tuple(right)))
+        })
+        .fold(u16::max_value(), cmp::min)
+}
+
+/// Sums the `min_proximity` of every pair of consecutive groups of `query_index`.
+///
+/// The four slices are read as parallel slices and a group is a run of consecutive
+/// equal values in `query_index`. Within a group, only the matches belonging to
+/// the first run of equal `distance` values are kept.
+///
+/// Returns `0` when there are fewer than two groups.
+fn matches_proximity(
+    query_index: &[u32],
+    distance: &[u8],
+    attribute: &[u16],
+    word_index: &[u16],
+) -> u16 {
+    let mut proximity = 0;
+    let mut start = 0;
+    let mut previous: Option<(&[u16], &[u16])> = None;
+
+    for group in query_index.linear_group() {
+        let end = start + group.len();
+
+        // keep the first distance run of the group only (presumably the lowest
+        // values, assuming the matches of a group are sorted by distance)
+        let kept = distance[start..end].linear_group().next().unwrap().len();
+
+        let current = (
+            &attribute[start..start + kept],
+            &word_index[start..start + kept],
+        );
+
+        // the first group has no predecessor to be compared with
+        if let Some(before) = previous {
+            proximity += min_proximity(before, current);
+        }
+
+        previous = Some(current);
+        start = end;
+    }
+
+    proximity
+}
+
+/// Criterion that favors the document whose matched query words are the closest
+/// to one another.
+#[derive(Debug, Clone, Copy)]
+pub struct WordsProximity;
+
+impl Criterion for WordsProximity {
+    /// Compares the `matches_proximity` of both documents:
+    /// the document with the lowest proximity is `Less`.
+    fn evaluate(&self, lhs: &RawDocument, rhs: &RawDocument) -> Ordering {
+        let proximity = |doc: &RawDocument| {
+            let query_index = doc.query_index();
+            let distance = doc.distance();
+            let attribute = doc.attribute();
+            let word_index = doc.word_index();
+            matches_proximity(query_index, distance, attribute, word_index)
+        };
+
+        let left = proximity(lhs);
+        let right = proximity(rhs);
+
+        left.cmp(&right)
+    }
+
+    /// Name of this criterion.
+    fn name(&self) -> &str {
+        "WordsProximity"
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn three_different_attributes() {
+        // "soup" "of the" "the day"
+        //
+        // { id: 0, attr: 0, attr_index: 0 }
+        // { id: 1, attr: 1, attr_index: 0 }
+        // { id: 2, attr: 1, attr_index: 1 }
+        // { id: 2, attr: 2, attr_index: 0 }
+        // { id: 3, attr: 3, attr_index: 1 }
+
+        let proximity = matches_proximity(
+            &[0, 1, 2, 2, 3], // query_index
+            &[0, 0, 0, 0, 0], // distance
+            &[0, 1, 1, 2, 3], // attribute
+            &[0, 0, 1, 0, 1], // word_index
+        );
+
+        //   soup -> of = 8
+        // + of -> the  = 1
+        // + the -> day = 8 (not 1)
+        assert_eq!(proximity, 17);
+    }
+
+    #[test]
+    fn two_different_attributes() {
+        // "soup day" "soup of the day"
+        //
+        // { id: 0, attr: 0, attr_index: 0 }
+        // { id: 0, attr: 1, attr_index: 0 }
+        // { id: 1, attr: 1, attr_index: 1 }
+        // { id: 2, attr: 1, attr_index: 2 }
+        // { id: 3, attr: 0, attr_index: 1 }
+        // { id: 3, attr: 1, attr_index: 3 }
+
+        let proximity = matches_proximity(
+            &[0, 0, 1, 2, 3, 3], // query_index
+            &[0, 0, 0, 0, 0, 0], // distance
+            &[0, 1, 1, 1, 0, 1], // attribute
+            &[0, 0, 1, 2, 1, 3], // word_index
+        );
+
+        //   soup -> of = 1
+        // + of -> the  = 1
+        // + the -> day = 1
+        assert_eq!(proximity, 3);
+    }
+}