From 62b9c6fbee82cb0a1bf600457f74a49457f9bde2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= <loic.lecrenier@me.com>
Date: Tue, 4 Apr 2023 16:18:22 +0200
Subject: [PATCH] Add search tests

---
 milli/src/search/new/tests/mod.rs             |   3 +
 .../src/search/new/tests/ngram_split_words.rs | 255 ++++++++++++
 milli/src/search/new/tests/typo.rs            | 363 ++++++++++++++++++
 milli/src/search/new/tests/words_tms.rs       | 266 +++++++++++++
 4 files changed, 887 insertions(+)
 create mode 100644 milli/src/search/new/tests/mod.rs
 create mode 100644 milli/src/search/new/tests/ngram_split_words.rs
 create mode 100644 milli/src/search/new/tests/typo.rs
 create mode 100644 milli/src/search/new/tests/words_tms.rs

diff --git a/milli/src/search/new/tests/mod.rs b/milli/src/search/new/tests/mod.rs
new file mode 100644
index 000000000..eec4c62ec
--- /dev/null
+++ b/milli/src/search/new/tests/mod.rs
@@ -0,0 +1,3 @@
+pub mod ngram_split_words;
+pub mod typo;
+pub mod words_tms;
diff --git a/milli/src/search/new/tests/ngram_split_words.rs b/milli/src/search/new/tests/ngram_split_words.rs
new file mode 100644
index 000000000..06c49274c
--- /dev/null
+++ b/milli/src/search/new/tests/ngram_split_words.rs
@@ -0,0 +1,255 @@
+/*!
+This module tests the following properties:
+
+1. Two consecutive words from a query can be combined into a "2gram"
+2. Three consecutive words from a query can be combined into a "3gram"
+3. A word from the query can be split into two consecutive words (split words)
+4. A 2gram can be split into two words
+5. A 3gram cannot be split into two words
+6. 2grams can contain up to 1 typo
+7. 3grams cannot have typos
+8. 2grams and 3grams can be prefix tolerant
+9. Disabling typo tolerance also disables the split words feature
+10. Disabling typo tolerance does not disable prefix tolerance
+11. Disabling typo tolerance does not disable ngram tolerance
+12. Prefix tolerance is disabled for the last word if a space follows it
+13. Ngrams cannot be formed by combining a phrase and a word or two phrases
+*/
+
+use crate::{index::tests::TempIndex, Criterion, Search, SearchResult, TermsMatchingStrategy};
+
+/// Builds the index shared by every test in this module.
+///
+/// Only the `text` field is searchable and `words` is the sole ranking rule.
+/// The four documents cover each spelling the ngram and split-words features
+/// must bridge: "sun flower(s)" written as two words (ids 0 and 1) and
+/// "sunflower(s)" written as one word (ids 2 and 3).
+fn create_index() -> TempIndex {
+    let index = TempIndex::new();
+
+    index
+        .update_settings(|settings| {
+            settings.set_primary_key("id".to_owned());
+            settings.set_searchable_fields(vec!["text".to_owned()]);
+            settings.set_criteria(vec![Criterion::Words]);
+        })
+        .unwrap();
+
+    index
+        .add_documents(documents!([
+            // two words, plural
+            { "id": 0, "text": "the sun flowers are pretty" },
+            // two words, singular
+            { "id": 1, "text": "the sun flower is tall" },
+            // one word, plural
+            { "id": 2, "text": "the sunflowers are pretty" },
+            // one word, singular
+            { "id": 3, "text": "the sunflower is tall" }
+        ]))
+        .unwrap();
+
+    // Hand the populated index back to the calling test.
+    index
+}
+
+#[test]
+fn test_2gram_simple() {
+    let index = create_index();
+    index
+        .update_settings(|settings| {
+            settings.set_autorize_typos(false);
+        })
+        .unwrap();
+    // Typo tolerance is off; per the module docs, ngram and prefix tolerance stay active.
+    let rtxn = index.read_txn().unwrap();
+
+    let mut search = Search::new(&rtxn, &index);
+    search.terms_matching_strategy(TermsMatchingStrategy::All);
+    search.query("sun flower");
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+    // besides "sun flower(s)", the results include the one-word "sunflower(s)" documents
+    insta::assert_snapshot!(format!("{hits:?}"), @"[0, 1, 2, 3]");
+}
+#[test]
+fn test_3gram_simple() {
+    let index = create_index();
+    index
+        .update_settings(|settings| {
+            settings.set_autorize_typos(false);
+        })
+        .unwrap();
+
+    let rtxn = index.read_txn().unwrap();
+
+    let mut search = Search::new(&rtxn, &index);
+    search.terms_matching_strategy(TermsMatchingStrategy::All);
+    search.query("sun flower s are");
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+    // "sun" + "flower" + "s" concatenate to "sunflowers", the word found in document 2
+    insta::assert_snapshot!(format!("{hits:?}"), @"[0, 2]");
+}
+
+#[test]
+fn test_2gram_typo() {
+    let index = create_index();
+    let rtxn = index.read_txn().unwrap();
+
+    let mut search = Search::new(&rtxn, &index);
+    search.terms_matching_strategy(TermsMatchingStrategy::All);
+    search.query("sun flawer");
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+    // "sunflawer" is one letter away from "sunflower", so the 2gram needs one typo
+    insta::assert_snapshot!(format!("{hits:?}"), @"[0, 1, 2, 3]");
+}
+
+#[test]
+fn test_no_disable_ngrams() {
+    let index = create_index();
+    index
+        .update_settings(|settings| {
+            settings.set_autorize_typos(false);
+        })
+        .unwrap();
+
+    let rtxn = index.read_txn().unwrap();
+
+    let mut search = Search::new(&rtxn, &index);
+    search.terms_matching_strategy(TermsMatchingStrategy::All);
+    search.query("sun flower ");
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+    // "sun flower" (doc 1) or its 2gram "sunflower" (doc 3); no prefix matching after a space
+    insta::assert_snapshot!(format!("{hits:?}"), @"[1, 3]");
+}
+
+#[test]
+fn test_2gram_prefix() {
+    let index = create_index();
+    index
+        .update_settings(|settings| {
+            settings.set_autorize_typos(false);
+        })
+        .unwrap();
+
+    let rtxn = index.read_txn().unwrap();
+
+    let mut search = Search::new(&rtxn, &index);
+    search.terms_matching_strategy(TermsMatchingStrategy::All);
+    search.query("sun flow");
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+    // "sun" followed by a word starting with "flow", or one word starting with `sunflow`
+    insta::assert_snapshot!(format!("{hits:?}"), @"[0, 1, 2, 3]");
+}
+
+#[test]
+fn test_3gram_prefix() {
+    let index = create_index();
+    index
+        .update_settings(|settings| {
+            settings.set_autorize_typos(false);
+        })
+        .unwrap();
+
+    let rtxn = index.read_txn().unwrap();
+
+    let mut search = Search::new(&rtxn, &index);
+    search.terms_matching_strategy(TermsMatchingStrategy::All);
+    search.query("su nf l");
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+
+    // the 3gram `sunfl` acts as a prefix: only documents with a word starting with it match
+    insta::assert_snapshot!(format!("{hits:?}"), @"[2, 3]");
+}
+
+#[test]
+fn test_split_words() {
+    let index = create_index();
+    let rtxn = index.read_txn().unwrap();
+
+    let mut search = Search::new(&rtxn, &index);
+    search.terms_matching_strategy(TermsMatchingStrategy::All);
+    search.query("sunflower ");
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+
+    // `sunflower` (3), its split `sun flower` (1) and, presumably via one typo, `sunflowers` (2)
+    insta::assert_snapshot!(format!("{hits:?}"), @"[1, 2, 3]");
+}
+
+#[test]
+fn test_disable_split_words() {
+    let index = create_index();
+    index
+        .update_settings(|settings| {
+            settings.set_autorize_typos(false);
+        })
+        .unwrap();
+
+    let rtxn = index.read_txn().unwrap();
+
+    let mut search = Search::new(&rtxn, &index);
+    search.terms_matching_strategy(TermsMatchingStrategy::All);
+    search.query("sunflower ");
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+    // with typo tolerance off the word is not split, so `sun flower` (1) is not returned
+    insta::assert_snapshot!(format!("{hits:?}"), @"[3]");
+}
+
+#[test]
+fn test_2gram_split_words() {
+    let index = create_index();
+    let rtxn = index.read_txn().unwrap();
+
+    let mut search = Search::new(&rtxn, &index);
+    search.terms_matching_strategy(TermsMatchingStrategy::All);
+    search.query("sunf lower");
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+
+    // the 2gram `sunflower` matches as is (3), split into `sun flower` (1), or with one typo (2)
+    insta::assert_snapshot!(format!("{hits:?}"), @"[1, 2, 3]");
+}
+
+#[test]
+fn test_3gram_no_split_words() {
+    let index = create_index();
+    let rtxn = index.read_txn().unwrap();
+
+    let mut search = Search::new(&rtxn, &index);
+    search.terms_matching_strategy(TermsMatchingStrategy::All);
+    search.query("sunf lo wer");
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+
+    // the 3gram `sunflower` is never split back into two words: `sun flower` is not matched
+    insta::assert_snapshot!(format!("{hits:?}"), @"[2, 3]");
+}
+
+#[test]
+fn test_3gram_no_typos() {
+    let index = create_index();
+    let rtxn = index.read_txn().unwrap();
+
+    let mut search = Search::new(&rtxn, &index);
+    search.terms_matching_strategy(TermsMatchingStrategy::All);
+    search.query("sunf la wer");
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+    // `sunflawer` would need one typo to reach `sunflower`, which 3grams do not allow
+    insta::assert_snapshot!(format!("{hits:?}"), @"[]");
+}
+
+#[test]
+fn test_no_ngram_phrases() {
+    let index = create_index();
+    let rtxn = index.read_txn().unwrap();
+
+    let mut search = Search::new(&rtxn, &index);
+    search.terms_matching_strategy(TermsMatchingStrategy::All);
+    search.query("\"sun\" flower");
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+    // a phrase and a word are not combined: no `sunflower` document is returned
+    insta::assert_snapshot!(format!("{hits:?}"), @"[0, 1]");
+
+    let mut search = Search::new(&rtxn, &index);
+    search.terms_matching_strategy(TermsMatchingStrategy::All);
+    search.query("\"sun\" \"flower\"");
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+    // two phrases are not combined either, and `flower` must now match exactly
+    insta::assert_snapshot!(format!("{hits:?}"), @"[1]");
+}
diff --git a/milli/src/search/new/tests/typo.rs b/milli/src/search/new/tests/typo.rs
new file mode 100644
index 000000000..6ac8f5516
--- /dev/null
+++ b/milli/src/search/new/tests/typo.rs
@@ -0,0 +1,363 @@
+/*!
+This module tests the following properties:
+
+1. The `words` ranking rule is typo-tolerant
+2. Typo-tolerance handles missing letters, extra letters, replaced letters, and swapped letters (at least)
+3. Words which are < `min_word_len_one_typo` are not typo tolerant
+4. Words which are >= `min_word_len_one_typo` but < `min_word_len_two_typos` can have one typo
+5. Words which are >= `min_word_len_two_typos` can have two typos
+6. A typo on the first letter of a word counts as two typos
+7. Phrases are not typo tolerant
+8. 2grams can have 1 typo if they are larger than `min_word_len_two_typos`
+9. 3grams are not typo tolerant
+10. The `typo` ranking rule assumes the role of the `words` ranking rule implicitly
+if `words` doesn't exist before it.
+11. The `typo` ranking rule places documents with the same number of typos in the same bucket
+12. Prefix tolerance costs nothing according to the typo ranking rule
+13. Split words cost 1 typo according to the typo ranking rule
+14. Synonyms cost nothing according to the typo ranking rule
+*/
+
+use std::collections::HashMap;
+
+use crate::{
+    index::tests::TempIndex, Criterion, 
+    Search, SearchResult, TermsMatchingStrategy,
+};
+
+fn create_index() -> TempIndex {
+    let index = TempIndex::new();
+    // Fixture for this module: search only the `text` field, rank with the `words` rule alone.
+    index
+        .update_settings(|s| {
+            s.set_primary_key("id".to_owned());
+            s.set_searchable_fields(vec!["text".to_owned()]);
+            s.set_criteria(vec![Criterion::Words]);
+        })
+        .unwrap();
+    // Ids 0-13 and 21-22 are "quick brown fox" sentences; ids 14-20 are "network ..." variants.
+    index
+        .add_documents(documents!([
+            {
+                "id": 0,
+                "text": "the quick brown fox jumps over the lazy dog"
+            },
+            {
+                "id": 1,
+                "text": "the quick brown foxes jump over the lazy dog"
+            },
+            {
+                "id": 2,
+                "text": "the quick brown fax sends a letter to the dog"
+            },
+            {
+                "id": 3,
+                "text": "the quickest brownest fox jumps over the laziest dog"
+            },
+            {
+                "id": 4,
+                "text": "a fox doesn't quack, that crown goes to the duck."
+            },
+            {
+                "id": 5,
+                "text": "the quicker browner fox jumped over the lazier dog"
+            },
+            {
+                "id": 6,
+                "text": "the extravagant fox skyrocketed over the languorous dog" // thanks thesaurus
+            },
+            {
+                "id": 7,
+                "text": "the quick brown fox jumps over the lazy"
+            },
+            {
+                "id": 8,
+                "text": "the quick brown fox jumps over the"
+            },
+            {
+                "id": 9,
+                "text": "the quick brown fox jumps over"
+            },
+            {
+                "id": 10,
+                "text": "the quick brown fox jumps"
+            },
+            {
+                "id": 11,
+                "text": "the quick brown fox"
+            },
+            {
+                "id": 12,
+                "text": "the quick brown"
+            },
+            {
+                "id": 13,
+                "text": "the quick"
+            },
+            {
+                "id": 14,
+                "text": "netwolk interconections sunflawar"
+            },
+            {
+                "id": 15,
+                "text": "network interconnections sunflawer"
+            },
+            {
+                "id": 16,
+                "text": "network interconnection sunflower"
+            },
+            {
+                "id": 17,
+                "text": "network interconnection sun flower"
+            },
+            {
+                "id": 18,
+                "text": "network interconnection sunflowering"
+            },
+            {
+                "id": 19,
+                "text": "network interconnection sun flowering"
+            },
+            {
+                "id": 20,
+                "text": "network interconnection sunflowar"
+            },
+            {
+                "id": 21,
+                "text": "the fast brownish fox jumps over the lackadaisical dog"
+            },
+            {
+                "id": 22,
+                "text": "the quick brown fox jumps over the lackadaisical dog"
+            },
+        ]))
+        .unwrap();
+    index
+}
+
+#[test]
+fn test_no_typo() {
+    let index = create_index();
+    index
+        .update_settings(|settings| {
+            settings.set_autorize_typos(false);
+        })
+        .unwrap();
+    // With typos disabled, only document 0 (the exact sentence) is expected.
+    let rtxn = index.read_txn().unwrap();
+
+    let mut search = Search::new(&rtxn, &index);
+    search.terms_matching_strategy(TermsMatchingStrategy::All);
+    search.query("the quick brown fox jumps over the lazy dog");
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+    insta::assert_snapshot!(format!("{hits:?}"), @"[0]");
+}
+
+#[test]
+fn test_default_typo() {
+    let index = create_index();
+    let rtxn = index.read_txn().unwrap();
+
+    let one_typo_len = index.min_word_len_one_typo(&rtxn).unwrap();
+    let two_typos_len = index.min_word_len_two_typos(&rtxn).unwrap();
+    insta::assert_debug_snapshot!(one_typo_len, @"5");
+    insta::assert_debug_snapshot!(two_typos_len, @"9");
+
+    // exact sentence, no typo needed
+    let mut search = Search::new(&rtxn, &index);
+    search.terms_matching_strategy(TermsMatchingStrategy::All);
+    search.query("the quick brown fox jumps over the lazy dog");
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+    insta::assert_snapshot!(format!("{hits:?}"), @"[0]");
+
+    // one replaced letter in a single word (`quack` for `quick`)
+    let mut search = Search::new(&rtxn, &index);
+    search.terms_matching_strategy(TermsMatchingStrategy::All);
+    search.query("the quack brown fox jumps over the lazy dog");
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+    insta::assert_snapshot!(format!("{hits:?}"), @"[0]");
+
+    // one typo per word: a missing letter (`quicest`) and an extra letter (`jummps`)
+    let mut search = Search::new(&rtxn, &index);
+    search.terms_matching_strategy(TermsMatchingStrategy::All);
+    search.query("the quicest brownest fox jummps over the laziest dog");
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+    insta::assert_snapshot!(format!("{hits:?}"), @"[3]");
+
+    // one typo per word: two adjacent letters swapped (`quikc`, `borwn`, `jupms`)
+    let mut search = Search::new(&rtxn, &index);
+    search.terms_matching_strategy(TermsMatchingStrategy::All);
+    search.query("the quikc borwn fox jupms over the lazy dog");
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+    insta::assert_snapshot!(format!("{hits:?}"), @"[0]");
+
+    // replaced first letter (`nuick`): counts as two typos, too many for a word < 9 bytes
+    let mut search = Search::new(&rtxn, &index);
+    search.terms_matching_strategy(TermsMatchingStrategy::All);
+    search.query("the nuick brown fox jumps over the lazy dog");
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+    insta::assert_snapshot!(format!("{hits:?}"), @"[]");
+
+    // missing first letter (`uick`): counts as two typos, too many for a word < 9 bytes
+    let mut search = Search::new(&rtxn, &index);
+    search.terms_matching_strategy(TermsMatchingStrategy::All);
+    search.query("the uick brown fox jumps over the lazy dog");
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+    insta::assert_snapshot!(format!("{hits:?}"), @"[]");
+
+    // one replaced letter in every word >= 5 bytes
+    let mut search = Search::new(&rtxn, &index);
+    search.terms_matching_strategy(TermsMatchingStrategy::All);
+    search.query("the quack brawn fox junps over the lazy dog");
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+    insta::assert_snapshot!(format!("{hits:?}"), @"[0]");
+
+    // two typos in words < 9 bytes are not tolerated
+    let mut search = Search::new(&rtxn, &index);
+    search.terms_matching_strategy(TermsMatchingStrategy::All);
+    search.query("the quckest brawnert fox jumps over the aziest dog");
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+    insta::assert_snapshot!(format!("{hits:?}"), @"[]");
+
+    // two typos in words >= 9 bytes: missing letters, missing first letter, replaced letters
+    let mut search = Search::new(&rtxn, &index);
+    search.terms_matching_strategy(TermsMatchingStrategy::All);
+    search.query("the extravant fox kyrocketed over the lamguorout dog");
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+    insta::assert_snapshot!(format!("{hits:?}"), @"[6]");
+
+    // two typos in words >= 9 bytes: two extra letters, swap + extra letter, replaced letters
+    let mut search = Search::new(&rtxn, &index);
+    search.terms_matching_strategy(TermsMatchingStrategy::All);
+    search.query("the extravaganttt fox sktyrocnketed over the lagnuorrous dog");
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+    insta::assert_snapshot!(format!("{hits:?}"), @"[6]");
+}
+
+#[test]
+fn test_phrase_no_typo_allowed() {
+    let index = create_index();
+    let rtxn = index.read_txn().unwrap();
+    // `brewn` sits inside a phrase, so it is not corrected to `brown`.
+    let mut search = Search::new(&rtxn, &index);
+    search.terms_matching_strategy(TermsMatchingStrategy::All);
+    search.query("the \"quick brewn\" fox jumps over the lazy dog");
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+    insta::assert_snapshot!(format!("{hits:?}"), @"[]");
+}
+
+#[test]
+fn test_ngram_typos() {
+    let index = create_index();
+    let rtxn = index.read_txn().unwrap();
+    // 2gram `extralagant`: one replaced letter away from `extravagant`, which is tolerated
+    let mut search = Search::new(&rtxn, &index);
+    search.terms_matching_strategy(TermsMatchingStrategy::All);
+    search.query("the extra lagant fox skyrocketed over the languorous dog");
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+    insta::assert_snapshot!(format!("{hits:?}"), @"[6]");
+    // same letters spread over three words: a 3gram gets no typo budget, so nothing matches
+    let mut search = Search::new(&rtxn, &index);
+    search.terms_matching_strategy(TermsMatchingStrategy::All);
+    search.query("the ex tra lagant fox skyrocketed over the languorous dog");
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+    insta::assert_snapshot!(format!("{hits:?}"), @"[]");
+}
+/// `typo` alone must rank exactly like `words` followed by `typo` (module property 10).
+#[test]
+fn test_typo_ranking_rule_not_preceded_by_words_ranking_rule() {
+    let index = create_index();
+    index
+        .update_settings(|s| {
+            s.set_criteria(vec![Criterion::Typo]);
+        })
+        .unwrap();
+    let txn = index.read_txn().unwrap();
+    let mut s = Search::new(&txn, &index);
+    s.terms_matching_strategy(TermsMatchingStrategy::Last);
+    s.query("the quick brown fox jumps over the lazy dog");
+    let SearchResult { documents_ids: ids_1, .. } = s.execute().unwrap();
+    insta::assert_snapshot!(format!("{ids_1:?}"), @"[0, 7, 8, 9, 10, 11, 1, 2, 12, 13, 4, 3, 5, 6, 21]");
+    // Close the read transaction before changing the criteria and reopen it afterwards (as
+    // `test_typo_bucketing` does); the old one would presumably still see `[Typo]` only.
+    drop(txn);
+    index
+        .update_settings(|s| {
+            s.set_criteria(vec![Criterion::Words, Criterion::Typo]);
+        })
+        .unwrap();
+    let txn = index.read_txn().unwrap();
+    let mut s = Search::new(&txn, &index);
+    s.terms_matching_strategy(TermsMatchingStrategy::Last);
+    s.query("the quick brown fox jumps over the lazy dog");
+    let SearchResult { documents_ids: ids_2, .. } = s.execute().unwrap();
+    insta::assert_snapshot!(format!("{ids_2:?}"), @"[0, 7, 8, 9, 10, 11, 1, 2, 12, 13, 4, 3, 5, 6, 21]");
+    assert_eq!(ids_1, ids_2);
+}
+
+#[test]
+fn test_typo_bucketing() {
+    let index = create_index();
+
+    let rtxn = index.read_txn().unwrap();
+
+    // Baseline: the index still uses the `words` ranking rule alone
+    let mut search = Search::new(&rtxn, &index);
+    search.terms_matching_strategy(TermsMatchingStrategy::All);
+    search.query("network interconnection sunflower");
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+    insta::assert_snapshot!(format!("{hits:?}"), @"[14, 15, 16, 17, 18, 20]");
+
+    // Switch to the `typo` ranking rule, reopening the read transaction around the update
+    drop(rtxn);
+    index
+        .update_settings(|settings| {
+            settings.set_criteria(vec![Criterion::Typo]);
+        })
+        .unwrap();
+    let rtxn = index.read_txn().unwrap();
+
+    let mut search = Search::new(&rtxn, &index);
+    search.terms_matching_strategy(TermsMatchingStrategy::All);
+    search.query("network interconnection sunflower");
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+    insta::assert_snapshot!(format!("{hits:?}"), @"[16, 18, 17, 20, 15, 14]");
+    // Same index, but the query now uses the two-word spelling
+    let mut search = Search::new(&rtxn, &index);
+    search.terms_matching_strategy(TermsMatchingStrategy::All);
+    search.query("network interconnection sun flower");
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+    insta::assert_snapshot!(format!("{hits:?}"), @"[17, 19, 16, 18, 20, 15]");
+}
+
+#[test]
+fn test_typo_synonyms() {
+    let index = create_index();
+    index
+        .update_settings(|settings| {
+            settings.set_criteria(vec![Criterion::Typo]);
+
+            let synonyms = HashMap::from([
+                ("lackadaisical".to_owned(), vec!["lazy".to_owned()]),
+                ("fast brownish".to_owned(), vec!["quick brown".to_owned()]),
+            ]);
+            settings.set_synonyms(synonyms);
+        })
+        .unwrap();
+    let rtxn = index.read_txn().unwrap();
+
+    let mut search = Search::new(&rtxn, &index);
+    search.terms_matching_strategy(TermsMatchingStrategy::All);
+    search.query("the quick brown fox jumps over the lackadaisical dog");
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+    insta::assert_snapshot!(format!("{hits:?}"), @"[21, 0]");
+    // NOTE(review): document 22 is this exact sentence yet is missing from the snapshot; confirm
+    let mut search = Search::new(&rtxn, &index);
+    search.terms_matching_strategy(TermsMatchingStrategy::All);
+    search.query("the fast brownish fox jumps over the lackadaisical dog");
+
+    // TODO: is this correct? interaction of ngrams + synonyms means that the
+    // multi-word synonyms end up having a typo cost. This is probably not what we want.
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+    insta::assert_snapshot!(format!("{hits:?}"), @"[21, 0]");
+}
diff --git a/milli/src/search/new/tests/words_tms.rs b/milli/src/search/new/tests/words_tms.rs
new file mode 100644
index 000000000..8b5c0153f
--- /dev/null
+++ b/milli/src/search/new/tests/words_tms.rs
@@ -0,0 +1,266 @@
+/*!
+This module tests the following properties:
+
+1. The `last` term matching strategy starts removing terms from the query
+starting from the end if no more results match it.
+2. Phrases are never deleted by the `last` term matching strategy
+3. Duplicate words don't affect the ranking of a document according to the `words` ranking rule
+4. The proximity of the first and last word of a phrase to its adjacent terms is taken into
+account by the proximity ranking rule.
+5. Unclosed double quotes still make a phrase
+6. The `all` term matching strategy does not remove any term from the query
+7. The search is capable of returning no results if no documents match the query
+*/
+
+use crate::{index::tests::TempIndex, Criterion, Search, SearchResult, TermsMatchingStrategy};
+
+fn create_quick_brown_fox_trivial_index() -> TempIndex {
+    let index = TempIndex::new();
+    // Fixture for this module: search only `text`; `words` is the default ranking rule here.
+    index
+        .update_settings(|s| {
+            s.set_primary_key("id".to_owned());
+            s.set_searchable_fields(vec!["text".to_owned()]);
+            s.set_criteria(vec![Criterion::Words]);
+        })
+        .unwrap();
+    // Ids 0-9 grow the sentence one word at a time (0 is empty); ids 10-22 are variations of it.
+    index
+        .add_documents(documents!([
+            {
+                "id": 0,
+                "text": "",
+            },
+            {
+                "id": 1,
+                "text": "the",
+            },
+            {
+                "id": 2,
+                "text": "the quick",
+            },
+            {
+                "id": 3,
+                "text": "the quick brown",
+            },
+            {
+                "id": 4,
+                "text": "the quick brown fox",
+            },
+            {
+                "id": 5,
+                "text": "the quick brown fox jumps",
+            },
+            {
+                "id": 6,
+                "text": "the quick brown fox jumps over",
+            },
+            {
+                "id": 7,
+                "text": "the quick brown fox jumps over the",
+            },
+            {
+                "id": 8,
+                "text": "the quick brown fox jumps over the lazy",
+            },
+            {
+                "id": 9,
+                "text": "the quick brown fox jumps over the lazy dog",
+            },
+            {
+                "id": 10,
+                "text": "the brown quick fox jumps over the lazy dog",
+            },
+            {
+                "id": 11,
+                "text": "the quick brown fox talks to the lazy and slow dog",
+            },
+            {
+                "id": 12,
+                "text": "the quick brown fox talks to the lazy dog",
+            },
+            {
+                "id": 13,
+                "text": "the mighty and quick brown fox jumps over the lazy dog",
+            },
+            {
+                "id": 14,
+                "text": "the great quick brown fox jumps over the lazy dog",
+            },
+            {
+                "id": 15,
+                "text": "this quick brown and very scary fox jumps over the lazy dog",
+            },
+            {
+                "id": 16,
+                "text": "this quick brown and scary fox jumps over the lazy dog",
+            },
+            {
+                "id": 17,
+                "text": "the quick brown fox jumps over the really lazy dog",
+            },
+            {
+                "id": 18,
+                "text": "the brown quick fox jumps over the really lazy dog",
+            },
+            {
+                "id": 19,
+                "text": "the brown quick fox immediately jumps over the really lazy dog",
+            },
+            {
+                "id": 20,
+                "text": "the brown quick fox immediately jumps over the really lazy blue dog",
+            },
+            {
+                "id": 21,
+                "text": "the quick brown. quick brown fox. brown fox jumps. fox jumps over. over the lazy. the lazy dog.",
+            },
+            {
+                "id": 22,
+                "text": "the, quick, brown, fox, jumps, over, the, lazy, dog",
+            }
+        ]))
+        .unwrap();
+    index
+}
+
+#[test]
+fn test_words_tms_last_simple() {
+    let index = create_quick_brown_fox_trivial_index();
+
+    let rtxn = index.read_txn().unwrap();
+    let mut search = Search::new(&rtxn, &index);
+    search.query("the quick brown fox jumps over the lazy dog");
+    search.terms_matching_strategy(TermsMatchingStrategy::Last);
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+
+    // documents 6 and 7 score the same: the second "the" in 7 is a duplicate word
+    insta::assert_snapshot!(format!("{hits:?}"), @"[9, 10, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 8, 6, 7, 5, 4, 11, 12, 3]");
+    // the first term matches no document and `last` only drops terms from the end: no results
+    let mut search = Search::new(&rtxn, &index);
+    search.query("extravagant the quick brown fox jumps over the lazy dog");
+    search.terms_matching_strategy(TermsMatchingStrategy::Last);
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+    insta::assert_snapshot!(format!("{hits:?}"), @"[]");
+}
+
+#[test]
+fn test_words_tms_last_phrase() {
+    let index = create_quick_brown_fox_trivial_index();
+
+    let rtxn = index.read_txn().unwrap();
+    let mut search = Search::new(&rtxn, &index);
+    search.query("\"the quick brown fox\" jumps over the lazy dog");
+    search.terms_matching_strategy(TermsMatchingStrategy::Last);
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+
+    // the leading phrase is never dropped by this strategy; only the words after it can be
+    insta::assert_snapshot!(format!("{hits:?}"), @"[9, 17, 21, 8, 6, 7, 5, 4, 11, 12]");
+
+    let mut search = Search::new(&rtxn, &index);
+    search.query("\"the quick brown fox\" jumps over the \"lazy\" dog");
+    search.terms_matching_strategy(TermsMatchingStrategy::Last);
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+
+    // the one-word phrase "lazy" is kept as well,
+    // while the plain words in front of it may still be removed
+    insta::assert_snapshot!(format!("{hits:?}"), @"[9, 17, 21, 8, 11, 12]");
+
+    let mut search = Search::new(&rtxn, &index);
+    search.query("\"the quick brown fox jumps over the lazy dog\"");
+    search.terms_matching_strategy(TermsMatchingStrategy::Last);
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+
+    // a query made of a single phrase leaves nothing to remove
+    insta::assert_snapshot!(format!("{hits:?}"), @"[9]");
+
+    let mut search = Search::new(&rtxn, &index);
+    search.query("\"the quick brown fox jumps over the lazy dog");
+    search.terms_matching_strategy(TermsMatchingStrategy::Last);
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+
+    // an unclosed quote still opens a phrase, so again no term is removed
+    insta::assert_snapshot!(format!("{hits:?}"), @"[9]");
+}
+
+#[test]
+fn test_words_proximity_tms_last_simple() {
+    let index = create_quick_brown_fox_trivial_index();
+    index
+        .update_settings(|settings| {
+            settings.set_criteria(vec![Criterion::Words, Criterion::Proximity]);
+        })
+        .unwrap();
+
+    let rtxn = index.read_txn().unwrap();
+    let mut search = Search::new(&rtxn, &index);
+    search.query("the quick brown fox jumps over the lazy dog");
+    search.terms_matching_strategy(TermsMatchingStrategy::Last);
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+
+    // 7 now ranks above 6: proximity rewards the second "the" sitting next to its neighbours
+    insta::assert_snapshot!(format!("{hits:?}"), @"[9, 21, 14, 17, 13, 10, 18, 19, 20, 16, 15, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
+
+    let mut search = Search::new(&rtxn, &index);
+    search.query("the brown quick fox jumps over the lazy dog");
+    search.terms_matching_strategy(TermsMatchingStrategy::Last);
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+
+    // 10 ranks above 9 here: its "brown quick" order matches the query's word order
+    insta::assert_snapshot!(format!("{hits:?}"), @"[10, 18, 19, 9, 20, 21, 14, 17, 13, 16, 15, 22, 8, 7, 6, 5, 4, 11, 12, 3]");
+}
+
+#[test]
+fn test_words_proximity_tms_last_phrase() {
+    let index = create_quick_brown_fox_trivial_index();
+    index
+        .update_settings(|settings| {
+            settings.set_criteria(vec![Criterion::Words, Criterion::Proximity]);
+        })
+        .unwrap();
+
+    let rtxn = index.read_txn().unwrap();
+    let mut search = Search::new(&rtxn, &index);
+    search.query("the \"quick brown\" fox jumps over the lazy dog");
+    search.terms_matching_strategy(TermsMatchingStrategy::Last);
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+
+    // with the phrase "quick brown", proximity is expected to be measured from the
+    // phrase's first and last words to the query terms around it
+    insta::assert_snapshot!(format!("{hits:?}"), @"[9, 21, 14, 17, 13, 16, 15, 8, 7, 6, 5, 4, 11, 12, 3]");
+
+    let mut search = Search::new(&rtxn, &index);
+    search.query("the \"quick brown\" \"fox jumps\" over the lazy dog");
+    search.terms_matching_strategy(TermsMatchingStrategy::Last);
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+
+    // same expectation with two phrases: the outer words of "quick brown"
+    // and of `fox jumps` are the ones whose distance to the
+    // neighbouring query terms should count.
+    insta::assert_snapshot!(format!("{hits:?}"), @"[9, 21, 14, 17, 13, 16, 15, 8, 7, 6, 5]");
+}
+
+#[test]
+fn test_words_tms_all() {
+    let index = create_quick_brown_fox_trivial_index();
+    index
+        .update_settings(|settings| {
+            settings.set_criteria(vec![Criterion::Words, Criterion::Proximity]);
+        })
+        .unwrap();
+
+    let rtxn = index.read_txn().unwrap();
+    let mut search = Search::new(&rtxn, &index);
+    search.query("the quick brown fox jumps over the lazy dog");
+    search.terms_matching_strategy(TermsMatchingStrategy::All);
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+    // `all` removes no term: the shorter sentences (ids 1-8) are absent from the results
+    insta::assert_snapshot!(format!("{hits:?}"), @"[9, 21, 14, 17, 13, 10, 18, 19, 20, 16, 15, 22]");
+
+    let mut search = Search::new(&rtxn, &index);
+    search.query("extravagant");
+    search.terms_matching_strategy(TermsMatchingStrategy::All);
+    let SearchResult { documents_ids: hits, .. } = search.execute().unwrap();
+    // no document in this index contains `extravagant`, so the result is empty
+    insta::assert_snapshot!(format!("{hits:?}"), @"[]");
+}