From 959e4607bb11c684463ddf1149895d9aecb08a7a Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= <loic.lecrenier@me.com>
Date: Tue, 4 Apr 2023 18:02:46 +0200
Subject: [PATCH] Add more search tests

---
 milli/src/search/new/distinct.rs        |   2 +-
 milli/src/search/new/mod.rs             |   4 +-
 milli/src/search/new/tests/distinct.rs  | 590 ++++++++++++++++++++++++
 milli/src/search/new/tests/language.rs  |  22 +
 milli/src/search/new/tests/mod.rs       |  25 +
 milli/src/search/new/tests/proximity.rs |   0
 milli/src/search/new/tests/sort.rs      | 316 +++++++++++++
 7 files changed, 957 insertions(+), 2 deletions(-)
 create mode 100644 milli/src/search/new/tests/distinct.rs
 create mode 100644 milli/src/search/new/tests/language.rs
 create mode 100644 milli/src/search/new/tests/proximity.rs
 create mode 100644 milli/src/search/new/tests/sort.rs

diff --git a/milli/src/search/new/distinct.rs b/milli/src/search/new/distinct.rs
index ad4b46659..7b77adf49 100644
--- a/milli/src/search/new/distinct.rs
+++ b/milli/src/search/new/distinct.rs
@@ -41,7 +41,7 @@ pub fn apply_distinct_rule(
 }
 
 /// Apply the distinct rule defined by [`apply_distinct_rule`] for a single document id.
-fn distinct_single_docid(
+pub fn distinct_single_docid(
     index: &Index,
     txn: &RoTxn,
     field_id: u16,
diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs
index 4456d693d..e7e38fe89 100644
--- a/milli/src/search/new/mod.rs
+++ b/milli/src/search/new/mod.rs
@@ -1,3 +1,4 @@
+mod bucket_sort;
 mod db_cache;
 mod distinct;
 mod graph_based_ranking_rule;
@@ -31,7 +32,8 @@ pub use logger::detailed::DetailedSearchLogger;
 pub use logger::{DefaultSearchLogger, SearchLogger};
 use query_graph::{QueryGraph, QueryNode};
 use query_term::{located_query_terms_from_string, Phrase, QueryTerm};
-use ranking_rules::{bucket_sort, PlaceholderQuery, RankingRuleOutput, RankingRuleQueryTrait};
+use ranking_rules::{PlaceholderQuery, RankingRuleOutput, RankingRuleQueryTrait};
+use bucket_sort::bucket_sort;
 use resolve_query_graph::PhraseDocIdsCache;
 use roaring::RoaringBitmap;
 use words::Words;
diff --git a/milli/src/search/new/tests/distinct.rs b/milli/src/search/new/tests/distinct.rs
new file mode 100644
index 000000000..4073cf585
--- /dev/null
+++ b/milli/src/search/new/tests/distinct.rs
@@ -0,0 +1,590 @@
+/*!
+This module tests the "distinct attribute" feature, and its
+interaction with other ranking rules.
+
+1. no duplicate distinct attributes are ever returned
+2. only the best document (according to the search rules) for each distinct value appears in the result
+3. if a document does not have a distinct attribute, then the distinct rule does not apply to it
+
+It doesn't test properly:
+- combination of distinct + exhaustive_nbr_hits (because we know it's incorrect)
+- distinct attributes with arrays (because we know it's incorrect as well)
+*/
+
+use std::collections::HashSet;
+
+use big_s::S;
+use heed::RoTxn;
+use maplit::hashset;
+
+use crate::{
+    index::tests::TempIndex, AscDesc, Criterion, Index, Member, Search, SearchResult,
+    TermsMatchingStrategy,
+};
+
+use super::collect_field_values;
+
+fn create_index() -> TempIndex {
+    let index = TempIndex::new();
+    // Settings: search `text`, sort on `rank1`/`letter`, distinct on `letter`, `words` rule only.
+    index
+        .update_settings(|s| {
+            s.set_primary_key("id".to_owned());
+            s.set_searchable_fields(vec!["text".to_owned()]);
+            s.set_sortable_fields(hashset! { S("rank1"), S("letter") });
+            s.set_distinct_field("letter".to_owned());
+            s.set_criteria(vec![Criterion::Words]);
+        })
+        .unwrap();
+    // The document literals from id 24 onwards carry no `letter`, i.e. no distinct attribute.
+    index
+        .add_documents(documents!([
+            {
+                "id": 0,
+                "letter": "A",
+                "rank1": 0,
+                "text": "the quick brown fox jamps over the lazy dog",
+            },
+            {
+                "id": 1,
+                "letter": "A",
+                "rank1": 1,
+                "text": "the quick brown fox jumpes over the lazy dog",
+            },
+            {
+                "id": 2,
+                "letter": "B",
+                "rank1": 0,
+                "text": "the quick brown foxjumps over the lazy dog",
+            },
+            {
+                "id": 3,
+                "letter": "B",
+                "rank1": 1,
+                "text": "the quick brown fox jumps over the lazy dog",
+            },
+            {
+                "id": 4,
+                "letter": "B",
+                "rank1": 2,
+                "text": "the quick brown fox jumps over the lazy",
+            },
+            {
+                "id": 5,
+                "letter": "C",
+                "rank1": 0,
+                "text": "the quickbrownfox jumps over the lazy",
+            },
+            {
+                "id": 6,
+                "letter": "C",
+                "rank1": 1,
+                "text": "the quick brown fox jumpss over the lazy",
+            },
+            {
+                "id": 7,
+                "letter": "C",
+                "rank1": 2,
+                "text": "the quick brown fox jumps over the lazy",
+            },
+            {
+                "id": 8,
+                "letter": "D",
+                "rank1": 0,
+                "text": "the quick brown fox jumps over the lazy",
+            },
+            {
+                "id": 9,
+                "letter": "E",
+                "rank1": 0,
+                "text": "the quick brown fox jumps over the lazy",
+            },
+            {
+                "id": 10,
+                "letter": "E",
+                "rank1": 1,
+                "text": "the quackbrown foxjunps over",
+            },
+            {
+                "id": 11,
+                "letter": "E",
+                "rank1": 2,
+                "text": "the quicko browno fox junps over",
+            },
+            {
+                "id": 12,
+                "letter": "E",
+                "rank1": 3,
+                "text": "the quicko browno fox jumps over",
+            },
+            {
+                "id": 13,
+                "letter": "E",
+                "rank1": 4,
+                "text": "the quick brewn fox jumps over",
+            },
+            {
+                "id": 14,
+                "letter": "E",
+                "rank1": 5,
+                "text": "the quick brown fox jumps over",
+            },
+            {
+                "id": 15,
+                "letter": "F",
+                "rank1": 0,
+                "text": "the quick brownf fox jumps over",
+            },
+            {
+                "id": 16,
+                "letter": "F",
+                "rank1": 1,
+                "text": "the quic brown fox jamps over",
+            },
+            {
+                "id": 17,
+                "letter": "F",
+                "rank1": 2,
+                "text": "thequick browns fox jimps",
+            },
+            {
+                "id": 18,
+                "letter": "G",
+                "rank1": 0,
+                "text": "the qick brown fox jumps",
+            },
+            {
+                "id": 19,
+                "letter": "G",
+                "rank1": 1,
+                "text": "the quick brownfoxjumps",
+            },
+            {
+                "id": 20,
+                "letter": "H",
+                "rank1": 0,
+                "text": "the quick brow fox jumps",
+            },
+            {
+                "id": 21,
+                "letter": "I",
+                "rank1": 0,
+                "text": "the quick brown fox jpmps",
+            },
+            {
+                "id": 22,
+                "letter": "I",
+                "rank1": 1,
+                "text": "the quick brown fox jumps",
+            },
+            {
+                "id": 23,
+                "letter": "I",
+                "rank1": 2,
+                "text": "the quick",
+            },
+            {
+                "id": 24,
+                "rank1": 0,
+                "text": "the quick",
+            },
+            {
+                "id": 25,
+                "rank1": 1,
+                "text": "the quick brown",
+            },
+            {
+                "id": 26,
+                "rank1": 2,
+                "text": "the quick brown fox",
+            },
+            {
+                "id": 26, // NOTE(review): same primary key as the previous document — was 27 meant? confirm
+                "rank1": 3,
+                "text": "the quick brown fox jumps over the lazy dog",
+            },
+        ]))
+        .unwrap();
+    index
+}
+
+/// Returns the distinct-field value of every document in `docids`, asserting none repeats.
+/// Documents lacking the field (reported as `__does_not_exist__`) are exempt from the check.
+fn verify_distinct(index: &Index, txn: &RoTxn, docids: &[u32]) -> Vec<String> {
+    let distinct_fid = index.distinct_field(txn).unwrap().unwrap();
+    let values = collect_field_values(index, txn, distinct_fid, docids);
+
+    let mut seen = HashSet::new();
+    for value in values.iter().filter(|v| v.as_str() != "__does_not_exist__") {
+        assert!(seen.insert(value.clone()));
+    }
+
+    values
+}
+
+#[test]
+fn test_distinct_placeholder_no_ranking_rules() {
+    let index = create_index();
+
+    let txn = index.read_txn().unwrap();
+    // Placeholder search: neither a query nor sort criteria are set on `Search`.
+    let s = Search::new(&txn, &index);
+    let SearchResult { documents_ids, .. } = s.execute().unwrap();
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 5, 8, 9, 15, 18, 20, 21, 24, 25, 26]");
+    let distinct_values = verify_distinct(&index, &txn, &documents_ids); // panics on a repeated value
+    insta::assert_debug_snapshot!(distinct_values, @r###"
+    [
+        "\"A\"",
+        "\"B\"",
+        "\"C\"",
+        "\"D\"",
+        "\"E\"",
+        "\"F\"",
+        "\"G\"",
+        "\"H\"",
+        "\"I\"",
+        "__does_not_exist__",
+        "__does_not_exist__",
+        "__does_not_exist__",
+    ]
+    "###);
+}
+
+#[test]
+fn test_distinct_placeholder_sort() {
+    let index = create_index();
+    index
+        .update_settings(|s| {
+            s.set_criteria(vec![Criterion::Sort]);
+        })
+        .unwrap();
+
+    let txn = index.read_txn().unwrap();
+    // First search: placeholder query sorted on `rank1`, descending.
+    let mut s = Search::new(&txn, &index);
+    s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("rank1")))]);
+
+    let SearchResult { documents_ids, .. } = s.execute().unwrap();
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[14, 26, 4, 7, 17, 23, 1, 19, 25, 8, 20, 24]");
+    let distinct_values = verify_distinct(&index, &txn, &documents_ids);
+    insta::assert_debug_snapshot!(distinct_values, @r###"
+    [
+        "\"E\"",
+        "__does_not_exist__",
+        "\"B\"",
+        "\"C\"",
+        "\"F\"",
+        "\"I\"",
+        "\"A\"",
+        "\"G\"",
+        "__does_not_exist__",
+        "\"D\"",
+        "\"H\"",
+        "__does_not_exist__",
+    ]
+    "###);
+    let rank_values = collect_field_values(&index, &txn, "rank1", &documents_ids);
+    insta::assert_debug_snapshot!(rank_values, @r###"
+    [
+        "5",
+        "3",
+        "2",
+        "2",
+        "2",
+        "2",
+        "1",
+        "1",
+        "1",
+        "0",
+        "0",
+        "0",
+    ]
+    "###);
+    // Second search: sorted on the distinct field itself (`letter`), descending.
+    let mut s = Search::new(&txn, &index);
+    s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("letter")))]);
+
+    let SearchResult { documents_ids, .. } = s.execute().unwrap();
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[21, 20, 18, 15, 9, 8, 5, 2, 0, 24, 25, 26]");
+    let distinct_values = verify_distinct(&index, &txn, &documents_ids);
+    insta::assert_debug_snapshot!(distinct_values, @r###"
+    [
+        "\"I\"",
+        "\"H\"",
+        "\"G\"",
+        "\"F\"",
+        "\"E\"",
+        "\"D\"",
+        "\"C\"",
+        "\"B\"",
+        "\"A\"",
+        "__does_not_exist__",
+        "__does_not_exist__",
+        "__does_not_exist__",
+    ]
+    "###);
+    let rank_values = collect_field_values(&index, &txn, "rank1", &documents_ids);
+    insta::assert_debug_snapshot!(rank_values, @r###"
+    [
+        "0",
+        "0",
+        "0",
+        "0",
+        "0",
+        "0",
+        "0",
+        "0",
+        "0",
+        "0",
+        "1",
+        "3",
+    ]
+    "###);
+    // Third search: sorted on `letter` descending, then `rank1` descending.
+    let mut s = Search::new(&txn, &index);
+    s.sort_criteria(vec![
+        AscDesc::Desc(Member::Field(S("letter"))),
+        AscDesc::Desc(Member::Field(S("rank1"))),
+    ]);
+
+    let SearchResult { documents_ids, .. } = s.execute().unwrap();
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[23, 20, 19, 17, 14, 8, 7, 4, 1, 26, 25, 24]");
+    let distinct_values = verify_distinct(&index, &txn, &documents_ids);
+    insta::assert_debug_snapshot!(distinct_values, @r###"
+    [
+        "\"I\"",
+        "\"H\"",
+        "\"G\"",
+        "\"F\"",
+        "\"E\"",
+        "\"D\"",
+        "\"C\"",
+        "\"B\"",
+        "\"A\"",
+        "__does_not_exist__",
+        "__does_not_exist__",
+        "__does_not_exist__",
+    ]
+    "###);
+    let rank_values = collect_field_values(&index, &txn, "rank1", &documents_ids);
+    insta::assert_debug_snapshot!(rank_values, @r###"
+    [
+        "2",
+        "0",
+        "1",
+        "2",
+        "5",
+        "0",
+        "2",
+        "2",
+        "1",
+        "3",
+        "1",
+        "0",
+    ]
+    "###);
+}
+
+#[test]
+fn test_distinct_words() {
+    let index = create_index();
+    index
+        .update_settings(|s| {
+            s.set_criteria(vec![Criterion::Words]);
+        })
+        .unwrap();
+
+    let txn = index.read_txn().unwrap();
+    // Keyword search with the `words` rule only, using the `Last` terms matching strategy.
+    let mut s = Search::new(&txn, &index);
+    s.terms_matching_strategy(TermsMatchingStrategy::Last);
+    s.query("the quick brown fox jumps over the lazy dog");
+
+    let SearchResult { documents_ids, .. } = s.execute().unwrap();
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 26, 5, 8, 9, 15, 18, 20, 21, 25, 24]");
+    let distinct_values = verify_distinct(&index, &txn, &documents_ids);
+    insta::assert_debug_snapshot!(distinct_values, @r###"
+    [
+        "\"A\"",
+        "\"B\"",
+        "__does_not_exist__",
+        "\"C\"",
+        "\"D\"",
+        "\"E\"",
+        "\"F\"",
+        "\"G\"",
+        "\"H\"",
+        "\"I\"",
+        "__does_not_exist__",
+        "__does_not_exist__",
+    ]
+    "###);
+    let text_values = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(text_values, @r###"
+    [
+        "\"the quick brown fox jamps over the lazy dog\"",
+        "\"the quick brown foxjumps over the lazy dog\"",
+        "\"the quick brown fox jumps over the lazy dog\"",
+        "\"the quickbrownfox jumps over the lazy\"",
+        "\"the quick brown fox jumps over the lazy\"",
+        "\"the quick brown fox jumps over the lazy\"",
+        "\"the quick brownf fox jumps over\"",
+        "\"the qick brown fox jumps\"",
+        "\"the quick brow fox jumps\"",
+        "\"the quick brown fox jpmps\"",
+        "\"the quick brown\"",
+        "\"the quick\"",
+    ]
+    "###);
+}
+
+#[test]
+fn test_distinct_sort_words() {
+    let index = create_index();
+    index
+        .update_settings(|s| {
+            s.set_criteria(vec![Criterion::Sort, Criterion::Words, Criterion::Desc(S("rank1"))]);
+        })
+        .unwrap();
+
+    let txn = index.read_txn().unwrap();
+    // Rules: sort, words, `rank1` desc; the search-time sort criterion is `letter` desc.
+    let mut s = Search::new(&txn, &index);
+    s.terms_matching_strategy(TermsMatchingStrategy::Last);
+    s.query("the quick brown fox jumps over the lazy dog");
+    s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("letter")))]);
+
+    let SearchResult { documents_ids, .. } = s.execute().unwrap();
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[22, 20, 19, 16, 9, 8, 7, 3, 1, 26, 25, 24]");
+    let distinct_values = verify_distinct(&index, &txn, &documents_ids);
+    insta::assert_debug_snapshot!(distinct_values, @r###"
+    [
+        "\"I\"",
+        "\"H\"",
+        "\"G\"",
+        "\"F\"",
+        "\"E\"",
+        "\"D\"",
+        "\"C\"",
+        "\"B\"",
+        "\"A\"",
+        "__does_not_exist__",
+        "__does_not_exist__",
+        "__does_not_exist__",
+    ]
+    "###);
+
+    let rank_values = collect_field_values(&index, &txn, "rank1", &documents_ids);
+    insta::assert_debug_snapshot!(rank_values, @r###"
+    [
+        "1",
+        "0",
+        "1",
+        "1",
+        "0",
+        "0",
+        "2",
+        "1",
+        "1",
+        "3",
+        "1",
+        "0",
+    ]
+    "###);
+
+    let text_values = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(text_values, @r###"
+    [
+        "\"the quick brown fox jumps\"",
+        "\"the quick brow fox jumps\"",
+        "\"the quick brownfoxjumps\"",
+        "\"the quic brown fox jamps over\"",
+        "\"the quick brown fox jumps over the lazy\"",
+        "\"the quick brown fox jumps over the lazy\"",
+        "\"the quick brown fox jumps over the lazy\"",
+        "\"the quick brown fox jumps over the lazy dog\"",
+        "\"the quick brown fox jumpes over the lazy dog\"",
+        "\"the quick brown fox jumps over the lazy dog\"",
+        "\"the quick brown\"",
+        "\"the quick\"",
+    ]
+    "###);
+}
+
+#[test]
+fn test_distinct_all_candidates() {
+    let index = create_index();
+    index
+        .update_settings(|s| {
+            s.set_criteria(vec![Criterion::Sort]);
+        })
+        .unwrap();
+
+    let txn = index.read_txn().unwrap();
+    // Placeholder search sorted on `rank1` desc, with exhaustive hit counting requested.
+    let mut s = Search::new(&txn, &index);
+    s.terms_matching_strategy(TermsMatchingStrategy::Last);
+    s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("rank1")))]);
+    s.exhaustive_number_hits(true);
+
+    let SearchResult { documents_ids, candidates, .. } = s.execute().unwrap();
+    let candidates = candidates.iter().collect::<Vec<_>>();
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[14, 26, 4, 7, 17, 23, 1, 19, 25, 8, 20, 24]");
+    // TODO: this is incorrect! (the snapshot below pins the current, known-wrong candidate set)
+    insta::assert_snapshot!(format!("{candidates:?}"), @"[0, 2, 5, 8, 9, 15, 18, 20, 21, 24, 25, 26]");
+}
+
+#[test]
+fn test_distinct_typo() {
+    let index = create_index();
+    index
+        .update_settings(|s| {
+            s.set_criteria(vec![Criterion::Words, Criterion::Typo]);
+        })
+        .unwrap();
+
+    let txn = index.read_txn().unwrap();
+    // Ranking rules are words then typo; the query runs with the `Last` matching strategy.
+    let mut s = Search::new(&txn, &index);
+    s.query("the quick brown fox jumps over the lazy dog");
+    s.terms_matching_strategy(TermsMatchingStrategy::Last);
+
+    let SearchResult { documents_ids, .. } = s.execute().unwrap();
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3, 26, 0, 7, 8, 9, 15, 22, 18, 20, 25, 24]");
+
+    let distinct_values = verify_distinct(&index, &txn, &documents_ids);
+    insta::assert_debug_snapshot!(distinct_values, @r###"
+    [
+        "\"B\"",
+        "__does_not_exist__",
+        "\"A\"",
+        "\"C\"",
+        "\"D\"",
+        "\"E\"",
+        "\"F\"",
+        "\"I\"",
+        "\"G\"",
+        "\"H\"",
+        "__does_not_exist__",
+        "__does_not_exist__",
+    ]
+    "###);
+
+    let text_values = collect_field_values(&index, &txn, "text", &documents_ids);
+    insta::assert_debug_snapshot!(text_values, @r###"
+    [
+        "\"the quick brown fox jumps over the lazy dog\"",
+        "\"the quick brown fox jumps over the lazy dog\"",
+        "\"the quick brown fox jamps over the lazy dog\"",
+        "\"the quick brown fox jumps over the lazy\"",
+        "\"the quick brown fox jumps over the lazy\"",
+        "\"the quick brown fox jumps over the lazy\"",
+        "\"the quick brownf fox jumps over\"",
+        "\"the quick brown fox jumps\"",
+        "\"the qick brown fox jumps\"",
+        "\"the quick brow fox jumps\"",
+        "\"the quick brown\"",
+        "\"the quick\"",
+    ]
+    "###);
+}
diff --git a/milli/src/search/new/tests/language.rs b/milli/src/search/new/tests/language.rs
new file mode 100644
index 000000000..6adad748c
--- /dev/null
+++ b/milli/src/search/new/tests/language.rs
@@ -0,0 +1,22 @@
+use crate::{index::tests::TempIndex, Search, SearchResult};
+
+#[test]
+fn test_kanji_language_detection() {
+    let index = TempIndex::new();
+    // One Latin-script, one Japanese and one Hebrew title.
+    index
+        .add_documents(documents!([
+            { "id": 0, "title": "The quick (\"brown\") fox can't jump 32.3 feet, right? Brr, it's 29.3°F!" },
+            { "id": 1, "title": "東京のお寿司。" },
+            { "id": 2, "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" }
+        ]))
+        .unwrap();
+    // Searching only reads the index, so a read transaction is enough; no write txn is needed.
+    let txn = index.read_txn().unwrap();
+    let mut search = Search::new(&txn, &index);
+
+    search.query("東京");
+    let SearchResult { documents_ids, .. } = search.execute().unwrap();
+
+    assert_eq!(documents_ids, vec![1]);
+}
diff --git a/milli/src/search/new/tests/mod.rs b/milli/src/search/new/tests/mod.rs
index eec4c62ec..0fd5013db 100644
--- a/milli/src/search/new/tests/mod.rs
+++ b/milli/src/search/new/tests/mod.rs
@@ -1,3 +1,28 @@
+pub mod distinct;
+#[cfg(feature = "default")]
+pub mod language;
 pub mod ngram_split_words;
+pub mod proximity;
+pub mod sort;
 pub mod typo;
 pub mod words_tms;
+
+/// Reads field `fid` of every document in `docids` and returns each value as JSON text.
+/// A document without the field yields the sentinel `__does_not_exist__` instead.
+fn collect_field_values(
+    index: &crate::Index,
+    txn: &heed::RoTxn,
+    fid: &str,
+    docids: &[u32],
+) -> Vec<String> {
+    let field_id = index.fields_ids_map(txn).unwrap().id(fid).unwrap();
+    let documents = index.documents(txn, docids.iter().copied()).unwrap();
+
+    documents
+        .into_iter()
+        .map(|doc| match doc.1.get(field_id) {
+            Some(raw) => serde_json::from_slice::<serde_json::Value>(raw).unwrap().to_string(),
+            None => "__does_not_exist__".to_owned(),
+        })
+        .collect()
+}
diff --git a/milli/src/search/new/tests/proximity.rs b/milli/src/search/new/tests/proximity.rs
new file mode 100644
index 000000000..e69de29bb
diff --git a/milli/src/search/new/tests/sort.rs b/milli/src/search/new/tests/sort.rs
new file mode 100644
index 000000000..d3a952a24
--- /dev/null
+++ b/milli/src/search/new/tests/sort.rs
@@ -0,0 +1,316 @@
+/*!
+This module tests the `sort` ranking rule:
+
+1. an error is returned if the sort ranking rule exists but no fields-to-sort were given at search time
+2. an error is returned if the fields-to-sort are not sortable
+3. it is possible to add multiple fields-to-sort at search time
+4. custom sort ranking rules can be added to the settings, they interact with the generic `sort` ranking rule as expected
+5. numbers appear before strings
+6. documents with either: (1) no value, (2) null, or (3) an object for the field-to-sort appear at the end of the bucket
+7. boolean values are translated to strings
+8. if a field contains an array, it is sorted by the best value in the array according to the sort rule
+*/
+
+use big_s::S;
+use maplit::hashset;
+
+use crate::{
+    index::tests::TempIndex, search::new::tests::collect_field_values, AscDesc, Criterion, Member,
+    Search, SearchResult, TermsMatchingStrategy,
+};
+
+fn create_index() -> TempIndex {
+    let index = TempIndex::new();
+    // Settings: search `text`, sort on `rank`/`vague`/`letter`, `sort` ranking rule only.
+    index
+        .update_settings(|s| {
+            s.set_primary_key("id".to_owned());
+            s.set_searchable_fields(vec!["text".to_owned()]);
+            s.set_sortable_fields(hashset! { S("rank"), S("vague"), S("letter") });
+            s.set_criteria(vec![Criterion::Sort]);
+        })
+        .unwrap();
+    // None of the documents has a `text` field; `vague` holds mixed-type or missing values.
+    index
+        .add_documents(documents!([
+            {
+                "id": 0,
+                "letter": "A",
+                "rank": 0,
+                "vague": 0,
+            },
+            {
+                "id": 1,
+                "letter": "A",
+                "rank": 1,
+                "vague": "0",
+            },
+            {
+                "id": 2,
+                "letter": "B",
+                "rank": 0,
+                "vague": 1,
+            },
+            {
+                "id": 3,
+                "letter": "B",
+                "rank": 1,
+                "vague": "1",
+            },
+            {
+                "id": 4,
+                "letter": "B",
+                "rank": 2,
+                "vague": [1, 2],
+            },
+            {
+                "id": 5,
+                "letter": "C",
+                "rank": 0,
+                "vague": [1, "2"],
+            },
+            {
+                "id": 6,
+                "letter": "C",
+                "rank": 1,
+            },
+            {
+                "id": 7,
+                "letter": "C",
+                "rank": 2,
+                "vague": null,
+            },
+            {
+                "id": 8,
+                "letter": "D",
+                "rank": 0,
+                "vague": [null, null, ""]
+            },
+            {
+                "id": 9,
+                "letter": "E",
+                "rank": 0,
+                "vague": ""
+            },
+            {
+                "id": 10,
+                "letter": "E",
+                "rank": 1,
+                "vague": {
+                    "sub": 0,
+                }
+            },
+            {
+                "id": 11,
+                "letter": "E",
+                "rank": 2,
+                "vague": true,
+            },
+            {
+                "id": 12,
+                "letter": "E",
+                "rank": 3,
+                "vague": false,
+            },
+            {
+                "id": 13,
+                "letter": "E",
+                "rank": 4,
+                "vague": 1.5673,
+            },
+            {
+                "id": 14,
+                "letter": "E",
+                "rank": 5,
+            },
+            {
+                "id": 15,
+                "letter": "F",
+                "rank": 0,
+            },
+            {
+                "id": 16,
+                "letter": "F",
+                "rank": 1,
+            },
+            {
+                "id": 17,
+                "letter": "F",
+                "rank": 2,
+            },
+            {
+                "id": 18,
+                "letter": "G",
+                "rank": 0,
+            },
+            {
+                "id": 19,
+                "letter": "G",
+                "rank": 1,
+            },
+            {
+                "id": 20,
+                "letter": "H",
+                "rank": 0,
+                "vague": true,
+            },
+            {
+                "id": 21,
+                "letter": "I",
+                "rank": 0,
+                "vague": false,
+            },
+            {
+                "id": 22,
+                "letter": "I",
+                "rank": 1,
+                "vague": [1.1367, "help", null]
+            },
+            {
+                "id": 23,
+                "letter": "I",
+                "rank": 2,
+                "vague": [1.2367, "hello"]
+            },
+        ]))
+        .unwrap();
+    index
+}
+
+#[test]
+fn test_sort() {
+    let index = create_index();
+    let txn = index.read_txn().unwrap();
+    // Descending sort on the string field `letter`.
+    let mut s = Search::new(&txn, &index);
+    s.terms_matching_strategy(TermsMatchingStrategy::Last);
+    s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("letter")))]);
+
+    let SearchResult { documents_ids, .. } = s.execute().unwrap();
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[21, 22, 23, 20, 18, 19, 15, 16, 17, 9, 10, 11, 12, 13, 14, 8, 5, 6, 7, 2]");
+
+    let letter_values = collect_field_values(&index, &txn, "letter", &documents_ids);
+    insta::assert_debug_snapshot!(letter_values, @r###"
+    [
+        "\"I\"",
+        "\"I\"",
+        "\"I\"",
+        "\"H\"",
+        "\"G\"",
+        "\"G\"",
+        "\"F\"",
+        "\"F\"",
+        "\"F\"",
+        "\"E\"",
+        "\"E\"",
+        "\"E\"",
+        "\"E\"",
+        "\"E\"",
+        "\"E\"",
+        "\"D\"",
+        "\"C\"",
+        "\"C\"",
+        "\"C\"",
+        "\"B\"",
+    ]
+    "###);
+    // Descending sort on the numeric field `rank`.
+    let mut s = Search::new(&txn, &index);
+    s.terms_matching_strategy(TermsMatchingStrategy::Last);
+    s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("rank")))]);
+
+    let SearchResult { documents_ids, .. } = s.execute().unwrap();
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[14, 13, 12, 4, 7, 11, 17, 23, 1, 3, 6, 10, 16, 19, 22, 0, 2, 5, 8, 9]");
+
+    let rank_values = collect_field_values(&index, &txn, "rank", &documents_ids);
+    insta::assert_debug_snapshot!(rank_values, @r###"
+    [
+        "5",
+        "4",
+        "3",
+        "2",
+        "2",
+        "2",
+        "2",
+        "2",
+        "1",
+        "1",
+        "1",
+        "1",
+        "1",
+        "1",
+        "1",
+        "0",
+        "0",
+        "0",
+        "0",
+        "0",
+    ]
+    "###);
+    // Ascending sort on `vague`, whose values are of mixed types or missing altogether.
+    let mut s = Search::new(&txn, &index);
+    s.terms_matching_strategy(TermsMatchingStrategy::Last);
+    s.sort_criteria(vec![AscDesc::Asc(Member::Field(S("vague")))]);
+
+    let SearchResult { documents_ids, .. } = s.execute().unwrap();
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 4, 5, 22, 23, 13, 1, 3, 12, 21, 11, 20, 6, 7, 8, 9, 10, 14, 15]");
+    // `collect_field_values` reports a missing field as `__does_not_exist__`.
+    let vague_values = collect_field_values(&index, &txn, "vague", &documents_ids);
+    insta::assert_debug_snapshot!(vague_values, @r###"
+    [
+        "0",
+        "1",
+        "[1,2]",
+        "[1,\"2\"]",
+        "[1.1367,\"help\",null]",
+        "[1.2367,\"hello\"]",
+        "1.5673",
+        "\"0\"",
+        "\"1\"",
+        "false",
+        "false",
+        "true",
+        "true",
+        "__does_not_exist__",
+        "null",
+        "[null,null,\"\"]",
+        "\"\"",
+        "{\"sub\":0}",
+        "__does_not_exist__",
+        "__does_not_exist__",
+    ]
+    "###);
+    // Descending sort on `vague`.
+    let mut s = Search::new(&txn, &index);
+    s.terms_matching_strategy(TermsMatchingStrategy::Last);
+    s.sort_criteria(vec![AscDesc::Desc(Member::Field(S("vague")))]);
+
+    let SearchResult { documents_ids, .. } = s.execute().unwrap();
+    insta::assert_snapshot!(format!("{documents_ids:?}"), @"[4, 13, 23, 22, 2, 5, 0, 11, 20, 12, 21, 3, 1, 6, 7, 8, 9, 10, 14, 15]");
+
+    let vague_values = collect_field_values(&index, &txn, "vague", &documents_ids);
+    insta::assert_debug_snapshot!(vague_values, @r###"
+    [
+        "[1,2]",
+        "1.5673",
+        "[1.2367,\"hello\"]",
+        "[1.1367,\"help\",null]",
+        "1",
+        "[1,\"2\"]",
+        "0",
+        "true",
+        "true",
+        "false",
+        "false",
+        "\"1\"",
+        "\"0\"",
+        "__does_not_exist__",
+        "null",
+        "[null,null,\"\"]",
+        "\"\"",
+        "{\"sub\":0}",
+        "__does_not_exist__",
+        "__does_not_exist__",
+    ]
+    "###);
+}