Merge #601

601: Introduce snapshot tests r=Kerollmops a=loiclec # Pull Request ## What does this PR do? Introduce snapshot tests into milli, by using the `insta` crate. This implements the idea described in #597. See: [insta.rs](https://insta.rs) ## Design There is now a new file, `snapshot_tests.rs`, which is compiled only under `#[cfg(test)]`. It exposes the `db_snap!` macro, which is used to snapshot the content of a database. When running `cargo test`, `insta` will check that the value of the current snapshot is the same as the previous one (on the file system). If they are the same, the test passes. If they are different, the test fails and you are asked to review the new snapshot to approve or reject it. We don't want to save very large snapshots to the file system, because they will pollute the git repository and increase its size too much. Instead, we only save their `md5` hashes under the name `<snapshot_name>.hash.snap`. There is a new environment variable called `MILLI_TEST_FULL_SNAPS` which can be set to `true` in order to *also* save the full content of the snapshot under the name `<snapshot_name>.full.snap`. However, snapshots with the extension `.full.snap` are never saved to the git repository. ## Example ```rust // In e.g. 
facets.rs #[test] fn my_test() { // create an index let index = TempIndex::new(); index.add_documents(...); index.update_settings(|settings| ...); // then snapshot the content of one of its databases // the snapshot will be saved in the current folder under facets.rs/my_test/facet_id_string_docids.snap db_snap!(index, facet_id_string_docids); index.add_documents(...); // we can also name the snapshot to ensure there is no conflict // this snapshot will be saved at facets.rs/my_test/updated/facet_id_string_docids.snap db_snap!(index, facet_id_string_docids, "updated"); // and we can also use "inline" snapshots, which insert their content in the given string literal db_snap!(index, field_distribution, @""); // once the snapshot is approved, it will automatically get transformed to, e.g.: // db_snap!(index, field_distribution, @" // my_facet 21 // other_field 3 // "); // now let's add **many** documents index.add_documents(...); // because the snapshot is too big, its hash is saved instead // if the MILLI_TEST_FULL_SNAPS env variable is set to true, then the full snapshot will also be saved // at facets.rs/my_test/large/facet_id_string_docids.full.snap db_snap!(index, facet_id_string_docids, "large", @"5348bbc46b5384455b6a900666d2a502"); } ``` Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
2025-07-04 04:17:10 +02:00 · 2022-08-16 11:57:09 +00:00 · 2022-08-16 11:57:09 +00:00 · 293a246af8
commit 293a246af8
parent 950d8e4c44 dea00311b6
25 changed files with 1234 additions and 661 deletions
--- a/.gitignore
+++ b/.gitignore
@ -6,3 +6,10 @@
 *.csv
 *.mmdb
 *.svg
+
+# Snapshots
+## ... large
+*.full.snap
+
+#  ... unreviewed
+*.snap.new
--- a/milli/Cargo.toml
+++ b/milli/Cargo.toml
@ -51,7 +51,9 @@ csv = "1.1.6"

 [dev-dependencies]
 big_s = "1.0.2"
+insta = "1.18.1"
 maplit = "1.0.2"
+md5 = "0.7.0"
 rand = "0.8.5"

 [features]
--- a/milli/src/error.rs
+++ b/milli/src/error.rs
@ -99,7 +99,7 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
    )]
    InvalidDocumentId { document_id: Value },
    #[error("Invalid facet distribution, the fields `{}` are not set as filterable.",
-        .invalid_facets_name.iter().map(AsRef::as_ref).collect::<Vec<_>>().join(", ")
+        .invalid_facets_name.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", ")
     )]
    InvalidFacetsDistribution { invalid_facets_name: BTreeSet<String> },
    #[error(transparent)]
@ -111,7 +111,7 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
        match .valid_fields.is_empty() {
            true => "This index does not have configured sortable attributes.".to_string(),
            false => format!("Available sortable attributes are: `{}`.",
-                    valid_fields.iter().map(AsRef::as_ref).collect::<Vec<_>>().join(", ")
+                    valid_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(", ")
                ),
        }
    )]
--- a/milli/src/index.rs
+++ b/milli/src/index.rs
@ -1183,13 +1183,12 @@ pub(crate) mod tests {

    use big_s::S;
    use heed::{EnvOpenOptions, RwTxn};
-    use maplit::btreemap;
    use tempfile::TempDir;

    use crate::documents::DocumentsBatchReader;
    use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
    use crate::update::{self, IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
-    use crate::Index;
+    use crate::{db_snap, Index};

    pub(crate) struct TempIndex {
        pub inner: Index,
@ -1288,17 +1287,30 @@ pub(crate) mod tests {
            ]))
            .unwrap();

-        let rtxn = index.read_txn().unwrap();
-        let field_distribution = index.field_distribution(&rtxn).unwrap();
-        assert_eq!(
-            field_distribution,
-            btreemap! {
-                "id".to_string() => 2,
-                "name".to_string() => 2,
-                "age".to_string() => 1,
-            }
+        db_snap!(index, field_distribution, 1);
+
+        db_snap!(index, word_docids,
+            @r###"
+        1                [0, ]
+        2                [1, ]
+        20               [1, ]
+        bob              [1, ]
+        kevin            [0, ]
+        "###
        );

+        db_snap!(index, field_distribution);
+
+        db_snap!(index, field_distribution,
+            @"
+            age              1     
+            id               2     
+            name             2     
+            "
+        );
+
+        // snapshot_index!(&index, "1", include: "^field_distribution$");
+
        // we add all the documents a second time. we are supposed to get the same
        // field_distribution in the end
        index
@ -1309,16 +1321,12 @@ pub(crate) mod tests {
            ]))
            .unwrap();

-        let rtxn = index.read_txn().unwrap();
-
-        let field_distribution = index.field_distribution(&rtxn).unwrap();
-        assert_eq!(
-            field_distribution,
-            btreemap! {
-                "id".to_string() => 2,
-                "name".to_string() => 2,
-                "age".to_string() => 1,
-            }
+        db_snap!(index, field_distribution,
+            @r###"
+            age              1     
+            id               2     
+            name             2     
+            "###
        );

        // then we update a document by removing one field and another by adding one field
@ -1329,16 +1337,12 @@ pub(crate) mod tests {
            ]))
            .unwrap();

-        let rtxn = index.read_txn().unwrap();
-
-        let field_distribution = index.field_distribution(&rtxn).unwrap();
-        assert_eq!(
-            field_distribution,
-            btreemap! {
-                "id".to_string() => 2,
-                "name".to_string() => 2,
-                "has_dog".to_string() => 1,
-            }
+        db_snap!(index, field_distribution,
+            @r###"
+            has_dog          1     
+            id               2     
+            name             2     
+            "###
        );
    }

--- a/milli/src/lib.rs
+++ b/milli/src/lib.rs
@ -13,6 +13,10 @@ pub mod proximity;
 mod search;
 pub mod update;

+#[cfg(test)]
+#[macro_use]
+pub mod snapshot_tests;
+
 use std::collections::{BTreeMap, HashMap};
 use std::convert::{TryFrom, TryInto};
 use std::hash::BuildHasherDefault;
--- a/milli/src/search/criteria/attribute.rs
+++ b/milli/src/search/criteria/attribute.rs
@ -632,25 +632,59 @@ mod tests {
                ]),
            ],
        );
-
-        let expected = vec![
-            vec![vec![Query { prefix: false, kind: QueryKind::exact(S("manythefish")) }]],
-            vec![
-                vec![Query { prefix: false, kind: QueryKind::exact(S("manythe")) }],
-                vec![Query { prefix: false, kind: QueryKind::exact(S("fish")) }],
-            ],
-            vec![
-                vec![Query { prefix: false, kind: QueryKind::exact(S("many")) }],
-                vec![Query { prefix: false, kind: QueryKind::exact(S("thefish")) }],
-            ],
-            vec![
-                vec![Query { prefix: false, kind: QueryKind::exact(S("many")) }],
-                vec![Query { prefix: false, kind: QueryKind::exact(S("the")) }],
-                vec![Query { prefix: false, kind: QueryKind::exact(S("fish")) }],
-            ],
-        ];
-
        let result = flatten_query_tree(&query_tree);
-        assert_eq!(expected, result);
+
+        insta::assert_debug_snapshot!(result, @r###"
+        [
+            [
+                [
+                    Exact {
+                        word: "manythefish",
+                    },
+                ],
+            ],
+            [
+                [
+                    Exact {
+                        word: "manythe",
+                    },
+                ],
+                [
+                    Exact {
+                        word: "fish",
+                    },
+                ],
+            ],
+            [
+                [
+                    Exact {
+                        word: "many",
+                    },
+                ],
+                [
+                    Exact {
+                        word: "thefish",
+                    },
+                ],
+            ],
+            [
+                [
+                    Exact {
+                        word: "many",
+                    },
+                ],
+                [
+                    Exact {
+                        word: "the",
+                    },
+                ],
+                [
+                    Exact {
+                        word: "fish",
+                    },
+                ],
+            ],
+        ]
+        "###);
    }
 }
--- a/milli/src/search/criteria/typo.rs
+++ b/milli/src/search/criteria/typo.rs
@ -349,22 +349,33 @@ mod test {
    use super::super::test::TestContext;
    use super::*;

+    fn display_criteria(mut criteria: Typo, mut parameters: CriterionParameters) -> String {
+        let mut result = String::new();
+        while let Some(criterion) = criteria.next(&mut parameters).unwrap() {
+            result.push_str(&format!("{criterion:?}\n\n"));
+        }
+        result
+    }
+
    #[test]
    fn initial_placeholder_no_facets() {
        let context = TestContext::default();
        let query_tree = None;
        let facet_candidates = None;

-        let mut criterion_parameters = CriterionParameters {
+        let criterion_parameters = CriterionParameters {
            wdcache: &mut WordDerivationsCache::new(),
            excluded_candidates: &RoaringBitmap::new(),
        };

        let parent = Initial::new(query_tree, facet_candidates);
-        let mut criteria = Typo::new(&context, Box::new(parent));
+        let criteria = Typo::new(&context, Box::new(parent));

-        assert!(criteria.next(&mut criterion_parameters).unwrap().unwrap().candidates.is_none());
-        assert!(criteria.next(&mut criterion_parameters).unwrap().is_none());
+        let result = display_criteria(criteria, criterion_parameters);
+        insta::assert_snapshot!(result, @r###"
+        CriterionResult { query_tree: None, candidates: None, filtered_candidates: None, bucket_candidates: None }
+
+        "###);
    }

    #[test]
@ -390,78 +401,32 @@ mod test {

        let facet_candidates = None;

-        let mut criterion_parameters = CriterionParameters {
+        let criterion_parameters = CriterionParameters {
            wdcache: &mut WordDerivationsCache::new(),
            excluded_candidates: &RoaringBitmap::new(),
        };
        let parent = Initial::new(Some(query_tree), facet_candidates);
-        let mut criteria = Typo::new(&context, Box::new(parent));
+        let criteria = Typo::new(&context, Box::new(parent));

-        let candidates_1 = context.word_docids("split").unwrap().unwrap()
-            & context.word_docids("this").unwrap().unwrap()
-            & context.word_docids("world").unwrap().unwrap();
-        let expected_1 = CriterionResult {
-            query_tree: Some(Operation::Or(
-                false,
-                vec![Operation::And(vec![
-                    Operation::Query(Query {
-                        prefix: false,
-                        kind: QueryKind::exact("split".to_string()),
-                    }),
-                    Operation::Query(Query {
-                        prefix: false,
-                        kind: QueryKind::exact("this".to_string()),
-                    }),
-                    Operation::Query(Query {
-                        prefix: false,
-                        kind: QueryKind::exact("world".to_string()),
-                    }),
-                ])],
-            )),
-            candidates: Some(candidates_1.clone()),
-            bucket_candidates: Some(candidates_1),
-            filtered_candidates: None,
-        };
+        let result = display_criteria(criteria, criterion_parameters);
+        insta::assert_snapshot!(result, @r###"
+        CriterionResult { query_tree: Some(OR
+          AND
+            Exact { word: "split" }
+            Exact { word: "this" }
+            Exact { word: "world" }
+        ), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, bucket_candidates: Some(RoaringBitmap<[]>) }

-        assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_1));
+        CriterionResult { query_tree: Some(OR
+          AND
+            Exact { word: "split" }
+            Exact { word: "this" }
+            OR
+              Exact { word: "word" }
+              Exact { word: "world" }
+        ), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, bucket_candidates: Some(RoaringBitmap<[]>) }

-        let candidates_2 = (context.word_docids("split").unwrap().unwrap()
-            & context.word_docids("this").unwrap().unwrap()
-            & context.word_docids("word").unwrap().unwrap())
-            - context.word_docids("world").unwrap().unwrap();
-        let expected_2 = CriterionResult {
-            query_tree: Some(Operation::Or(
-                false,
-                vec![Operation::And(vec![
-                    Operation::Query(Query {
-                        prefix: false,
-                        kind: QueryKind::exact("split".to_string()),
-                    }),
-                    Operation::Query(Query {
-                        prefix: false,
-                        kind: QueryKind::exact("this".to_string()),
-                    }),
-                    Operation::Or(
-                        false,
-                        vec![
-                            Operation::Query(Query {
-                                prefix: false,
-                                kind: QueryKind::exact_with_typo(1, "word".to_string()),
-                            }),
-                            Operation::Query(Query {
-                                prefix: false,
-                                kind: QueryKind::exact("world".to_string()),
-                            }),
-                        ],
-                    ),
-                ])],
-            )),
-            candidates: Some(candidates_2.clone()),
-            bucket_candidates: Some(candidates_2),
-            filtered_candidates: None,
-        };
-
-        assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_2));
+        "###);
    }

    #[test]
@ -470,25 +435,18 @@ mod test {
        let query_tree = None;
        let facet_candidates = context.word_docids("earth").unwrap().unwrap();

-        let mut criterion_parameters = CriterionParameters {
+        let criterion_parameters = CriterionParameters {
            wdcache: &mut WordDerivationsCache::new(),
            excluded_candidates: &RoaringBitmap::new(),
        };
        let parent = Initial::new(query_tree, Some(facet_candidates.clone()));
-        let mut criteria = Typo::new(&context, Box::new(parent));
+        let criteria = Typo::new(&context, Box::new(parent));

-        let expected = CriterionResult {
-            query_tree: None,
-            candidates: None,
-            bucket_candidates: None,
-            filtered_candidates: Some(facet_candidates.clone()),
-        };
+        let result = display_criteria(criteria, criterion_parameters);
+        insta::assert_snapshot!(result, @r###"
+        CriterionResult { query_tree: None, candidates: None, filtered_candidates: Some(RoaringBitmap<8000 values between 986424 and 4294786076>), bucket_candidates: None }

-        // first iteration, returns the facet candidates
-        assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected));
-
-        // second iteration, returns None because there is no more things to do
-        assert!(criteria.next(&mut criterion_parameters).unwrap().is_none());
+        "###);
    }

    #[test]
@ -514,77 +472,31 @@ mod test {

        let facet_candidates = context.word_docids("earth").unwrap().unwrap();

-        let mut criterion_parameters = CriterionParameters {
+        let criterion_parameters = CriterionParameters {
            wdcache: &mut WordDerivationsCache::new(),
            excluded_candidates: &RoaringBitmap::new(),
        };
        let parent = Initial::new(Some(query_tree), Some(facet_candidates.clone()));
-        let mut criteria = Typo::new(&context, Box::new(parent));
+        let criteria = Typo::new(&context, Box::new(parent));

-        let candidates_1 = context.word_docids("split").unwrap().unwrap()
-            & context.word_docids("this").unwrap().unwrap()
-            & context.word_docids("world").unwrap().unwrap();
-        let expected_1 = CriterionResult {
-            query_tree: Some(Operation::Or(
-                false,
-                vec![Operation::And(vec![
-                    Operation::Query(Query {
-                        prefix: false,
-                        kind: QueryKind::exact("split".to_string()),
-                    }),
-                    Operation::Query(Query {
-                        prefix: false,
-                        kind: QueryKind::exact("this".to_string()),
-                    }),
-                    Operation::Query(Query {
-                        prefix: false,
-                        kind: QueryKind::exact("world".to_string()),
-                    }),
-                ])],
-            )),
-            candidates: Some(&candidates_1 & &facet_candidates),
-            bucket_candidates: Some(&candidates_1 & &facet_candidates),
-            filtered_candidates: None,
-        };
+        let result = display_criteria(criteria, criterion_parameters);
+        insta::assert_snapshot!(result, @r###"
+        CriterionResult { query_tree: Some(OR
+          AND
+            Exact { word: "split" }
+            Exact { word: "this" }
+            Exact { word: "world" }
+        ), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, bucket_candidates: Some(RoaringBitmap<[]>) }

-        assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_1));
+        CriterionResult { query_tree: Some(OR
+          AND
+            Exact { word: "split" }
+            Exact { word: "this" }
+            OR
+              Exact { word: "word" }
+              Exact { word: "world" }
+        ), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, bucket_candidates: Some(RoaringBitmap<[]>) }

-        let candidates_2 = (context.word_docids("split").unwrap().unwrap()
-            & context.word_docids("this").unwrap().unwrap()
-            & context.word_docids("word").unwrap().unwrap())
-            - context.word_docids("world").unwrap().unwrap();
-        let expected_2 = CriterionResult {
-            query_tree: Some(Operation::Or(
-                false,
-                vec![Operation::And(vec![
-                    Operation::Query(Query {
-                        prefix: false,
-                        kind: QueryKind::exact("split".to_string()),
-                    }),
-                    Operation::Query(Query {
-                        prefix: false,
-                        kind: QueryKind::exact("this".to_string()),
-                    }),
-                    Operation::Or(
-                        false,
-                        vec![
-                            Operation::Query(Query {
-                                prefix: false,
-                                kind: QueryKind::exact_with_typo(1, "word".to_string()),
-                            }),
-                            Operation::Query(Query {
-                                prefix: false,
-                                kind: QueryKind::exact("world".to_string()),
-                            }),
-                        ],
-                    ),
-                ])],
-            )),
-            candidates: Some(&candidates_2 & &facet_candidates),
-            bucket_candidates: Some(&candidates_2 & &facet_candidates),
-            filtered_candidates: None,
-        };
-
-        assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_2));
+        "###);
    }
 }
--- a/milli/src/search/facet/filter.rs
+++ b/milli/src/search/facet/filter.rs
@ -45,7 +45,7 @@ impl<'a> Display for FilterError<'a> {
                        attribute,
                    )
                } else {
-                    let filterables_list = filterable_fields.iter().map(AsRef::as_ref).collect::<Vec<_>>().join(" ");
+                    let filterables_list = filterable_fields.iter().map(AsRef::as_ref).collect::<Vec<&str>>().join(" ");

                    write!(
                        f,
--- a/milli/src/search/matches/mod.rs
+++ b/milli/src/search/matches/mod.rs
@ -573,15 +573,18 @@ mod tests {
        let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.";
        let mut matcher = builder.build(text);
        // no crop should return complete text with highlighted matches.
-        assert_eq!(&matcher.format(format_options), "Natalie risk her future to build a <em>world</em> with <em>the</em> boy she loves. Emily Henry: <em>The</em> Love That <em>Split</em> <em>The</em> <em>World</em>.");
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"Natalie risk her future to build a <em>world</em> with <em>the</em> boy she loves. Emily Henry: <em>The</em> Love That <em>Split</em> <em>The</em> <em>World</em>."
+        );

        // Text containing some matches.
        let text = "Natalie risk her future to build a world with the boy she loves.";
        let mut matcher = builder.build(text);
        // no crop should return complete text with highlighted matches.
-        assert_eq!(
-            &matcher.format(format_options),
-            "Natalie risk her future to build a <em>world</em> with <em>the</em> boy she loves."
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"Natalie risk her future to build a <em>world</em> with <em>the</em> boy she loves."
        );
    }

@ -602,19 +605,28 @@ mod tests {
        let text = "Ŵôřlḑôle";
        let mut matcher = builder.build(text);
        // no crop should return complete text with highlighted matches.
-        assert_eq!(&matcher.format(format_options), "<em>Ŵôřlḑ</em>ôle");
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"<em>Ŵôřlḑ</em>ôle"
+        );

        // Text containing unicode match.
        let text = "Ŵôřlḑ";
        let mut matcher = builder.build(text);
        // no crop should return complete text with highlighted matches.
-        assert_eq!(&matcher.format(format_options), "<em>Ŵôřlḑ</em>");
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"<em>Ŵôřlḑ</em>"
+        );

        // Text containing unicode match.
        let text = "Westfália";
        let mut matcher = builder.build(text);
        // no crop should return complete text with highlighted matches.
-        assert_eq!(&matcher.format(format_options), "<em>Westfáli</em>a");
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"<em>Westfáli</em>a"
+        );
    }

    #[test]
@ -628,83 +640,89 @@ mod tests {
        // empty text.
        let text = "";
        let mut matcher = builder.build(text);
-        assert_eq!(&matcher.format(format_options), "");
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @""
+        );

        // text containing only separators.
        let text = ":-)";
        let mut matcher = builder.build(text);
-        assert_eq!(&matcher.format(format_options), ":-)");
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @":-)"
+        );

        // Text without any match.
        let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
        let mut matcher = builder.build(text);
        // no highlight should return 10 first words with a marker at the end.
-        assert_eq!(
-            &matcher.format(format_options),
-            "A quick brown fox can not jump 32 feet, right…"
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"A quick brown fox can not jump 32 feet, right…"
        );

        // Text without any match starting by a separator.
        let text = "(A quick brown fox can not jump 32 feet, right? Brr, it is cold!)";
        let mut matcher = builder.build(text);
        // no highlight should return 10 first words with a marker at the end.
-        assert_eq!(
-            &matcher.format(format_options),
-            "(A quick brown fox can not jump 32 feet, right…"
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"(A quick brown fox can not jump 32 feet, right…"
        );

        // Test phrase propagation
        let text = "Natalie risk her future. Split The World is a book written by Emily Henry. I never read it.";
        let mut matcher = builder.build(text);
        // should crop the phrase instead of croping around the match.
-        assert_eq!(
-            &matcher.format(format_options),
-            "… Split The World is a book written by Emily Henry…",
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"… Split The World is a book written by Emily Henry…"
        );

        // Text containing some matches.
        let text = "Natalie risk her future to build a world with the boy she loves.";
        let mut matcher = builder.build(text);
        // no highlight should return 10 last words with a marker at the start.
-        assert_eq!(
-            &matcher.format(format_options),
-            "…future to build a world with the boy she loves…"
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"…future to build a world with the boy she loves…"
        );

        // Text containing all matches.
        let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.";
        let mut matcher = builder.build(text);
        // no highlight should return 10 last words with a marker at the start.
-        assert_eq!(
-            &matcher.format(format_options),
-            "…she loves. Emily Henry: The Love That Split The World."
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"…she loves. Emily Henry: The Love That Split The World."
        );

        // Text containing a match unordered and a match ordered.
        let text = "The world split void void void void void void void void void split the world void void";
        let mut matcher = builder.build(text);
        // crop should return 10 last words with a marker at the start.
-        assert_eq!(
-            &matcher.format(format_options),
-            "…void void void void void split the world void void"
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"…void void void void void split the world void void"
        );

        // Text containing matches with diferent density.
        let text = "split void the void void world void void void void void void void void void void split the world void void";
        let mut matcher = builder.build(text);
        // crop should return 10 last words with a marker at the start.
-        assert_eq!(
-            &matcher.format(format_options),
-            "…void void void void void split the world void void"
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"…void void void void void split the world void void"
        );

        // Text containing matches with same word.
        let text = "split split split split split split void void void void void void void void void void split the world void void";
        let mut matcher = builder.build(text);
        // crop should return 10 last words with a marker at the start.
-        assert_eq!(
-            &matcher.format(format_options),
-            "…void void void void void split the world void void"
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"…void void void void void split the world void void"
        );
    }

@ -719,44 +737,53 @@ mod tests {
        // empty text.
        let text = "";
        let mut matcher = builder.build(text);
-        assert_eq!(&matcher.format(format_options), "");
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @""
+        );

        // text containing only separators.
        let text = ":-)";
        let mut matcher = builder.build(text);
-        assert_eq!(&matcher.format(format_options), ":-)");
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @":-)"
+        );

        // Text without any match.
        let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
        let mut matcher = builder.build(text);
        // both should return 10 first words with a marker at the end.
-        assert_eq!(
-            &matcher.format(format_options),
-            "A quick brown fox can not jump 32 feet, right…"
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"A quick brown fox can not jump 32 feet, right…"
        );

        // Text containing some matches.
        let text = "Natalie risk her future to build a world with the boy she loves.";
        let mut matcher = builder.build(text);
        // both should return 10 last words with a marker at the start and highlighted matches.
-        assert_eq!(
-            &matcher.format(format_options),
-            "…future to build a <em>world</em> with <em>the</em> boy she loves…"
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"…future to build a <em>world</em> with <em>the</em> boy she loves…"
        );

        // Text containing all matches.
        let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.";
        let mut matcher = builder.build(text);
        // both should return 10 last words with a marker at the start and highlighted matches.
-        assert_eq!(&matcher.format(format_options), "…she loves. Emily Henry: <em>The</em> Love That <em>Split</em> <em>The</em> <em>World</em>.");
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"…she loves. Emily Henry: <em>The</em> Love That <em>Split</em> <em>The</em> <em>World</em>."
+        );

        // Text containing a match unordered and a match ordered.
        let text = "The world split void void void void void void void void void split the world void void";
        let mut matcher = builder.build(text);
        // crop should return 10 last words with a marker at the start.
-        assert_eq!(
-            &matcher.format(format_options),
-            "…void void void void void <em>split</em> <em>the</em> <em>world</em> void void"
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"…void void void void void <em>split</em> <em>the</em> <em>world</em> void void"
        );
    }

@ -773,19 +800,28 @@ mod tests {
        let format_options = FormatOptions { highlight: false, crop: Some(2) };
        let mut matcher = builder.build(text);
        // because crop size < query size, partially format matches.
-        assert_eq!(&matcher.format(format_options), "…split the…");
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"…split the…"
+        );

        // set a smaller crop size
        let format_options = FormatOptions { highlight: false, crop: Some(1) };
        let mut matcher = builder.build(text);
        // because crop size < query size, partially format matches.
-        assert_eq!(&matcher.format(format_options), "…split…");
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"…split…"
+        );

        // set  crop size to 0
        let format_options = FormatOptions { highlight: false, crop: Some(0) };
        let mut matcher = builder.build(text);
        // because crop size is 0, crop is ignored.
-        assert_eq!(&matcher.format(format_options), "void void split the world void void.");
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"void void split the world void void."
+        );
    }

    #[test]
@ -820,11 +856,9 @@ mod tests {

        let text = "the do or die can't be he do and or isn't he";
        let mut matcher = builder.build(text);
-        assert_eq!(
-            &matcher.format(format_options),
-            "_the_ _do_ _or_ die can't be he _do_ and or isn'_t_ _he_",
-            "matches: {:?}",
-            &matcher.matches
+        insta::assert_snapshot!(
+            matcher.format(format_options),
+            @"_the_ _do_ _or_ die can't be he _do_ and or isn'_t_ _he_"
        );
    }
 }
--- a/milli/src/search/query_tree.rs
+++ b/milli/src/search/query_tree.rs
@ -99,11 +99,6 @@ impl QueryKind {
        QueryKind::Exact { original_typo: 0, word }
    }

-    #[cfg(test)]
-    pub fn exact_with_typo(original_typo: u8, word: String) -> Self {
-        QueryKind::Exact { original_typo, word }
-    }
-
    pub fn tolerant(typo: u8, word: String) -> Self {
        QueryKind::Tolerant { typo, word }
    }
@ -857,30 +852,16 @@ mod test {
        let query = "hey friends";
        let tokens = query.tokenize();

-        let expected = Operation::Or(
-            false,
-            vec![
-                Operation::And(vec![
-                    Operation::Query(Query {
-                        prefix: false,
-                        kind: QueryKind::exact("hey".to_string()),
-                    }),
-                    Operation::Query(Query {
-                        prefix: true,
-                        kind: QueryKind::tolerant(1, "friends".to_string()),
-                    }),
-                ]),
-                Operation::Query(Query {
-                    prefix: true,
-                    kind: QueryKind::tolerant(1, "heyfriends".to_string()),
-                }),
-            ],
-        );
-
        let (query_tree, _) =
            TestContext::default().build(false, true, None, tokens).unwrap().unwrap();

-        assert_eq!(expected, query_tree);
+        insta::assert_debug_snapshot!(query_tree, @r###"
+        OR
+          AND
+            Exact { word: "hey" }
+            PrefixTolerant { word: "friends", max typo: 1 }
+          PrefixTolerant { word: "heyfriends", max typo: 1 }
+        "###);
    }

    #[test]
@ -888,30 +869,16 @@ mod test {
        let query = "hey friends ";
        let tokens = query.tokenize();

-        let expected = Operation::Or(
-            false,
-            vec![
-                Operation::And(vec![
-                    Operation::Query(Query {
-                        prefix: false,
-                        kind: QueryKind::exact("hey".to_string()),
-                    }),
-                    Operation::Query(Query {
-                        prefix: false,
-                        kind: QueryKind::tolerant(1, "friends".to_string()),
-                    }),
-                ]),
-                Operation::Query(Query {
-                    prefix: false,
-                    kind: QueryKind::tolerant(1, "heyfriends".to_string()),
-                }),
-            ],
-        );
-
        let (query_tree, _) =
            TestContext::default().build(false, true, None, tokens).unwrap().unwrap();

-        assert_eq!(expected, query_tree);
+        insta::assert_debug_snapshot!(query_tree, @r###"
+        OR
+          AND
+            Exact { word: "hey" }
+            Tolerant { word: "friends", max typo: 1 }
+          Tolerant { word: "heyfriends", max typo: 1 }
+        "###);
    }

    #[test]
@ -919,62 +886,24 @@ mod test {
        let query = "hello world ";
        let tokens = query.tokenize();

-        let expected = Operation::Or(
-            false,
-            vec![
-                Operation::And(vec![
-                    Operation::Or(
-                        false,
-                        vec![
-                            Operation::Query(Query {
-                                prefix: false,
-                                kind: QueryKind::exact("hi".to_string()),
-                            }),
-                            Operation::And(vec![
-                                Operation::Query(Query {
-                                    prefix: false,
-                                    kind: QueryKind::exact("good".to_string()),
-                                }),
-                                Operation::Query(Query {
-                                    prefix: false,
-                                    kind: QueryKind::exact("morning".to_string()),
-                                }),
-                            ]),
-                            Operation::Query(Query {
-                                prefix: false,
-                                kind: QueryKind::tolerant(1, "hello".to_string()),
-                            }),
-                        ],
-                    ),
-                    Operation::Or(
-                        false,
-                        vec![
-                            Operation::Query(Query {
-                                prefix: false,
-                                kind: QueryKind::exact("earth".to_string()),
-                            }),
-                            Operation::Query(Query {
-                                prefix: false,
-                                kind: QueryKind::exact("nature".to_string()),
-                            }),
-                            Operation::Query(Query {
-                                prefix: false,
-                                kind: QueryKind::tolerant(1, "world".to_string()),
-                            }),
-                        ],
-                    ),
-                ]),
-                Operation::Query(Query {
-                    prefix: false,
-                    kind: QueryKind::tolerant(1, "helloworld".to_string()),
-                }),
-            ],
-        );
-
        let (query_tree, _) =
            TestContext::default().build(false, true, None, tokens).unwrap().unwrap();

-        assert_eq!(expected, query_tree);
+        insta::assert_debug_snapshot!(query_tree, @r###"
+        OR
+          AND
+            OR
+              Exact { word: "hi" }
+              AND
+                Exact { word: "good" }
+                Exact { word: "morning" }
+              Tolerant { word: "hello", max typo: 1 }
+            OR
+              Exact { word: "earth" }
+              Exact { word: "nature" }
+              Tolerant { word: "world", max typo: 1 }
+          Tolerant { word: "helloworld", max typo: 1 }
+        "###);
    }

    #[test]
@ -982,97 +911,34 @@ mod test {
        let query = "new york city ";
        let tokens = query.tokenize();

-        let expected = Operation::Or(
-            false,
-            vec![
-                Operation::And(vec![
-                    Operation::Query(Query {
-                        prefix: false,
-                        kind: QueryKind::exact("new".to_string()),
-                    }),
-                    Operation::Or(
-                        false,
-                        vec![
-                            Operation::And(vec![
-                                Operation::Query(Query {
-                                    prefix: false,
-                                    kind: QueryKind::exact("york".to_string()),
-                                }),
-                                Operation::Query(Query {
-                                    prefix: false,
-                                    kind: QueryKind::exact("city".to_string()),
-                                }),
-                            ]),
-                            Operation::Query(Query {
-                                prefix: false,
-                                kind: QueryKind::tolerant(1, "yorkcity".to_string()),
-                            }),
-                        ],
-                    ),
-                ]),
-                Operation::And(vec![
-                    Operation::Or(
-                        false,
-                        vec![
-                            Operation::Query(Query {
-                                prefix: false,
-                                kind: QueryKind::exact("nyc".to_string()),
-                            }),
-                            Operation::And(vec![
-                                Operation::Query(Query {
-                                    prefix: false,
-                                    kind: QueryKind::exact("new".to_string()),
-                                }),
-                                Operation::Query(Query {
-                                    prefix: false,
-                                    kind: QueryKind::exact("york".to_string()),
-                                }),
-                                Operation::Query(Query {
-                                    prefix: false,
-                                    kind: QueryKind::exact("city".to_string()),
-                                }),
-                            ]),
-                            Operation::Query(Query {
-                                prefix: false,
-                                kind: QueryKind::tolerant(1, "newyork".to_string()),
-                            }),
-                        ],
-                    ),
-                    Operation::Query(Query {
-                        prefix: false,
-                        kind: QueryKind::exact("city".to_string()),
-                    }),
-                ]),
-                Operation::Or(
-                    false,
-                    vec![
-                        Operation::Query(Query {
-                            prefix: false,
-                            kind: QueryKind::exact("nyc".to_string()),
-                        }),
-                        Operation::And(vec![
-                            Operation::Query(Query {
-                                prefix: false,
-                                kind: QueryKind::exact("new".to_string()),
-                            }),
-                            Operation::Query(Query {
-                                prefix: false,
-                                kind: QueryKind::exact("york".to_string()),
-                            }),
-                        ]),
-                        Operation::Query(Query {
-                            prefix: false,
-                            kind: QueryKind::tolerant(1, "newyorkcity".to_string()),
-                        }),
-                    ],
-                ),
-            ],
-        );
-
        let (query_tree, _) =
            TestContext::default().build(false, true, None, tokens).unwrap().unwrap();

-        assert_eq!(expected, query_tree);
+        insta::assert_debug_snapshot!(query_tree, @r###"
+        OR
+          AND
+            Exact { word: "new" }
+            OR
+              AND
+                Exact { word: "york" }
+                Exact { word: "city" }
+              Tolerant { word: "yorkcity", max typo: 1 }
+          AND
+            OR
+              Exact { word: "nyc" }
+              AND
+                Exact { word: "new" }
+                Exact { word: "york" }
+                Exact { word: "city" }
+              Tolerant { word: "newyork", max typo: 1 }
+            Exact { word: "city" }
+          OR
+            Exact { word: "nyc" }
+            AND
+              Exact { word: "new" }
+              Exact { word: "york" }
+            Tolerant { word: "newyorkcity", max typo: 1 }
+        "###);
    }

    #[test]
@ -1080,30 +946,16 @@ mod test {
        let query = "n grams ";
        let tokens = query.tokenize();

-        let expected = Operation::Or(
-            false,
-            vec![
-                Operation::And(vec![
-                    Operation::Query(Query {
-                        prefix: false,
-                        kind: QueryKind::exact("n".to_string()),
-                    }),
-                    Operation::Query(Query {
-                        prefix: false,
-                        kind: QueryKind::tolerant(1, "grams".to_string()),
-                    }),
-                ]),
-                Operation::Query(Query {
-                    prefix: false,
-                    kind: QueryKind::tolerant(1, "ngrams".to_string()),
-                }),
-            ],
-        );
-
        let (query_tree, _) =
            TestContext::default().build(false, true, None, tokens).unwrap().unwrap();

-        assert_eq!(expected, query_tree);
+        insta::assert_debug_snapshot!(query_tree, @r###"
+        OR
+          AND
+            Exact { word: "n" }
+            Tolerant { word: "grams", max typo: 1 }
+          Tolerant { word: "ngrams", max typo: 1 }
+        "###);
    }

    #[test]
@ -1111,36 +963,18 @@ mod test {
        let query = "wordsplit fish ";
        let tokens = query.tokenize();

-        let expected = Operation::Or(
-            false,
-            vec![
-                Operation::And(vec![
-                    Operation::Or(
-                        false,
-                        vec![
-                            Operation::Phrase(vec!["word".to_string(), "split".to_string()]),
-                            Operation::Query(Query {
-                                prefix: false,
-                                kind: QueryKind::tolerant(2, "wordsplit".to_string()),
-                            }),
-                        ],
-                    ),
-                    Operation::Query(Query {
-                        prefix: false,
-                        kind: QueryKind::exact("fish".to_string()),
-                    }),
-                ]),
-                Operation::Query(Query {
-                    prefix: false,
-                    kind: QueryKind::tolerant(1, "wordsplitfish".to_string()),
-                }),
-            ],
-        );
-
        let (query_tree, _) =
            TestContext::default().build(false, true, None, tokens).unwrap().unwrap();

-        assert_eq!(expected, query_tree);
+        insta::assert_debug_snapshot!(query_tree, @r###"
+        OR
+          AND
+            OR
+              PHRASE ["word", "split"]
+              Tolerant { word: "wordsplit", max typo: 2 }
+            Exact { word: "fish" }
+          Tolerant { word: "wordsplitfish", max typo: 1 }
+        "###);
    }

    #[test]
@ -1148,15 +982,14 @@ mod test {
        let query = "\"hey friends\" \" \" \"wooop";
        let tokens = query.tokenize();

-        let expected = Operation::And(vec![
-            Operation::Phrase(vec!["hey".to_string(), "friends".to_string()]),
-            Operation::Query(Query { prefix: false, kind: QueryKind::exact("wooop".to_string()) }),
-        ]);
-
        let (query_tree, _) =
            TestContext::default().build(false, true, None, tokens).unwrap().unwrap();

-        assert_eq!(expected, query_tree);
+        insta::assert_debug_snapshot!(query_tree, @r###"
+        AND
+          PHRASE ["hey", "friends"]
+          Exact { word: "wooop" }
+        "###);
    }

    #[test]
@ -1164,15 +997,14 @@ mod test {
        let query = "\"hey friends. wooop wooop\"";
        let tokens = query.tokenize();

-        let expected = Operation::And(vec![
-            Operation::Phrase(vec!["hey".to_string(), "friends".to_string()]),
-            Operation::Phrase(vec!["wooop".to_string(), "wooop".to_string()]),
-        ]);
-
        let (query_tree, _) =
            TestContext::default().build(false, true, None, tokens).unwrap().unwrap();

-        assert_eq!(expected, query_tree);
+        insta::assert_debug_snapshot!(query_tree, @r###"
+        AND
+          PHRASE ["hey", "friends"]
+          PHRASE ["wooop", "wooop"]
+        "###);
    }

    #[test]
@ -1180,82 +1012,30 @@ mod test {
        let query = "hey my friend ";
        let tokens = query.tokenize();

-        let expected = Operation::Or(
-            true,
-            vec![
-                Operation::Query(Query {
-                    prefix: false,
-                    kind: QueryKind::exact("hey".to_string()),
-                }),
-                Operation::Or(
-                    false,
-                    vec![
-                        Operation::And(vec![
-                            Operation::Query(Query {
-                                prefix: false,
-                                kind: QueryKind::exact("hey".to_string()),
-                            }),
-                            Operation::Query(Query {
-                                prefix: false,
-                                kind: QueryKind::exact("my".to_string()),
-                            }),
-                        ]),
-                        Operation::Query(Query {
-                            prefix: false,
-                            kind: QueryKind::tolerant(1, "heymy".to_string()),
-                        }),
-                    ],
-                ),
-                Operation::Or(
-                    false,
-                    vec![
-                        Operation::And(vec![
-                            Operation::Query(Query {
-                                prefix: false,
-                                kind: QueryKind::exact("hey".to_string()),
-                            }),
-                            Operation::Or(
-                                false,
-                                vec![
-                                    Operation::And(vec![
-                                        Operation::Query(Query {
-                                            prefix: false,
-                                            kind: QueryKind::exact("my".to_string()),
-                                        }),
-                                        Operation::Query(Query {
-                                            prefix: false,
-                                            kind: QueryKind::tolerant(1, "friend".to_string()),
-                                        }),
-                                    ]),
-                                    Operation::Query(Query {
-                                        prefix: false,
-                                        kind: QueryKind::tolerant(1, "myfriend".to_string()),
-                                    }),
-                                ],
-                            ),
-                        ]),
-                        Operation::And(vec![
-                            Operation::Query(Query {
-                                prefix: false,
-                                kind: QueryKind::tolerant(1, "heymy".to_string()),
-                            }),
-                            Operation::Query(Query {
-                                prefix: false,
-                                kind: QueryKind::tolerant(1, "friend".to_string()),
-                            }),
-                        ]),
-                        Operation::Query(Query {
-                            prefix: false,
-                            kind: QueryKind::tolerant(1, "heymyfriend".to_string()),
-                        }),
-                    ],
-                ),
-            ],
-        );
        let (query_tree, _) =
            TestContext::default().build(true, true, None, tokens).unwrap().unwrap();

-        assert_eq!(expected, query_tree);
+        insta::assert_debug_snapshot!(query_tree, @r###"
+        OR(WORD)
+          Exact { word: "hey" }
+          OR
+            AND
+              Exact { word: "hey" }
+              Exact { word: "my" }
+            Tolerant { word: "heymy", max typo: 1 }
+          OR
+            AND
+              Exact { word: "hey" }
+              OR
+                AND
+                  Exact { word: "my" }
+                  Tolerant { word: "friend", max typo: 1 }
+                Tolerant { word: "myfriend", max typo: 1 }
+            AND
+              Tolerant { word: "heymy", max typo: 1 }
+              Tolerant { word: "friend", max typo: 1 }
+            Tolerant { word: "heymyfriend", max typo: 1 }
+        "###);
    }

    #[test]
@ -1263,11 +1043,12 @@ mod test {
        let query = "\"hey my\"";
        let tokens = query.tokenize();

-        let expected = Operation::Phrase(vec!["hey".to_string(), "my".to_string()]);
        let (query_tree, _) =
            TestContext::default().build(true, true, None, tokens).unwrap().unwrap();

-        assert_eq!(expected, query_tree);
+        insta::assert_debug_snapshot!(query_tree, @r###"
+        PHRASE ["hey", "my"]
+        "###);
    }

    #[test]
@ -1275,68 +1056,27 @@ mod test {
        let query = r#""hey" my good "friend""#;
        let tokens = query.tokenize();

-        let expected = Operation::Or(
-            true,
-            vec![
-                Operation::And(vec![
-                    Operation::Query(Query {
-                        prefix: false,
-                        kind: QueryKind::exact("hey".to_string()),
-                    }),
-                    Operation::Query(Query {
-                        prefix: false,
-                        kind: QueryKind::exact("friend".to_string()),
-                    }),
-                ]),
-                Operation::And(vec![
-                    Operation::Query(Query {
-                        prefix: false,
-                        kind: QueryKind::exact("hey".to_string()),
-                    }),
-                    Operation::Query(Query {
-                        prefix: false,
-                        kind: QueryKind::exact("my".to_string()),
-                    }),
-                    Operation::Query(Query {
-                        prefix: false,
-                        kind: QueryKind::exact("friend".to_string()),
-                    }),
-                ]),
-                Operation::And(vec![
-                    Operation::Query(Query {
-                        prefix: false,
-                        kind: QueryKind::exact("hey".to_string()),
-                    }),
-                    Operation::Or(
-                        false,
-                        vec![
-                            Operation::And(vec![
-                                Operation::Query(Query {
-                                    prefix: false,
-                                    kind: QueryKind::exact("my".to_string()),
-                                }),
-                                Operation::Query(Query {
-                                    prefix: false,
-                                    kind: QueryKind::exact("good".to_string()),
-                                }),
-                            ]),
-                            Operation::Query(Query {
-                                prefix: false,
-                                kind: QueryKind::tolerant(1, "mygood".to_string()),
-                            }),
-                        ],
-                    ),
-                    Operation::Query(Query {
-                        prefix: false,
-                        kind: QueryKind::exact("friend".to_string()),
-                    }),
-                ]),
-            ],
-        );
        let (query_tree, _) =
            TestContext::default().build(true, true, None, tokens).unwrap().unwrap();

-        assert_eq!(expected, query_tree);
+        insta::assert_debug_snapshot!(query_tree, @r###"
+        OR(WORD)
+          AND
+            Exact { word: "hey" }
+            Exact { word: "friend" }
+          AND
+            Exact { word: "hey" }
+            Exact { word: "my" }
+            Exact { word: "friend" }
+          AND
+            Exact { word: "hey" }
+            OR
+              AND
+                Exact { word: "my" }
+                Exact { word: "good" }
+              Tolerant { word: "mygood", max typo: 1 }
+            Exact { word: "friend" }
+        "###);
    }

    #[test]
@ -1344,29 +1084,16 @@ mod test {
        let query = "hey friends ";
        let tokens = query.tokenize();

-        let expected = Operation::Or(
-            false,
-            vec![
-                Operation::And(vec![
-                    Operation::Query(Query {
-                        prefix: false,
-                        kind: QueryKind::exact("hey".to_string()),
-                    }),
-                    Operation::Query(Query {
-                        prefix: false,
-                        kind: QueryKind::exact("friends".to_string()),
-                    }),
-                ]),
-                Operation::Query(Query {
-                    prefix: false,
-                    kind: QueryKind::exact("heyfriends".to_string()),
-                }),
-            ],
-        );
        let (query_tree, _) =
            TestContext::default().build(false, false, None, tokens).unwrap().unwrap();

-        assert_eq!(expected, query_tree);
+        insta::assert_debug_snapshot!(query_tree, @r###"
+        OR
+          AND
+            Exact { word: "hey" }
+            Exact { word: "friends" }
+          Exact { word: "heyfriends" }
+        "###);
    }

    #[test]
@ -1374,15 +1101,14 @@ mod test {
        let query = "\"hey my\" good friend";
        let tokens = query.tokenize();

-        let expected = Operation::And(vec![
-            Operation::Phrase(vec!["hey".to_string(), "my".to_string()]),
-            Operation::Query(Query { prefix: false, kind: QueryKind::exact("good".to_string()) }),
-        ]);
-
        let (query_tree, _) =
            TestContext::default().build(false, false, Some(2), tokens).unwrap().unwrap();

-        assert_eq!(expected, query_tree);
+        insta::assert_debug_snapshot!(query_tree, @r###"
+        AND
+          PHRASE ["hey", "my"]
+          Exact { word: "good" }
+        "###);
    }

    #[test]
--- a/milli/src/snapshot_tests.rs
+++ b/milli/src/snapshot_tests.rs
@ -0,0 +1,527 @@
+use std::borrow::Cow;
+use std::fmt::Write;
+use std::path::Path;
+
+use heed::types::ByteSlice;
+use heed::BytesDecode;
+use roaring::RoaringBitmap;
+
+use crate::heed_codec::facet::{
+    FacetLevelValueU32Codec, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec,
+    FacetStringZeroBoundsValueCodec,
+};
+use crate::{make_db_snap_from_iter, CboRoaringBitmapCodec, ExternalDocumentsIds, Index};
+
+/// Builds the `insta` settings used to store database snapshots for the calling test.
+///
+/// Snapshots go under `snapshots/<caller file name>/<test name>`, followed by `<name>`
+/// when one is given. The test name is the last `::`-separated segment of the current
+/// thread's name. Module prefixes and the snapshotted expression are both omitted.
+///
+/// # Panics
+///
+/// Panics if the current thread has no name or if the caller path has no file name.
+#[track_caller]
+pub fn default_db_snapshot_settings_for_test(name: Option<&str>) -> insta::Settings {
+    // Resolved through `#[track_caller]`: this is the file of the code calling this function.
+    let caller_file = Path::new(std::panic::Location::caller().file());
+    let file_name = caller_file.file_name().unwrap().to_str().unwrap();
+
+    let current_thread = std::thread::current();
+    let test_name = current_thread.name().unwrap().rsplit("::").next().unwrap();
+
+    let mut snapshot_dir = Path::new("snapshots").join(file_name).join(test_name);
+    if let Some(name) = name {
+        snapshot_dir.push(name);
+    }
+
+    let mut settings = insta::Settings::clone_current();
+    settings.set_prepend_module_to_snapshot(false);
+    settings.set_omit_expression(true);
+    settings.set_snapshot_path(snapshot_dir);
+    settings
+}
+
+/**
+Create a snapshot test of the given database.
+
+## Arguments
+1. The identifier for the `Index`
+2. The content of the index to snapshot. Available options are:
+    - `settings`
+    - `word_docids`
+    - `exact_word_docids`
+    - `word_prefix_docids`
+    - `exact_word_prefix_docids`
+    - `docid_word_positions`
+    - `word_pair_proximity_docids`
+    - `word_prefix_pair_proximity_docids`
+    - `word_position_docids`
+    - `field_id_word_count_docids`
+    - `word_prefix_position_docids`
+    - `facet_id_f64_docids`
+    - `facet_id_string_docids`
+    - `documents_ids`
+    - `stop_words`
+    - `soft_deleted_documents_ids`
+    - `field_distribution`
+    - `fields_ids_map`
+    - `geo_faceted_documents_ids`
+    - `external_documents_ids`
+    - `number_faceted_documents_ids`
+    - `string_faceted_documents_ids`
+    - `words_fst`
+    - `words_prefixes_fst`
+
+3. The identifier for the snapshot test (optional). Any expression that can be formatted
+   with `{}` is accepted, except when it is combined with an inline snapshot (argument 4),
+   where it must be a literal.
+4. `@""` to write the snapshot inline (optional)
+
+## Behaviour
+The content of the database will be printed either inline or to the file system
+at `test_directory/snapshots/test_file.rs/test_name/db_name.snap`. When an identifier
+is given (argument 3), it is added as an extra directory right after `test_name`.
+
+If the database is too large (2048 bytes or more, or 256 bytes or more for an inline
+snapshot), then only the md5 hash of the database will be saved, with
+the name `db_name.hash.snap`. To *also* save the full content of the database anyway,
+set the `MILLI_TEST_FULL_SNAPS` environment variable to `true`. The full snapshot will
+be saved with the name `db_name.full.snap` but will not be saved to the git repository.
+For inline snapshots, the full content is still written to a file; only the hash (or the
+small snapshot) is written inline.
+
+Running `cargo test` will check whether the old snapshot is identical to the
+current one. If they are equal, the test passes. Otherwise, the test fails.
+
+Use the command line `cargo insta` to approve or reject new snapshots.
+
+## Example
+```ignore
+let index = TempIndex::new();
+
+// basic usages
+db_snap!(index, word_docids);
+
+// named snapshot to avoid conflicts
+db_snap!(index, word_docids, "some_identifier");
+
+// write the snapshot inline
+db_snap!(index, word_docids, @""); // will be autocompleted by running `cargo insta review`
+
+// give a name to the inline snapshot
+db_snap!(index, word_docids, "some_identifier", @"");
+```
+*/
+#[macro_export]
+macro_rules! db_snap {
+    // Named snapshot stored on the file system.
+    ($index:ident, $db_name:ident, $name:expr) => {
+        let settings = $crate::snapshot_tests::default_db_snapshot_settings_for_test(Some(
+            &format!("{}", $name),
+        ));
+        settings.bind(|| {
+            let snap = $crate::full_snap_of_db!($index, $db_name);
+            let snaps = $crate::snapshot_tests::convert_snap_to_hash_if_needed(stringify!($db_name), &snap, false);
+            for (name, snap) in snaps {
+                insta::assert_snapshot!(name, snap);
+            }
+        });
+    };
+    // Unnamed snapshot stored on the file system.
+    ($index:ident, $db_name:ident) => {
+        let settings = $crate::snapshot_tests::default_db_snapshot_settings_for_test(None);
+        settings.bind(|| {
+            let snap = $crate::full_snap_of_db!($index, $db_name);
+            let snaps = $crate::snapshot_tests::convert_snap_to_hash_if_needed(stringify!($db_name), &snap, false);
+            for (name, snap) in snaps {
+                insta::assert_snapshot!(name, snap);
+            }
+        });
+    };
+    // Unnamed inline snapshot; a `.full` entry, if any, still goes to the file system.
+    ($index:ident, $db_name:ident, @$inline:literal) => {
+        let settings = $crate::snapshot_tests::default_db_snapshot_settings_for_test(None);
+        settings.bind(|| {
+            let snap = $crate::full_snap_of_db!($index, $db_name);
+            let snaps = $crate::snapshot_tests::convert_snap_to_hash_if_needed(stringify!($db_name), &snap, true);
+            for (name, snap) in snaps {
+                if !name.ends_with(".full") {
+                    insta::assert_snapshot!(snap, @$inline);
+                } else {
+                    insta::assert_snapshot!(name, snap);
+                }
+            }
+        });
+    };
+    // Named inline snapshot; a `.full` entry, if any, still goes to the file system.
+    ($index:ident, $db_name:ident, $name:literal, @$inline:literal) => {
+        let settings = $crate::snapshot_tests::default_db_snapshot_settings_for_test(Some(&format!("{}", $name)));
+        settings.bind(|| {
+            let snap = $crate::full_snap_of_db!($index, $db_name);
+            let snaps = $crate::snapshot_tests::convert_snap_to_hash_if_needed(stringify!($db_name), &snap, true);
+            for (name, snap) in snaps {
+                if !name.ends_with(".full") {
+                    insta::assert_snapshot!(snap, @$inline);
+                } else {
+                    insta::assert_snapshot!(name, snap);
+                }
+            }
+        });
+    };
+}
+
+/// Snapshots the `word_docids` database, one entry per line: the key left-aligned on
+/// 16 columns, then the bitmap as rendered by `display_bitmap`.
+pub fn snap_word_docids(index: &Index) -> String {
+    make_db_snap_from_iter!(index, word_docids, |(key, bitmap)| {
+        &format!("{key:<16} {}", display_bitmap(&bitmap))
+    })
+}
+/// Snapshots the `exact_word_docids` database, one entry per line: the key left-aligned
+/// on 16 columns, then the bitmap as rendered by `display_bitmap`.
+pub fn snap_exact_word_docids(index: &Index) -> String {
+    make_db_snap_from_iter!(index, exact_word_docids, |(key, bitmap)| {
+        &format!("{key:<16} {}", display_bitmap(&bitmap))
+    })
+}
+/// Snapshots the `word_prefix_docids` database, one entry per line: the key left-aligned
+/// on 16 columns, then the bitmap as rendered by `display_bitmap`.
+pub fn snap_word_prefix_docids(index: &Index) -> String {
+    make_db_snap_from_iter!(index, word_prefix_docids, |(key, bitmap)| {
+        &format!("{key:<16} {}", display_bitmap(&bitmap))
+    })
+}
+/// Snapshots the `exact_word_prefix_docids` database, one entry per line: the key
+/// left-aligned on 16 columns, then the bitmap as rendered by `display_bitmap`.
+pub fn snap_exact_word_prefix_docids(index: &Index) -> String {
+    make_db_snap_from_iter!(index, exact_word_prefix_docids, |(key, bitmap)| {
+        &format!("{key:<16} {}", display_bitmap(&bitmap))
+    })
+}
+/// Snapshots the `docid_word_positions` database, one entry per line: the first key part
+/// on 6 columns, the second on 16 columns, then the bitmap as rendered by `display_bitmap`.
+pub fn snap_docid_word_positions(index: &Index) -> String {
+    make_db_snap_from_iter!(index, docid_word_positions, |((id, key), bitmap)| {
+        &format!("{id:<6} {key:<16} {}", display_bitmap(&bitmap))
+    })
+}
+/// Snapshots the `word_pair_proximity_docids` database, one entry per line: both words
+/// on 16 columns each, the proximity on 2 columns, then the bitmap as rendered by
+/// `display_bitmap`.
+pub fn snap_word_pair_proximity_docids(index: &Index) -> String {
+    make_db_snap_from_iter!(index, word_pair_proximity_docids, |(
+        (left_word, right_word, prox),
+        bitmap,
+    )| {
+        &format!("{left_word:<16} {right_word:<16} {prox:<2} {}", display_bitmap(&bitmap))
+    })
+}
+/// Snapshots the `word_prefix_pair_proximity_docids` database, one entry per line: the
+/// word on 16 columns, the prefix on 4 columns, the proximity on 2 columns, then the
+/// bitmap as rendered by `display_bitmap`.
+pub fn snap_word_prefix_pair_proximity_docids(index: &Index) -> String {
+    make_db_snap_from_iter!(index, word_prefix_pair_proximity_docids, |(
+        (left_word, right_prefix, prox),
+        bitmap,
+    )| {
+        &format!("{left_word:<16} {right_prefix:<4} {prox:<2} {}", display_bitmap(&bitmap))
+    })
+}
+/// Snapshots the `word_position_docids` database, one entry per line: the word on
+/// 16 columns, the position on 6 columns, then the bitmap as rendered by `display_bitmap`.
+pub fn snap_word_position_docids(index: &Index) -> String {
+    make_db_snap_from_iter!(index, word_position_docids, |((term, pos), bitmap)| {
+        &format!("{term:<16} {pos:<6} {}", display_bitmap(&bitmap))
+    })
+}
+/// Snapshots the `field_id_word_count_docids` database, one entry per line: the field id
+/// on 3 columns, the word count on 6 columns, then the bitmap as rendered by
+/// `display_bitmap`.
+pub fn snap_field_id_word_count_docids(index: &Index) -> String {
+    make_db_snap_from_iter!(index, field_id_word_count_docids, |((fid, count), bitmap)| {
+        &format!("{fid:<3} {count:<6} {}", display_bitmap(&bitmap))
+    })
+}
+/// Snapshots the `word_prefix_position_docids` database, one entry per line: the prefix
+/// on 4 columns, the position on 6 columns, then the bitmap as rendered by
+/// `display_bitmap`.
+pub fn snap_word_prefix_position_docids(index: &Index) -> String {
+    make_db_snap_from_iter!(index, word_prefix_position_docids, |((prefix, pos), bitmap)| {
+        &format!("{prefix:<4} {pos:<6} {}", display_bitmap(&bitmap))
+    })
+}
+/// Snapshots the `facet_id_f64_docids` database, one entry per line: the facet id on
+/// 3 columns, the level on 2 columns, the left and right bounds on 6 columns each, then
+/// the bitmap as rendered by `display_bitmap`.
+pub fn snap_facet_id_f64_docids(index: &Index) -> String {
+    make_db_snap_from_iter!(index, facet_id_f64_docids, |((fid, lvl, lo, hi), bitmap)| {
+        &format!("{fid:<3} {lvl:<2} {lo:<6} {hi:<6} {}", display_bitmap(&bitmap))
+    })
+}
+/// Snapshots the `facet_id_string_docids` database, one entry per line.
+///
+/// The database is read as raw bytes and each key is decoded by hand:
+/// - keys accepted by `FacetStringLevelZeroCodec` are printed as the field id, the
+///   normalized string, the original string and the bitmap;
+/// - otherwise, keys accepted by `FacetLevelValueU32Codec` are printed as the field id,
+///   the level, the left and right bounds, the optional string bounds stored in the
+///   value, and the bitmap.
+///
+/// # Panics
+///
+/// Panics if a key is accepted by neither codec, if a value cannot be decoded, or if the
+/// database cannot be read.
+pub fn snap_facet_id_string_docids(index: &Index) -> String {
+    let rtxn = index.read_txn().unwrap();
+    let bytes_db = index.facet_id_string_docids.remap_types::<ByteSlice, ByteSlice>();
+    let mut snap = String::new();
+
+    for entry in bytes_db.iter(&rtxn).unwrap() {
+        let (key, value) = entry.unwrap();
+        // The level-zero codec is tried first, exactly as before; order matters because
+        // both codecs read the same raw key bytes.
+        if let Some((field_id, normalized_str)) = FacetStringLevelZeroCodec::bytes_decode(key) {
+            let (orig_string, docids) =
+                FacetStringLevelZeroValueCodec::bytes_decode(value).unwrap();
+            writeln!(
+                &mut snap,
+                "{field_id:<3} {normalized_str:<8} {orig_string:<8} {}",
+                display_bitmap(&docids)
+            )
+            .unwrap();
+        } else if let Some((field_id, level, left, right)) =
+            FacetLevelValueU32Codec::bytes_decode(key)
+        {
+            write!(&mut snap, "{field_id:<3} {level:<2} {left:<6} {right:<6} ").unwrap();
+            let (bounds, docids) =
+                FacetStringZeroBoundsValueCodec::<CboRoaringBitmapCodec>::bytes_decode(value)
+                    .unwrap();
+            if let Some((left, right)) = bounds {
+                write!(&mut snap, "{left:<8} {right:<8} ").unwrap();
+            }
+            snap.push_str(&display_bitmap(&docids));
+            snap.push('\n');
+        } else {
+            // Previously a bare `panic!()`: name the offending key to make failures debuggable.
+            panic!(
+                "facet_id_string_docids: key {key:?} could not be decoded by \
+                 FacetStringLevelZeroCodec nor FacetLevelValueU32Codec"
+            );
+        }
+    }
+    snap
+}
+/// Snapshots the bitmap returned by `Index::documents_ids`, rendered by `display_bitmap`.
+pub fn snap_documents_ids(index: &Index) -> String {
+    let rtxn = index.read_txn().unwrap();
+    display_bitmap(&index.documents_ids(&rtxn).unwrap())
+}
+/// Snapshots the `Debug` rendering of the value returned by `Index::stop_words`.
+pub fn snap_stop_words(index: &Index) -> String {
+    let rtxn = index.read_txn().unwrap();
+    let stop_words = index.stop_words(&rtxn).unwrap();
+    format!("{stop_words:?}")
+}
+/// Snapshots the bitmap returned by `Index::soft_deleted_documents_ids`, rendered by
+/// `display_bitmap`.
+pub fn snap_soft_deleted_documents_ids(index: &Index) -> String {
+    let rtxn = index.read_txn().unwrap();
+    display_bitmap(&index.soft_deleted_documents_ids(&rtxn).unwrap())
+}
+/// Snapshots the field distribution, one `field count` line per entry: the field
+/// left-aligned on 16 columns and the count on 6 columns.
+pub fn snap_field_distributions(index: &Index) -> String {
+    let rtxn = index.read_txn().unwrap();
+    let distribution = index.field_distribution(&rtxn).unwrap();
+
+    let mut snap = String::new();
+    distribution
+        .into_iter()
+        .for_each(|(field, count)| writeln!(&mut snap, "{field:<16} {count:<6}").unwrap());
+    snap
+}
+/// Snapshots the fields-ids map, one `id name` line per field id: the id left-aligned on
+/// 3 columns and the name on 16 columns.
+pub fn snap_fields_ids_map(index: &Index) -> String {
+    let rtxn = index.read_txn().unwrap();
+    let map = index.fields_ids_map(&rtxn).unwrap();
+
+    let mut snap = String::new();
+    for id in map.ids() {
+        // Every id yielded by `ids()` is expected to have a name; a miss is a test failure.
+        let field_name = map.name(id).unwrap();
+        writeln!(&mut snap, "{id:<3} {field_name:<16}").unwrap();
+    }
+    snap
+}
+/// Snapshots the bitmap returned by `Index::geo_faceted_documents_ids`, rendered by
+/// `display_bitmap`.
+pub fn snap_geo_faceted_documents_ids(index: &Index) -> String {
+    let rtxn = index.read_txn().unwrap();
+    display_bitmap(&index.geo_faceted_documents_ids(&rtxn).unwrap())
+}
+/// Snapshots the `soft` and `hard` FSTs of the external documents ids as two lines,
+/// `soft: <hex>` and `hard: <hex>`, where `<hex>` is the lowercase hexadecimal rendering
+/// of the FST bytes.
+///
+/// NOTE(review): bytes are printed with `{:x}`, without zero padding, so a byte below
+/// `0x10` takes a single character. Stored snapshots depend on this exact output.
+pub fn snap_external_documents_ids(index: &Index) -> String {
+    // Concatenates the `{:x}` rendering of every byte.
+    let to_hex = |bytes: &[u8]| {
+        let mut hex = String::new();
+        for byte in bytes {
+            write!(&mut hex, "{:x}", byte).unwrap();
+        }
+        hex
+    };
+
+    let rtxn = index.read_txn().unwrap();
+    let ExternalDocumentsIds { soft, hard, .. } = index.external_documents_ids(&rtxn).unwrap();
+
+    let soft_fst = soft.into_fst();
+    let hex_soft = to_hex(soft_fst.as_bytes());
+    let hard_fst = hard.into_fst();
+    let hex_hard = to_hex(hard_fst.as_bytes());
+
+    let mut snap = String::new();
+    writeln!(&mut snap, "soft: {hex_soft}").unwrap();
+    writeln!(&mut snap, "hard: {hex_hard}").unwrap();
+    snap
+}
+/// Snapshots, for every field id of the fields-ids map, the bitmap returned by
+/// `Index::number_faceted_documents_ids`: one line per field with the id left-aligned on
+/// 3 columns followed by the bitmap as rendered by `display_bitmap`.
+pub fn snap_number_faceted_documents_ids(index: &Index) -> String {
+    let rtxn = index.read_txn().unwrap();
+    let map = index.fields_ids_map(&rtxn).unwrap();
+
+    let mut snap = String::new();
+    for fid in map.ids() {
+        let docids = index.number_faceted_documents_ids(&rtxn, fid).unwrap();
+        writeln!(&mut snap, "{fid:<3} {}", display_bitmap(&docids)).unwrap();
+    }
+    snap
+}
+/// Snapshots, for every field id of the fields-ids map, the bitmap returned by
+/// `Index::string_faceted_documents_ids`: one line per field with the id left-aligned on
+/// 3 columns followed by the bitmap as rendered by `display_bitmap`.
+pub fn snap_string_faceted_documents_ids(index: &Index) -> String {
+    let rtxn = index.read_txn().unwrap();
+    let map = index.fields_ids_map(&rtxn).unwrap();
+
+    let mut snap = String::new();
+    for fid in map.ids() {
+        let docids = index.string_faceted_documents_ids(&rtxn, fid).unwrap();
+        writeln!(&mut snap, "{fid:<3} {}", display_bitmap(&docids)).unwrap();
+    }
+    snap
+}
+/// Snapshots the bytes of the words FST as one lowercase hexadecimal string.
+///
+/// NOTE(review): bytes are printed with `{:x}`, without zero padding, so a byte below
+/// `0x10` takes a single character. Stored snapshots depend on this exact output.
+pub fn snap_words_fst(index: &Index) -> String {
+    let rtxn = index.read_txn().unwrap();
+    let fst = index.words_fst(&rtxn).unwrap().into_fst();
+
+    let mut hex = String::new();
+    for byte in fst.as_bytes() {
+        write!(&mut hex, "{:x}", byte).unwrap();
+    }
+    hex
+}
+/// Snapshots the bytes of the words-prefixes FST as one lowercase hexadecimal string.
+///
+/// NOTE(review): bytes are printed with `{:x}`, without zero padding, so a byte below
+/// `0x10` takes a single character. Stored snapshots depend on this exact output.
+pub fn snap_words_prefixes_fst(index: &Index) -> String {
+    let rtxn = index.read_txn().unwrap();
+    let fst = index.words_prefixes_fst(&rtxn).unwrap().into_fst();
+
+    let mut hex = String::new();
+    for byte in fst.as_bytes() {
+        write!(&mut hex, "{:x}", byte).unwrap();
+    }
+    hex
+}
+
+/// Snapshots a fixed list of index settings, one `name: value` line each, where `value`
+/// is the `Debug` rendering of what the `Index` accessor of the same name returns.
+pub fn snap_settings(index: &Index) -> String {
+    let rtxn = index.read_txn().unwrap();
+    let mut out = String::new();
+
+    // Appends `<accessor>: <Debug of index.<accessor>(&rtxn)>` followed by a newline.
+    macro_rules! dump_setting {
+        ($accessor:ident) => {
+            writeln!(
+                &mut out,
+                "{}: {:?}",
+                stringify!($accessor),
+                index.$accessor(&rtxn).unwrap()
+            )
+            .unwrap();
+        };
+    }
+
+    dump_setting!(primary_key);
+    dump_setting!(criteria);
+    dump_setting!(displayed_fields);
+    dump_setting!(distinct_field);
+    dump_setting!(filterable_fields);
+    dump_setting!(sortable_fields);
+    dump_setting!(synonyms);
+    dump_setting!(authorize_typos);
+    dump_setting!(min_word_len_one_typo);
+    dump_setting!(min_word_len_two_typos);
+    dump_setting!(exact_words);
+    dump_setting!(exact_attributes);
+    dump_setting!(max_values_per_facet);
+    dump_setting!(pagination_max_total_hits);
+    dump_setting!(searchable_fields);
+    dump_setting!(user_defined_searchable_fields);
+
+    out
+}
+
+/// Evaluates to the full (never hashed) snapshot `String` of one database of `$index`.
+///
+/// The second argument is matched literally against the names below and dispatched to
+/// the matching `snap_*` function of `snapshot_tests`, which receives `&$index`.
+/// Note that `field_distribution` maps to `snap_field_distributions`.
+/// A name that is not listed here does not match any arm and fails to compile.
+#[macro_export]
+macro_rules! full_snap_of_db {
+    ($index:ident, settings) => {{
+        $crate::snapshot_tests::snap_settings(&$index)
+    }};
+    ($index:ident, word_docids) => {{
+        $crate::snapshot_tests::snap_word_docids(&$index)
+    }};
+    ($index:ident, exact_word_docids) => {{
+        $crate::snapshot_tests::snap_exact_word_docids(&$index)
+    }};
+    ($index:ident, word_prefix_docids) => {{
+        $crate::snapshot_tests::snap_word_prefix_docids(&$index)
+    }};
+    ($index:ident, exact_word_prefix_docids) => {{
+        $crate::snapshot_tests::snap_exact_word_prefix_docids(&$index)
+    }};
+    ($index:ident, docid_word_positions) => {{
+        $crate::snapshot_tests::snap_docid_word_positions(&$index)
+    }};
+    ($index:ident, word_pair_proximity_docids) => {{
+        $crate::snapshot_tests::snap_word_pair_proximity_docids(&$index)
+    }};
+    ($index:ident, word_prefix_pair_proximity_docids) => {{
+        $crate::snapshot_tests::snap_word_prefix_pair_proximity_docids(&$index)
+    }};
+    ($index:ident, word_position_docids) => {{
+        $crate::snapshot_tests::snap_word_position_docids(&$index)
+    }};
+    ($index:ident, field_id_word_count_docids) => {{
+        $crate::snapshot_tests::snap_field_id_word_count_docids(&$index)
+    }};
+    ($index:ident, word_prefix_position_docids) => {{
+        $crate::snapshot_tests::snap_word_prefix_position_docids(&$index)
+    }};
+    ($index:ident, facet_id_f64_docids) => {{
+        $crate::snapshot_tests::snap_facet_id_f64_docids(&$index)
+    }};
+    ($index:ident, facet_id_string_docids) => {{
+        $crate::snapshot_tests::snap_facet_id_string_docids(&$index)
+    }};
+    ($index:ident, documents_ids) => {{
+        $crate::snapshot_tests::snap_documents_ids(&$index)
+    }};
+    ($index:ident, stop_words) => {{
+        $crate::snapshot_tests::snap_stop_words(&$index)
+    }};
+    ($index:ident, soft_deleted_documents_ids) => {{
+        $crate::snapshot_tests::snap_soft_deleted_documents_ids(&$index)
+    }};
+    ($index:ident, field_distribution) => {{
+        $crate::snapshot_tests::snap_field_distributions(&$index)
+    }};
+    ($index:ident, fields_ids_map) => {{
+        $crate::snapshot_tests::snap_fields_ids_map(&$index)
+    }};
+    ($index:ident, geo_faceted_documents_ids) => {{
+        $crate::snapshot_tests::snap_geo_faceted_documents_ids(&$index)
+    }};
+    ($index:ident, external_documents_ids) => {{
+        $crate::snapshot_tests::snap_external_documents_ids(&$index)
+    }};
+    ($index:ident, number_faceted_documents_ids) => {{
+        $crate::snapshot_tests::snap_number_faceted_documents_ids(&$index)
+    }};
+    ($index:ident, string_faceted_documents_ids) => {{
+        $crate::snapshot_tests::snap_string_faceted_documents_ids(&$index)
+    }};
+    ($index:ident, words_fst) => {{
+        $crate::snapshot_tests::snap_words_fst(&$index)
+    }};
+    ($index:ident, words_prefixes_fst) => {{
+        $crate::snapshot_tests::snap_words_prefixes_fst(&$index)
+    }};
+}
+
+/// Decides how a snapshot is stored: in full when it is small, as an md5 hash otherwise.
+///
+/// - `name`: base name of the snapshot.
+/// - `snap`: full textual content of the snapshot.
+/// - `inline`: whether the snapshot is written inline; the size limit is then 256 bytes
+///   instead of 2048.
+///
+/// Returns the `(snapshot name, content)` pairs to assert:
+/// - `[(name, snap)]` when `snap` is shorter than the limit;
+/// - otherwise `(name.hash, <md5 of snap in lowercase hex>)`, preceded by
+///   `(name.full, snap)` when the `MILLI_TEST_FULL_SNAPS` environment variable is `true`.
+///
+/// # Panics
+///
+/// Panics if `MILLI_TEST_FULL_SNAPS` is set to something other than `true` or `false`.
+pub fn convert_snap_to_hash_if_needed<'snap>(
+    name: &str,
+    snap: &'snap str,
+    inline: bool,
+) -> Vec<(String, Cow<'snap, str>)> {
+    // An unset (or non-unicode) variable means `false`; no default `String` is allocated.
+    let store_whole_snapshot = match std::env::var("MILLI_TEST_FULL_SNAPS") {
+        Ok(value) => value
+            .parse::<bool>()
+            .expect("MILLI_TEST_FULL_SNAPS must be either `true` or `false`"),
+        Err(_) => false,
+    };
+
+    let max_len = if inline { 256 } else { 2048 };
+    if snap.len() < max_len {
+        return vec![(name.to_owned(), Cow::Borrowed(snap))];
+    }
+
+    let mut snaps = Vec::with_capacity(2);
+    if store_whole_snapshot {
+        snaps.push((format!("{name}.full"), Cow::Borrowed(snap)));
+    }
+    let hash = md5::compute(snap.as_bytes());
+    snaps.push((format!("{name}.hash"), Cow::Owned(format!("{hash:x}"))));
+    snaps
+}
+
+/// Builds a snapshot `String` from every entry of the database `$index.$name`.
+///
+/// Opens a read transaction on `$index`, iterates over `$index.$name` and, for each
+/// entry, binds it to the pattern `$vars`, evaluates the block `$push` and appends its
+/// result (anything accepted by `String::push_str`) followed by a newline.
+///
+/// Panics (through `unwrap`) if the transaction cannot be opened, the iterator cannot be
+/// created, or an entry cannot be read.
+#[macro_export]
+macro_rules! make_db_snap_from_iter {
+    ($index:ident, $name:ident, |$vars:pat| $push:block) => {{
+        let rtxn = $index.read_txn().unwrap();
+        let iter = $index.$name.iter(&rtxn).unwrap();
+        let mut snap = String::new();
+        for x in iter {
+            // `$vars` is a caller-provided pattern, so its bindings are visible in `$push`.
+            let $vars = x.unwrap();
+            snap.push_str($push);
+            snap.push('\n');
+        }
+        snap
+    }};
+}
+
+/// Renders a bitmap as `[a, b, c, ]`: every value is followed by `", "` (including the
+/// last one), and an empty bitmap is rendered as `[]`.
+pub fn display_bitmap(b: &RoaringBitmap) -> String {
+    let mut rendered = String::from("[");
+    b.into_iter().for_each(|value| write!(&mut rendered, "{value}, ").unwrap());
+    rendered.push(']');
+    rendered
+}
--- a/milli/src/snapshots/index.rs/initial_field_distribution/1/field_distribution.snap
+++ b/milli/src/snapshots/index.rs/initial_field_distribution/1/field_distribution.snap
@ -0,0 +1,7 @@
+---
+source: milli/src/index.rs
+---
+age              1     
+id               2     
+name             2     
+
--- a/milli/src/snapshots/index.rs/initial_field_distribution/field_distribution.snap
+++ b/milli/src/snapshots/index.rs/initial_field_distribution/field_distribution.snap
@ -0,0 +1,7 @@
+---
+source: milli/src/index.rs
+---
+age              1     
+id               2     
+name             2     
+
--- a/milli/src/update/facets.rs
+++ b/milli/src/update/facets.rs
@ -342,3 +342,93 @@ fn write_string_entry(
    writer.insert(&key, &data)?;
    Ok(())
 }
+
+#[cfg(test)]
+mod tests {
+    use std::num::NonZeroUsize;
+
+    use crate::db_snap;
+    use crate::documents::documents_batch_reader_from_objects;
+    use crate::index::tests::TempIndex;
+
+    /// Creates a temporary index using the given facet level configuration, declares
+    /// `facet` and `facet2` as filterable fields and indexes `documents` into it.
+    fn index_with_documents(
+        group_size: Option<NonZeroUsize>,
+        min_level_size: Option<NonZeroUsize>,
+        documents: Vec<crate::Object>,
+    ) -> TempIndex {
+        let mut index = TempIndex::new_with_map_size(4096 * 1000 * 10); // 40MB
+        index.index_documents_config.autogenerate_docids = true;
+        index.index_documents_config.facet_level_group_size = group_size;
+        index.index_documents_config.facet_min_level_size = min_level_size;
+
+        index
+            .update_settings(|settings| {
+                settings.set_filterable_fields(
+                    IntoIterator::into_iter(["facet".to_owned(), "facet2".to_owned()])
+                        .collect(),
+                );
+            })
+            .unwrap();
+
+        index.add_documents(documents_batch_reader_from_objects(documents)).unwrap();
+        index
+    }
+
+    /// Snapshots `facet_id_f64_docids` for several facet level configurations, using
+    /// 1000 documents with a numeric `facet` and 100 documents with a numeric `facet2`.
+    #[test]
+    fn test_facets_number() {
+        let test =
+            |name: &str, group_size: Option<NonZeroUsize>, min_level_size: Option<NonZeroUsize>| {
+                let documents = (0..1_000)
+                    .map(|i| serde_json::json!({ "facet": i }))
+                    .chain((0..100).map(|i| serde_json::json!({ "facet2": i })))
+                    .map(|document| document.as_object().unwrap().clone())
+                    .collect();
+                let index = index_with_documents(group_size, min_level_size, documents);
+
+                db_snap!(index, facet_id_f64_docids, name);
+            };
+
+        test("default", None, None);
+        test("tiny_groups_tiny_levels", NonZeroUsize::new(1), NonZeroUsize::new(1));
+        test("small_groups_small_levels", NonZeroUsize::new(2), NonZeroUsize::new(2));
+        test("small_groups_large_levels", NonZeroUsize::new(2), NonZeroUsize::new(128));
+        test("large_groups_small_levels", NonZeroUsize::new(16), NonZeroUsize::new(2));
+        test("large_groups_large_levels", NonZeroUsize::new(16), NonZeroUsize::new(256));
+    }
+
+    /// Snapshots `facet_id_string_docids` for two facet level configurations, using
+    /// 100 documents with a string `facet` and 10 documents with a string `facet2`.
+    #[test]
+    fn test_facets_string() {
+        let test =
+            |name: &str, group_size: Option<NonZeroUsize>, min_level_size: Option<NonZeroUsize>| {
+                let documents = (0..100)
+                    .map(|i| serde_json::json!({ "facet": format!("s{i:X}") }))
+                    .chain((0..10).map(|i| serde_json::json!({ "facet2": format!("s{i:X}") })))
+                    .map(|document| document.as_object().unwrap().clone())
+                    .collect();
+                let index = index_with_documents(group_size, min_level_size, documents);
+
+                db_snap!(index, facet_id_string_docids, name);
+            };
+
+        test("default", None, None);
+        test("tiny_groups_tiny_levels", NonZeroUsize::new(1), NonZeroUsize::new(1));
+    }
+}
--- a/milli/src/update/snapshots/facets.rs/test_facets_number/default/facet_id_f64_docids.hash.snap
+++ b/milli/src/update/snapshots/facets.rs/test_facets_number/default/facet_id_f64_docids.hash.snap
@ -0,0 +1,4 @@
+---
+source: milli/src/update/facets.rs
+---
+587899707db2848da3f18399e14ed4d0
--- a/milli/src/update/snapshots/facets.rs/test_facets_number/large_groups_large_levels/facet_id_f64_docids.hash.snap
+++ b/milli/src/update/snapshots/facets.rs/test_facets_number/large_groups_large_levels/facet_id_f64_docids.hash.snap
@ -0,0 +1,4 @@
+---
+source: milli/src/update/facets.rs
+---
+02bbf2ca1663cccea0e4c06d5ad06a45
--- a/milli/src/update/snapshots/facets.rs/test_facets_number/large_groups_small_levels/facet_id_f64_docids.hash.snap
+++ b/milli/src/update/snapshots/facets.rs/test_facets_number/large_groups_small_levels/facet_id_f64_docids.hash.snap
@ -0,0 +1,4 @@
+---
+source: milli/src/update/facets.rs
+---
+e68ea591e1af3e53e544dff9a1648e88
--- a/milli/src/update/snapshots/facets.rs/test_facets_number/small_groups_large_levels/facet_id_f64_docids.hash.snap
+++ b/milli/src/update/snapshots/facets.rs/test_facets_number/small_groups_large_levels/facet_id_f64_docids.hash.snap
@ -0,0 +1,4 @@
+---
+source: milli/src/update/facets.rs
+---
+12a4bb0f5b95d7629c2b9a915150c0cf
--- a/milli/src/update/snapshots/facets.rs/test_facets_number/small_groups_small_levels/facet_id_f64_docids.hash.snap
+++ b/milli/src/update/snapshots/facets.rs/test_facets_number/small_groups_small_levels/facet_id_f64_docids.hash.snap
@ -0,0 +1,4 @@
+---
+source: milli/src/update/facets.rs
+---
+6438e94bc7fada13022e0efccdf294e0
--- a/milli/src/update/snapshots/facets.rs/test_facets_number/tiny_groups_tiny_levels/facet_id_f64_docids.hash.snap
+++ b/milli/src/update/snapshots/facets.rs/test_facets_number/tiny_groups_tiny_levels/facet_id_f64_docids.hash.snap
@ -0,0 +1,4 @@
+---
+source: milli/src/update/facets.rs
+---
+5348bbc46b5384455b6a900666d2a502
--- a/milli/src/update/snapshots/facets.rs/test_facets_string/default/facet_id_string_docids.hash.snap
+++ b/milli/src/update/snapshots/facets.rs/test_facets_string/default/facet_id_string_docids.hash.snap
@ -0,0 +1,4 @@
+---
+source: milli/src/update/facets.rs
+---
+faddef9eae5f2efacfec51f20f2e8cd6
--- a/milli/src/update/snapshots/facets.rs/test_facets_string/tiny_groups_tiny_levels/facet_id_string_docids.hash.snap
+++ b/milli/src/update/snapshots/facets.rs/test_facets_string/tiny_groups_tiny_levels/facet_id_string_docids.hash.snap
@ -0,0 +1,4 @@
+---
+source: milli/src/update/facets.rs
+---
+ddb8fc987c5dc892337682595043858e
--- a/milli/src/update/snapshots/word_prefix_pair_proximity_docids.rs/test_update/initial/word_prefix_pair_proximity_docids.snap
+++ b/milli/src/update/snapshots/word_prefix_pair_proximity_docids.rs/test_update/initial/word_prefix_pair_proximity_docids.snap
@ -0,0 +1,46 @@
+---
+source: milli/src/update/word_prefix_pair_proximity_docids.rs
+---
+5                a    1  [101, ]
+5                a    2  [101, ]
+5                b    4  [101, ]
+5                be   4  [101, ]
+am               a    3  [101, ]
+amazing          a    1  [100, ]
+amazing          a    2  [100, ]
+amazing          a    3  [100, ]
+amazing          b    2  [100, ]
+amazing          be   2  [100, ]
+an               a    1  [100, ]
+an               a    2  [100, ]
+an               b    3  [100, ]
+an               be   3  [100, ]
+and              a    2  [100, ]
+and              a    3  [100, ]
+and              a    4  [100, ]
+and              b    1  [100, ]
+and              be   1  [100, ]
+at               a    1  [100, ]
+at               a    2  [100, 101, ]
+at               a    3  [100, ]
+at               b    3  [101, ]
+at               b    4  [100, ]
+at               be   3  [101, ]
+at               be   4  [100, ]
+beautiful        a    2  [100, ]
+beautiful        a    3  [100, ]
+beautiful        a    4  [100, ]
+bell             a    2  [101, ]
+bell             a    4  [101, ]
+house            a    3  [100, ]
+house            a    4  [100, ]
+house            b    2  [100, ]
+house            be   2  [100, ]
+rings            a    1  [101, ]
+rings            a    3  [101, ]
+rings            b    2  [101, ]
+rings            be   2  [101, ]
+the              a    3  [101, ]
+the              b    1  [101, ]
+the              be   1  [101, ]
+
--- a/milli/src/update/snapshots/word_prefix_pair_proximity_docids.rs/test_update/update/word_prefix_pair_proximity_docids.snap
+++ b/milli/src/update/snapshots/word_prefix_pair_proximity_docids.rs/test_update/update/word_prefix_pair_proximity_docids.snap
@ -0,0 +1,56 @@
+---
+source: milli/src/update/word_prefix_pair_proximity_docids.rs
+---
+5                a    1  [101, ]
+5                a    2  [101, ]
+5                am   1  [101, ]
+5                b    4  [101, ]
+5                be   4  [101, ]
+am               a    3  [101, ]
+amazing          a    1  [100, ]
+amazing          a    2  [100, ]
+amazing          a    3  [100, ]
+amazing          b    2  [100, ]
+amazing          be   2  [100, ]
+an               a    1  [100, ]
+an               a    2  [100, 202, ]
+an               am   1  [100, ]
+an               b    3  [100, ]
+an               be   3  [100, ]
+and              a    2  [100, ]
+and              a    3  [100, ]
+and              a    4  [100, ]
+and              am   2  [100, ]
+and              b    1  [100, ]
+and              be   1  [100, ]
+at               a    1  [100, 202, ]
+at               a    2  [100, 101, ]
+at               a    3  [100, ]
+at               am   2  [100, 101, ]
+at               b    3  [101, ]
+at               b    4  [100, ]
+at               be   3  [101, ]
+at               be   4  [100, ]
+beautiful        a    2  [100, ]
+beautiful        a    3  [100, ]
+beautiful        a    4  [100, ]
+beautiful        am   3  [100, ]
+bell             a    2  [101, ]
+bell             a    4  [101, ]
+bell             am   4  [101, ]
+extraordinary    a    2  [202, ]
+extraordinary    a    3  [202, ]
+house            a    3  [100, 202, ]
+house            a    4  [100, 202, ]
+house            am   4  [100, ]
+house            b    2  [100, ]
+house            be   2  [100, ]
+rings            a    1  [101, ]
+rings            a    3  [101, ]
+rings            am   3  [101, ]
+rings            b    2  [101, ]
+rings            be   2  [101, ]
+the              a    3  [101, ]
+the              b    1  [101, ]
+the              be   1  [101, ]
+
--- a/milli/src/update/word_prefix_pair_proximity_docids.rs
+++ b/milli/src/update/word_prefix_pair_proximity_docids.rs
@ -244,3 +244,88 @@ fn insert_current_prefix_data_in_sorter<'a>(

    Ok(())
 }
+
+#[cfg(test)]
+mod tests {
+    use std::io::Cursor;
+
+    use crate::db_snap;
+    use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
+    use crate::index::tests::TempIndex;
+
+    /// Builds the JSON object `{ "text": <text> }` used as a document by these tests.
+    fn document_with_text(text: &str) -> crate::Object {
+        serde_json::json!({ "text": text }).as_object().unwrap().clone()
+    }
+
+    /// Returns 50 single-word documents for every entry of `prefixes`, in prefix order.
+    ///
+    /// Each word is the prefix followed by a counter from 0 to 49 written in lowercase
+    /// hexadecimal (`a0`, `a1`, ..., `a31`), so each prefix starts at least 50 distinct words.
+    /// NOTE(review): 50 matches the `words_prefix_threshold` set in `test_update`; presumably
+    /// that is what makes the indexer register each prefix -- confirm against the indexer.
+    fn documents_with_enough_different_words_for_prefixes(prefixes: &[&str]) -> Vec<crate::Object> {
+        prefixes
+            .iter()
+            .flat_map(|prefix| {
+                (0..50).map(move |i| {
+                    let word = format!("{prefix}{i:x}");
+                    document_with_text(&word)
+                })
+            })
+            .collect()
+    }
+
+    /// Snapshots the `word_prefix_pair_proximity_docids` database twice: once after a first
+    /// batch of documents is indexed (`"initial"`), and once more after a second batch is
+    /// added on top of it (`"update"`).
+    #[test]
+    fn test_update() {
+        let mut index = TempIndex::new();
+        index.index_documents_config.autogenerate_docids = true;
+        index.index_documents_config.words_prefix_threshold = Some(50);
+
+        index
+            .update_settings(|settings| {
+                settings.set_searchable_fields(vec!["text".to_owned()]);
+            })
+            .unwrap();
+
+        // Serializes the objects into an in-memory documents batch and opens a reader on it.
+        let to_batch_reader = |objects: Vec<crate::Object>| {
+            let mut batch = DocumentsBatchBuilder::new(Vec::new());
+            for object in &objects {
+                batch.append_json_object(object).unwrap();
+            }
+            let bytes = batch.into_inner().unwrap();
+            DocumentsBatchReader::from_reader(Cursor::new(bytes)).unwrap()
+        };
+
+        // First batch: the generated words for the prefixes "a" and "be", followed by two
+        // sentences whose words should populate the word_prefix_pair_proximity_docids database.
+        let mut first_batch = documents_with_enough_different_words_for_prefixes(&["a", "be"]);
+        first_batch.push(document_with_text("At an amazing and beautiful house"));
+        first_batch.push(document_with_text("The bell rings at 5 am"));
+        index.add_documents(to_batch_reader(first_batch)).unwrap();
+
+        db_snap!(index, word_prefix_pair_proximity_docids, "initial");
+
+        // Second batch: the generated words for the prefixes "am" and "an", plus one more
+        // sentence, indexed on top of the existing content.
+        let mut second_batch = documents_with_enough_different_words_for_prefixes(&["am", "an"]);
+        second_batch.push(document_with_text("At an extraordinary house"));
+        index.add_documents(to_batch_reader(second_batch)).unwrap();
+
+        db_snap!(index, word_prefix_pair_proximity_docids, "update");
+    }
+}