Add snapshot tests for indexing of word_prefix_pair_proximity_docids

2025-05-18 02:03:57 +02:00 · 2022-08-03 16:49:03 +02:00 · 2022-08-03 16:49:03 +02:00 · 6066256689
commit 6066256689
parent 3a734af159
5 changed files with 195 additions and 13 deletions
--- a/milli/src/documents/mod.rs
+++ b/milli/src/documents/mod.rs
@ -190,17 +190,6 @@ pub fn documents_batch_reader_from_objects(
    DocumentsBatchReader::from_reader(std::io::Cursor::new(builder.into_inner().unwrap())).unwrap()
 }
 #[cfg(test)]
 /// Serializes `documents` into an in-memory documents batch and returns a reader over it.
 ///
 /// Test-only helper (gated by `#[cfg(test)]`).
 ///
 /// # Panics
 ///
 /// Panics if an object cannot be appended to the batch, if the builder cannot be
 /// finalized, or if the resulting bytes cannot be opened as a batch reader.
 pub fn batch_reader_from_documents(
    documents: &[Object],
 ) -> DocumentsBatchReader<std::io::Cursor<Vec<u8>>> {
    let mut builder = DocumentsBatchBuilder::new(Vec::new());
    for object in documents {
        // Iterating a slice already yields `&Object`, so no additional borrow is needed.
        builder.append_json_object(object).unwrap();
    }
    let bytes = builder.into_inner().unwrap();
    DocumentsBatchReader::from_reader(std::io::Cursor::new(bytes)).unwrap()
 }
 #[cfg(test)]
 mod test {
    use std::io::Cursor;
--- a/milli/src/update/facets.rs
+++ b/milli/src/update/facets.rs
@ -347,7 +347,11 @@ fn write_string_entry(
 mod tests {
    use std::num::NonZeroUsize;
-    use crate::{db_snap, documents::batch_reader_from_documents, index::tests::TempIndex};
+    use crate::{
        db_snap,
        documents::{batch_reader_from_documents, documents_batch_reader_from_objects},
        index::tests::TempIndex,
    };
    #[test]
    fn test_facets_number() {
@ -419,7 +423,7 @@ mod tests {
                    serde_json::json!({ "facet2": format!("s{i:X}") }).as_object().unwrap().clone(),
                );
            }
-            let documents = batch_reader_from_documents(&documents);
+            let documents = documents_batch_reader_from_objects(documents);
            index.add_documents(documents).unwrap();
--- a/milli/src/update/snapshots/update/word_prefix_pair_proximity_docids.rs/test_update/initial/word_prefix_pair_proximity_docids.snap
+++ b/milli/src/update/snapshots/update/word_prefix_pair_proximity_docids.rs/test_update/initial/word_prefix_pair_proximity_docids.snap
@ -0,0 +1,46 @@
 ---
 source: milli/src/update/word_prefix_pair_proximity_docids.rs
 ---
 5                a    1  [101, ]
 5                a    2  [101, ]
 5                b    4  [101, ]
 5                be   4  [101, ]
 am               a    3  [101, ]
 amazing          a    1  [100, ]
 amazing          a    2  [100, ]
 amazing          a    3  [100, ]
 amazing          b    2  [100, ]
 amazing          be   2  [100, ]
 an               a    1  [100, ]
 an               a    2  [100, ]
 an               b    3  [100, ]
 an               be   3  [100, ]
 and              a    2  [100, ]
 and              a    3  [100, ]
 and              a    4  [100, ]
 and              b    1  [100, ]
 and              be   1  [100, ]
 at               a    1  [100, ]
 at               a    2  [100, 101, ]
 at               a    3  [100, ]
 at               b    3  [101, ]
 at               b    4  [100, ]
 at               be   3  [101, ]
 at               be   4  [100, ]
 beautiful        a    2  [100, ]
 beautiful        a    3  [100, ]
 beautiful        a    4  [100, ]
 bell             a    2  [101, ]
 bell             a    4  [101, ]
 house            a    3  [100, ]
 house            a    4  [100, ]
 house            b    2  [100, ]
 house            be   2  [100, ]
 rings            a    1  [101, ]
 rings            a    3  [101, ]
 rings            b    2  [101, ]
 rings            be   2  [101, ]
 the              a    3  [101, ]
 the              b    1  [101, ]
 the              be   1  [101, ]
--- a/milli/src/update/snapshots/update/word_prefix_pair_proximity_docids.rs/test_update/update/word_prefix_pair_proximity_docids.snap
+++ b/milli/src/update/snapshots/update/word_prefix_pair_proximity_docids.rs/test_update/update/word_prefix_pair_proximity_docids.snap
@ -0,0 +1,56 @@
 ---
 source: milli/src/update/word_prefix_pair_proximity_docids.rs
 ---
 5                a    1  [101, ]
 5                a    2  [101, ]
 5                am   1  [101, ]
 5                b    4  [101, ]
 5                be   4  [101, ]
 am               a    3  [101, ]
 amazing          a    1  [100, ]
 amazing          a    2  [100, ]
 amazing          a    3  [100, ]
 amazing          b    2  [100, ]
 amazing          be   2  [100, ]
 an               a    1  [100, ]
 an               a    2  [100, 202, ]
 an               am   1  [100, ]
 an               b    3  [100, ]
 an               be   3  [100, ]
 and              a    2  [100, ]
 and              a    3  [100, ]
 and              a    4  [100, ]
 and              am   2  [100, ]
 and              b    1  [100, ]
 and              be   1  [100, ]
 at               a    1  [100, 202, ]
 at               a    2  [100, 101, ]
 at               a    3  [100, ]
 at               am   2  [100, 101, ]
 at               b    3  [101, ]
 at               b    4  [100, ]
 at               be   3  [101, ]
 at               be   4  [100, ]
 beautiful        a    2  [100, ]
 beautiful        a    3  [100, ]
 beautiful        a    4  [100, ]
 beautiful        am   3  [100, ]
 bell             a    2  [101, ]
 bell             a    4  [101, ]
 bell             am   4  [101, ]
 extraordinary    a    2  [202, ]
 extraordinary    a    3  [202, ]
 house            a    3  [100, 202, ]
 house            a    4  [100, 202, ]
 house            am   4  [100, ]
 house            b    2  [100, ]
 house            be   2  [100, ]
 rings            a    1  [101, ]
 rings            a    3  [101, ]
 rings            am   3  [101, ]
 rings            b    2  [101, ]
 rings            be   2  [101, ]
 the              a    3  [101, ]
 the              b    1  [101, ]
 the              be   1  [101, ]
--- a/milli/src/update/word_prefix_pair_proximity_docids.rs
+++ b/milli/src/update/word_prefix_pair_proximity_docids.rs
@ -244,3 +244,90 @@ fn insert_current_prefix_data_in_sorter<'a>(
    Ok(())
 }
 #[cfg(test)]
 mod tests {
    use std::io::Cursor;
    use crate::{
        db_snap,
        documents::{DocumentsBatchBuilder, DocumentsBatchReader},
        index::tests::TempIndex,
    };
    /// Builds 50 single-field documents for every entry of `prefixes`.
    ///
    /// Each document has a `text` field made of the prefix followed by a counter in
    /// `0..50` written in lowercase hexadecimal, so every generated word is distinct
    /// within its prefix. Documents are emitted prefix by prefix, in the order given.
    fn documents_with_enough_different_words_for_prefixes(prefixes: &[&str]) -> Vec<crate::Object> {
        prefixes
            .iter()
            .flat_map(|prefix| {
                (0..50).map(move |i| {
                    let document = serde_json::json!({
                        "text": format!("{prefix}{i:x}"),
                    });
                    // `json!` with an object literal always yields a JSON object.
                    document.as_object().unwrap().clone()
                })
            })
            .collect()
    }
    #[test]
    fn test_update() {
        let mut index = TempIndex::new();
        index.index_documents_config.words_prefix_threshold = Some(50);
        index.index_documents_config.autogenerate_docids = true;
        index
            .update_settings(|settings| {
                settings.set_searchable_fields(vec!["text".to_owned()]);
            })
            .unwrap();
        let batch_reader_from_documents = |documents| {
            let mut builder = DocumentsBatchBuilder::new(Vec::new());
            for object in documents {
                builder.append_json_object(&object).unwrap();
            }
            DocumentsBatchReader::from_reader(Cursor::new(builder.into_inner().unwrap())).unwrap()
        };
        let mut documents = documents_with_enough_different_words_for_prefixes(&["a", "be"]);
        // now we add some documents where the text should populate the word_prefix_pair_proximity_docids database
        documents.push(
            serde_json::json!({
                "text": "At an amazing and beautiful house"
            })
            .as_object()
            .unwrap()
            .clone(),
        );
        documents.push(
            serde_json::json!({
                "text": "The bell rings at 5 am"
            })
            .as_object()
            .unwrap()
            .clone(),
        );
        let documents = batch_reader_from_documents(documents);
        index.add_documents(documents).unwrap();
        db_snap!(index, word_prefix_pair_proximity_docids, "initial");
        let mut documents = documents_with_enough_different_words_for_prefixes(&["am", "an"]);
        documents.push(
            serde_json::json!({
                "text": "At an extraordinary house"
            })
            .as_object()
            .unwrap()
            .clone(),
        );
        let documents = batch_reader_from_documents(documents);
        index.add_documents(documents).unwrap();
        db_snap!(index, word_prefix_pair_proximity_docids, "update");
    }
 }