diff --git a/.gitignore b/.gitignore
index 107b5bb36..02c4fcd79 100644
--- a/.gitignore
+++ b/.gitignore
@@ -6,3 +6,10 @@
 *.csv
 *.mmdb
 *.svg
+
+# Snapshots
+## ... large
+*.full.snap
+
+# ... unreviewed
+*.snap.new
diff --git a/milli/src/documents/mod.rs b/milli/src/documents/mod.rs
index c1580309a..5c83991c2 100644
--- a/milli/src/documents/mod.rs
+++ b/milli/src/documents/mod.rs
@@ -190,6 +190,24 @@ pub fn documents_batch_reader_from_objects(
     DocumentsBatchReader::from_reader(std::io::Cursor::new(builder.into_inner().unwrap())).unwrap()
 }
 
+/// Test-only helper: appends every object of `documents` to an in-memory
+/// documents batch and returns a `DocumentsBatchReader` over the resulting bytes.
+///
+/// # Panics
+///
+/// Panics if appending an object, finalizing the builder, or creating the
+/// reader fails.
+#[cfg(test)]
+pub fn batch_reader_from_documents(
+    documents: &[Object],
+) -> DocumentsBatchReader<std::io::Cursor<Vec<u8>>> {
+    let mut builder = DocumentsBatchBuilder::new(Vec::new());
+    for object in documents {
+        builder.append_json_object(object).unwrap();
+    }
+    DocumentsBatchReader::from_reader(std::io::Cursor::new(builder.into_inner().unwrap())).unwrap()
+}
+
 #[cfg(test)]
 mod test {
     use std::io::Cursor;
diff --git a/milli/src/snapshot_tests.rs b/milli/src/snapshot_tests.rs
index 77eeeb159..a881a155e 100644
--- a/milli/src/snapshot_tests.rs
+++ b/milli/src/snapshot_tests.rs
@@ -30,7 +30,7 @@ pub fn default_db_snapshot_settings_for_test(name: Option<&str>) -> insta::Setti
 
 #[macro_export]
 macro_rules! db_snap {
-    ($index:ident, $db_name:ident, $name:literal) => {
+    ($index:ident, $db_name:ident, $name:expr) => {
         let settings = $crate::snapshot_tests::default_db_snapshot_settings_for_test(Some(
             &format!("{}", $name),
         ));
diff --git a/milli/src/update/facets.rs b/milli/src/update/facets.rs
index 5892123eb..981fa819c 100644
--- a/milli/src/update/facets.rs
+++ b/milli/src/update/facets.rs
@@ -342,3 +342,99 @@ fn write_string_entry(
     writer.insert(&key, &data)?;
     Ok(())
 }
+
+#[cfg(test)]
+mod tests {
+    use std::num::NonZeroUsize;
+
+    use crate::{db_snap, documents::batch_reader_from_documents, index::tests::TempIndex};
+
+    /// Snapshots `facet_id_f64_docids` after indexing numeric facet values with
+    /// several `facet_level_group_size` / `facet_min_level_size` settings.
+    #[test]
+    fn test_facets_number() {
+        // Every configuration gets its own fresh index; `name` is forwarded to
+        // `db_snap!` to label the snapshot.
+        let test =
+            |name: &str, group_size: Option<NonZeroUsize>, min_level_size: Option<NonZeroUsize>| {
+                let mut index = TempIndex::new_with_map_size(4096 * 1000 * 10); // 40MB
+                index.index_documents_config.autogenerate_docids = true;
+                index.index_documents_config.facet_level_group_size = group_size;
+                index.index_documents_config.facet_min_level_size = min_level_size;
+
+                index
+                    .update_settings(|settings| {
+                        settings.set_filterable_fields(
+                            IntoIterator::into_iter(["facet".to_owned(), "facet2".to_owned()])
+                                .collect(),
+                        );
+                    })
+                    .unwrap();
+
+                // 1000 documents with `facet` = 0..1000, then 100 with `facet2` = 0..100.
+                let mut documents = vec![];
+                for i in 0..1_000 {
+                    documents.push(serde_json::json!({ "facet": i }).as_object().unwrap().clone());
+                }
+                for i in 0..100 {
+                    documents.push(serde_json::json!({ "facet2": i }).as_object().unwrap().clone());
+                }
+                let documents = batch_reader_from_documents(&documents);
+
+                index.add_documents(documents).unwrap();
+
+                db_snap!(index, facet_id_f64_docids, name);
+            };
+
+        test("default", None, None);
+        test("tiny_groups_tiny_levels", NonZeroUsize::new(1), NonZeroUsize::new(1));
+        test("small_groups_small_levels", NonZeroUsize::new(2), NonZeroUsize::new(2));
+        test("small_groups_large_levels", NonZeroUsize::new(2), NonZeroUsize::new(128));
+        test("large_groups_small_levels", NonZeroUsize::new(16), NonZeroUsize::new(2));
+        test("large_groups_large_levels", NonZeroUsize::new(16), NonZeroUsize::new(256));
+    }
+
+    /// Snapshots `facet_id_string_docids` after indexing string facet values with
+    /// the default and the smallest group-size / minimum-level-size settings.
+    #[test]
+    fn test_facets_string() {
+        let test = |name: &str,
+                    group_size: Option<NonZeroUsize>,
+                    min_level_size: Option<NonZeroUsize>| {
+            let mut index = TempIndex::new_with_map_size(4096 * 1000 * 10); // 40MB
+            index.index_documents_config.autogenerate_docids = true;
+            index.index_documents_config.facet_level_group_size = group_size;
+            index.index_documents_config.facet_min_level_size = min_level_size;
+
+            index
+                .update_settings(|settings| {
+                    settings.set_filterable_fields(
+                        IntoIterator::into_iter(["facet".to_owned(), "facet2".to_owned()])
+                            .collect(),
+                    );
+                })
+                .unwrap();
+
+            // 100 documents with `facet` = "s0".."s63" (uppercase hex), then 10 with `facet2`.
+            let mut documents = vec![];
+            for i in 0..100 {
+                documents.push(
+                    serde_json::json!({ "facet": format!("s{i:X}") }).as_object().unwrap().clone(),
+                );
+            }
+            for i in 0..10 {
+                documents.push(
+                    serde_json::json!({ "facet2": format!("s{i:X}") }).as_object().unwrap().clone(),
+                );
+            }
+            let documents = batch_reader_from_documents(&documents);
+
+            index.add_documents(documents).unwrap();
+
+            db_snap!(index, facet_id_string_docids, name);
+        };
+
+        test("default", None, None);
+        test("tiny_groups_tiny_levels", NonZeroUsize::new(1), NonZeroUsize::new(1));
+    }
+}
diff --git a/milli/src/update/snapshots/update/facets.rs/test_facets_number/default/facet_id_f64_docids.hash.snap b/milli/src/update/snapshots/update/facets.rs/test_facets_number/default/facet_id_f64_docids.hash.snap
new file mode 100644
index 000000000..373455db6
--- /dev/null
+++ b/milli/src/update/snapshots/update/facets.rs/test_facets_number/default/facet_id_f64_docids.hash.snap
@@ -0,0 +1,4 @@
+---
+source: milli/src/update/facets.rs
+---
+587899707db2848da3f18399e14ed4d0
diff --git a/milli/src/update/snapshots/update/facets.rs/test_facets_number/large_groups_large_levels/facet_id_f64_docids.hash.snap b/milli/src/update/snapshots/update/facets.rs/test_facets_number/large_groups_large_levels/facet_id_f64_docids.hash.snap
new file mode 100644
index 000000000..c3415c320
--- /dev/null
+++ b/milli/src/update/snapshots/update/facets.rs/test_facets_number/large_groups_large_levels/facet_id_f64_docids.hash.snap
@@ -0,0 +1,4 @@
+---
+source: milli/src/update/facets.rs
+---
+02bbf2ca1663cccea0e4c06d5ad06a45
diff --git a/milli/src/update/snapshots/update/facets.rs/test_facets_number/large_groups_small_levels/facet_id_f64_docids.hash.snap b/milli/src/update/snapshots/update/facets.rs/test_facets_number/large_groups_small_levels/facet_id_f64_docids.hash.snap
new file mode 100644
index 000000000..78dad29f1
--- /dev/null
+++ b/milli/src/update/snapshots/update/facets.rs/test_facets_number/large_groups_small_levels/facet_id_f64_docids.hash.snap
@@ -0,0 +1,4 @@
+---
+source: milli/src/update/facets.rs
+---
+e68ea591e1af3e53e544dff9a1648e88
diff --git a/milli/src/update/snapshots/update/facets.rs/test_facets_number/small_groups_large_levels/facet_id_f64_docids.hash.snap b/milli/src/update/snapshots/update/facets.rs/test_facets_number/small_groups_large_levels/facet_id_f64_docids.hash.snap
new file mode 100644
index 000000000..61a5908f4
--- /dev/null
+++ b/milli/src/update/snapshots/update/facets.rs/test_facets_number/small_groups_large_levels/facet_id_f64_docids.hash.snap
@@ -0,0 +1,4 @@
+---
+source: milli/src/update/facets.rs
+---
+12a4bb0f5b95d7629c2b9a915150c0cf
diff --git a/milli/src/update/snapshots/update/facets.rs/test_facets_number/small_groups_small_levels/facet_id_f64_docids.hash.snap b/milli/src/update/snapshots/update/facets.rs/test_facets_number/small_groups_small_levels/facet_id_f64_docids.hash.snap
new file mode 100644
index 000000000..961346de5
--- /dev/null
+++ b/milli/src/update/snapshots/update/facets.rs/test_facets_number/small_groups_small_levels/facet_id_f64_docids.hash.snap
@@ -0,0 +1,4 @@
+---
+source: milli/src/update/facets.rs
+---
+6438e94bc7fada13022e0efccdf294e0
diff --git a/milli/src/update/snapshots/update/facets.rs/test_facets_number/tiny_groups_tiny_levels/facet_id_f64_docids.hash.snap b/milli/src/update/snapshots/update/facets.rs/test_facets_number/tiny_groups_tiny_levels/facet_id_f64_docids.hash.snap
new file mode 100644
index 000000000..2b7c1ef9c
--- /dev/null
+++ b/milli/src/update/snapshots/update/facets.rs/test_facets_number/tiny_groups_tiny_levels/facet_id_f64_docids.hash.snap
@@ -0,0 +1,4 @@
+---
+source: milli/src/update/facets.rs
+---
+5348bbc46b5384455b6a900666d2a502
diff --git a/milli/src/update/snapshots/update/facets.rs/test_facets_string/default/facet_id_string_docids.hash.snap b/milli/src/update/snapshots/update/facets.rs/test_facets_string/default/facet_id_string_docids.hash.snap
new file mode 100644
index 000000000..901b86255
--- /dev/null
+++ b/milli/src/update/snapshots/update/facets.rs/test_facets_string/default/facet_id_string_docids.hash.snap
@@ -0,0 +1,4 @@
+---
+source: milli/src/update/facets.rs
+---
+faddef9eae5f2efacfec51f20f2e8cd6
diff --git a/milli/src/update/snapshots/update/facets.rs/test_facets_string/tiny_groups_tiny_levels/facet_id_string_docids.hash.snap b/milli/src/update/snapshots/update/facets.rs/test_facets_string/tiny_groups_tiny_levels/facet_id_string_docids.hash.snap
new file mode 100644
index 000000000..aa6c85461
--- /dev/null
+++ b/milli/src/update/snapshots/update/facets.rs/test_facets_string/tiny_groups_tiny_levels/facet_id_string_docids.hash.snap
@@ -0,0 +1,4 @@
+---
+source: milli/src/update/facets.rs
+---
+ddb8fc987c5dc892337682595043858e