diff --git a/crates/milli/src/lib.rs b/crates/milli/src/lib.rs index db44f745f..ea88d2b78 100644 --- a/crates/milli/src/lib.rs +++ b/crates/milli/src/lib.rs @@ -1,4 +1,3 @@ -#![cfg_attr(all(test, fuzzing), feature(no_coverage))] #![allow(clippy::type_complexity)] #[cfg(not(windows))] diff --git a/crates/milli/src/update/facet/incremental.rs b/crates/milli/src/update/facet/incremental.rs index a1fa07fe3..41d1f62ab 100644 --- a/crates/milli/src/update/facet/incremental.rs +++ b/crates/milli/src/update/facet/incremental.rs @@ -1059,208 +1059,3 @@ mod tests { milli_snap!(format!("{index}"), "after_delete"); } } - -// fuzz tests -#[cfg(all(test, fuzzing))] -/** -Fuzz test for the incremental indxer. - -The fuzz test uses fuzzcheck, a coverage-guided fuzzer. -See https://github.com/loiclec/fuzzcheck-rs and https://fuzzcheck.neocities.org -for more information. - -It is only run when using the `cargo fuzzcheck` command line tool, which can be installed with: -```sh -cargo install cargo-fuzzcheck -``` -To start the fuzz test, run (from the base folder or from milli/): -```sh -cargo fuzzcheck update::facet::incremental::fuzz::fuzz -``` -and wait a couple minutes to make sure the code was thoroughly tested, then -hit `Ctrl-C` to stop the fuzzer. The corpus generated by the fuzzer is located in milli/fuzz. - -To work on this module with rust-analyzer working properly, add the following to your .cargo/config.toml file: -```toml -[build] -rustflags = ["--cfg", "fuzzing"] -``` - -The fuzz test generates sequences of additions and deletions to the facet database and -ensures that: -1. its structure is still internally valid -2. its content is the same as a trivially correct implementation of the same database -*/ -mod fuzz { - use std::collections::{BTreeMap, HashMap}; - use std::iter::FromIterator; - use std::rc::Rc; - - use fuzzcheck::mutators::integer::U8Mutator; - use fuzzcheck::mutators::integer_within_range::{U16WithinRangeMutator, U8WithinRangeMutator}; - use fuzzcheck::mutators::vector::VecMutator; - use fuzzcheck::DefaultMutator; - use roaring::RoaringBitmap; - use tempfile::TempDir; - - use super::*; - use crate::update::facet::test_helpers::FacetIndex; - #[derive(Default)] - pub struct TrivialDatabase { - pub elements: BTreeMap>, - } - impl TrivialDatabase - where - T: Ord + Clone + Eq + std::fmt::Debug, - { - #[no_coverage] - pub fn insert(&mut self, field_id: u16, new_key: &T, new_values: &RoaringBitmap) { - if new_values.is_empty() { - return; - } - let values_field_id = self.elements.entry(field_id).or_default(); - let values = values_field_id.entry(new_key.clone()).or_default(); - *values |= new_values; - } - #[no_coverage] - pub fn delete(&mut self, field_id: u16, key: &T, values_to_remove: &RoaringBitmap) { - if let Some(values_field_id) = self.elements.get_mut(&field_id) { - if let Some(values) = values_field_id.get_mut(&key) { - *values -= values_to_remove; - if values.is_empty() { - values_field_id.remove(&key); - } - } - if values_field_id.is_empty() { - self.elements.remove(&field_id); - } - } - } - } - #[derive(Clone, DefaultMutator, serde::Serialize, serde::Deserialize)] - struct Operation { - #[field_mutator(VecMutator = { VecMutator::new(u8::default_mutator(), 0 ..= 5) })] - key: Vec, - #[field_mutator(U8WithinRangeMutator = { U8WithinRangeMutator::new(..32) })] - group_size: u8, - #[field_mutator(U8WithinRangeMutator = { U8WithinRangeMutator::new(..32) })] - max_group_size: u8, - #[field_mutator(U8WithinRangeMutator = { U8WithinRangeMutator::new(..32) })] - min_level_size: u8, - #[field_mutator(U16WithinRangeMutator = { U16WithinRangeMutator::new(..=3) })] - field_id: u16, - kind: OperationKind, - } - #[derive(Clone, DefaultMutator, serde::Serialize, serde::Deserialize)] - enum OperationKind { - Insert( - #[field_mutator(VecMutator = { VecMutator::new(U8Mutator::default(), 0 ..= 10) })] - Vec, - ), - Delete( - #[field_mutator(VecMutator = { VecMutator::new(U8Mutator::default(), 0 ..= 10) })] - Vec, - ), - } - - #[no_coverage] - fn compare_with_trivial_database(tempdir: Rc, operations: &[Operation]) { - let index = FacetIndex::::open_from_tempdir(tempdir, 4, 8, 5); // dummy params, they'll be overwritten - let mut txn = index.env.write_txn().unwrap(); - - let mut trivial_db = TrivialDatabase::>::default(); - let mut value_to_keys = HashMap::>>::new(); - for Operation { key, group_size, max_group_size, min_level_size, field_id, kind } in - operations - { - index.set_group_size(*group_size); - index.set_max_group_size(*max_group_size); - index.set_min_level_size(*min_level_size); - match kind { - OperationKind::Insert(values) => { - let mut bitmap = RoaringBitmap::new(); - for value in values { - bitmap.insert(*value as u32); - value_to_keys.entry(*value).or_default().push(key.clone()); - } - index.insert(&mut txn, *field_id, &key.as_slice(), &bitmap); - trivial_db.insert(*field_id, &key, &bitmap); - } - OperationKind::Delete(values) => { - let values = RoaringBitmap::from_iter(values.iter().copied().map(|x| x as u32)); - let mut values_per_key = HashMap::new(); - - for value in values { - if let Some(keys) = value_to_keys.get(&(value as u8)) { - for key in keys { - let values: &mut RoaringBitmap = - values_per_key.entry(key).or_default(); - values.insert(value); - } - } - } - for (key, values) in values_per_key { - index.delete(&mut txn, *field_id, &key.as_slice(), &values); - trivial_db.delete(*field_id, &key, &values); - } - } - } - } - - for (field_id, values_field_id) in trivial_db.elements.iter() { - let level0iter = index - .content - .as_polymorph() - .prefix_iter::<_, Bytes, FacetGroupValueCodec>(&mut txn, &field_id.to_be_bytes()) - .unwrap(); - - for ((key, values), group) in values_field_id.iter().zip(level0iter) { - let (group_key, group_values) = group.unwrap(); - let group_key = - FacetGroupKeyCodec::::bytes_decode(group_key).unwrap(); - assert_eq!(key, &group_key.left_bound); - assert_eq!(values, &group_values.bitmap); - } - } - - for (field_id, values_field_id) in trivial_db.elements.iter() { - let level0iter = index - .content - .as_polymorph() - .prefix_iter::<_, Bytes, FacetGroupValueCodec>(&txn, &field_id.to_be_bytes()) - .unwrap(); - - for ((key, values), group) in values_field_id.iter().zip(level0iter) { - let (group_key, group_values) = group.unwrap(); - let group_key = - FacetGroupKeyCodec::::bytes_decode(group_key).unwrap(); - assert_eq!(key, &group_key.left_bound); - assert_eq!(values, &group_values.bitmap); - } - index.verify_structure_validity(&txn, *field_id); - } - txn.abort().unwrap(); - } - - #[test] - #[no_coverage] - fn fuzz() { - let tempdir = Rc::new(TempDir::new().unwrap()); - let tempdir_cloned = tempdir.clone(); - let result = fuzzcheck::fuzz_test(move |operations: &[Operation]| { - compare_with_trivial_database(tempdir_cloned.clone(), operations) - }) - .default_mutator() - .serde_serializer() - .default_sensor_and_pool_with_custom_filter(|file, function| { - file == std::path::Path::new("milli/src/update/facet/incremental.rs") - && !function.contains("serde") - && !function.contains("tests::") - && !function.contains("fuzz::") - && !function.contains("display_bitmap") - }) - .arguments_from_cargo_fuzzcheck() - .launch(); - assert!(!result.found_test_failure); - } -} diff --git a/crates/milli/src/update/facet/mod.rs b/crates/milli/src/update/facet/mod.rs index 911296577..c78610e23 100644 --- a/crates/milli/src/update/facet/mod.rs +++ b/crates/milli/src/update/facet/mod.rs @@ -346,35 +346,6 @@ pub(crate) mod test_helpers { for<'a> BoundCodec: BytesEncode<'a> + BytesDecode<'a, DItem = >::EItem>, { - #[cfg(all(test, fuzzing))] - pub fn open_from_tempdir( - tempdir: Rc, - group_size: u8, - max_group_size: u8, - min_level_size: u8, - ) -> FacetIndex { - let group_size = std::cmp::min(16, std::cmp::max(group_size, 2)); // 2 <= x <= 16 - let max_group_size = std::cmp::min(16, std::cmp::max(group_size * 2, max_group_size)); // 2*group_size <= x <= 16 - let min_level_size = std::cmp::min(17, std::cmp::max(1, min_level_size)); // 1 <= x <= 17 - - let mut options = heed::EnvOpenOptions::new(); - let options = options.map_size(4096 * 4 * 10 * 1000); - unsafe { - options.flag(heed::flags::Flags::MdbAlwaysFreePages); - } - let env = options.open(tempdir.path()).unwrap(); - let content = env.open_database(None).unwrap().unwrap(); - - FacetIndex { - content, - group_size: Cell::new(group_size), - max_group_size: Cell::new(max_group_size), - min_level_size: Cell::new(min_level_size), - _tempdir: tempdir, - env, - _phantom: PhantomData, - } - } pub fn new( group_size: u8, max_group_size: u8, @@ -402,26 +373,6 @@ pub(crate) mod test_helpers { } } - #[cfg(all(test, fuzzing))] - pub fn set_group_size(&self, group_size: u8) { - // 2 <= x <= 64 - self.group_size.set(std::cmp::min(64, std::cmp::max(group_size, 2))); - } - #[cfg(all(test, fuzzing))] - pub fn set_max_group_size(&self, max_group_size: u8) { - // 2*group_size <= x <= 128 - let max_group_size = std::cmp::max(4, std::cmp::min(128, max_group_size)); - self.max_group_size.set(max_group_size); - if self.group_size.get() < max_group_size / 2 { - self.group_size.set(max_group_size / 2); - } - } - #[cfg(all(test, fuzzing))] - pub fn set_min_level_size(&self, min_level_size: u8) { - // 1 <= x <= inf - self.min_level_size.set(std::cmp::max(1, min_level_size)); - } - pub fn insert<'a>( &self, wtxn: &'a mut RwTxn<'_>,