mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-10 13:34:30 +01:00
Remove fuzzing feature
This commit is contained in:
parent
3e3695445f
commit
5e8144b0e1
@ -1,4 +1,3 @@
|
||||
#![cfg_attr(all(test, fuzzing), feature(no_coverage))]
|
||||
#![allow(clippy::type_complexity)]
|
||||
|
||||
#[cfg(not(windows))]
|
||||
|
@ -1059,208 +1059,3 @@ mod tests {
|
||||
milli_snap!(format!("{index}"), "after_delete");
|
||||
}
|
||||
}
|
||||
|
||||
// fuzz tests
|
||||
#[cfg(all(test, fuzzing))]
|
||||
/**
|
||||
Fuzz test for the incremental indexer.
|
||||
|
||||
The fuzz test uses fuzzcheck, a coverage-guided fuzzer.
|
||||
See https://github.com/loiclec/fuzzcheck-rs and https://fuzzcheck.neocities.org
|
||||
for more information.
|
||||
|
||||
It is only run when using the `cargo fuzzcheck` command line tool, which can be installed with:
|
||||
```sh
|
||||
cargo install cargo-fuzzcheck
|
||||
```
|
||||
To start the fuzz test, run (from the base folder or from milli/):
|
||||
```sh
|
||||
cargo fuzzcheck update::facet::incremental::fuzz::fuzz
|
||||
```
|
||||
and wait a couple minutes to make sure the code was thoroughly tested, then
|
||||
hit `Ctrl-C` to stop the fuzzer. The corpus generated by the fuzzer is located in milli/fuzz.
|
||||
|
||||
To work on this module with rust-analyzer working properly, add the following to your .cargo/config.toml file:
|
||||
```toml
|
||||
[build]
|
||||
rustflags = ["--cfg", "fuzzing"]
|
||||
```
|
||||
|
||||
The fuzz test generates sequences of additions and deletions to the facet database and
|
||||
ensures that:
|
||||
1. its structure is still internally valid
|
||||
2. its content is the same as a trivially correct implementation of the same database
|
||||
*/
|
||||
mod fuzz {
|
||||
use std::collections::{BTreeMap, HashMap};
|
||||
use std::iter::FromIterator;
|
||||
use std::rc::Rc;
|
||||
|
||||
use fuzzcheck::mutators::integer::U8Mutator;
|
||||
use fuzzcheck::mutators::integer_within_range::{U16WithinRangeMutator, U8WithinRangeMutator};
|
||||
use fuzzcheck::mutators::vector::VecMutator;
|
||||
use fuzzcheck::DefaultMutator;
|
||||
use roaring::RoaringBitmap;
|
||||
use tempfile::TempDir;
|
||||
|
||||
use super::*;
|
||||
use crate::update::facet::test_helpers::FacetIndex;
|
||||
#[derive(Default)]
|
||||
pub struct TrivialDatabase<T> {
|
||||
pub elements: BTreeMap<u16, BTreeMap<T, RoaringBitmap>>,
|
||||
}
|
||||
impl<T> TrivialDatabase<T>
|
||||
where
|
||||
T: Ord + Clone + Eq + std::fmt::Debug,
|
||||
{
|
||||
#[no_coverage]
|
||||
pub fn insert(&mut self, field_id: u16, new_key: &T, new_values: &RoaringBitmap) {
|
||||
if new_values.is_empty() {
|
||||
return;
|
||||
}
|
||||
let values_field_id = self.elements.entry(field_id).or_default();
|
||||
let values = values_field_id.entry(new_key.clone()).or_default();
|
||||
*values |= new_values;
|
||||
}
|
||||
#[no_coverage]
|
||||
pub fn delete(&mut self, field_id: u16, key: &T, values_to_remove: &RoaringBitmap) {
|
||||
if let Some(values_field_id) = self.elements.get_mut(&field_id) {
|
||||
if let Some(values) = values_field_id.get_mut(&key) {
|
||||
*values -= values_to_remove;
|
||||
if values.is_empty() {
|
||||
values_field_id.remove(&key);
|
||||
}
|
||||
}
|
||||
if values_field_id.is_empty() {
|
||||
self.elements.remove(&field_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
#[derive(Clone, DefaultMutator, serde::Serialize, serde::Deserialize)]
|
||||
struct Operation {
|
||||
#[field_mutator(VecMutator<u8, U8Mutator> = { VecMutator::new(u8::default_mutator(), 0 ..= 5) })]
|
||||
key: Vec<u8>,
|
||||
#[field_mutator(U8WithinRangeMutator = { U8WithinRangeMutator::new(..32) })]
|
||||
group_size: u8,
|
||||
#[field_mutator(U8WithinRangeMutator = { U8WithinRangeMutator::new(..32) })]
|
||||
max_group_size: u8,
|
||||
#[field_mutator(U8WithinRangeMutator = { U8WithinRangeMutator::new(..32) })]
|
||||
min_level_size: u8,
|
||||
#[field_mutator(U16WithinRangeMutator = { U16WithinRangeMutator::new(..=3) })]
|
||||
field_id: u16,
|
||||
kind: OperationKind,
|
||||
}
|
||||
#[derive(Clone, DefaultMutator, serde::Serialize, serde::Deserialize)]
|
||||
enum OperationKind {
|
||||
Insert(
|
||||
#[field_mutator(VecMutator<u8, U8Mutator> = { VecMutator::new(U8Mutator::default(), 0 ..= 10) })]
|
||||
Vec<u8>,
|
||||
),
|
||||
Delete(
|
||||
#[field_mutator(VecMutator<u8, U8Mutator> = { VecMutator::new(U8Mutator::default(), 0 ..= 10) })]
|
||||
Vec<u8>,
|
||||
),
|
||||
}
|
||||
|
||||
#[no_coverage]
|
||||
fn compare_with_trivial_database(tempdir: Rc<TempDir>, operations: &[Operation]) {
|
||||
let index = FacetIndex::<BytesRefCodec>::open_from_tempdir(tempdir, 4, 8, 5); // dummy params, they'll be overwritten
|
||||
let mut txn = index.env.write_txn().unwrap();
|
||||
|
||||
let mut trivial_db = TrivialDatabase::<Vec<u8>>::default();
|
||||
let mut value_to_keys = HashMap::<u8, Vec<Vec<u8>>>::new();
|
||||
for Operation { key, group_size, max_group_size, min_level_size, field_id, kind } in
|
||||
operations
|
||||
{
|
||||
index.set_group_size(*group_size);
|
||||
index.set_max_group_size(*max_group_size);
|
||||
index.set_min_level_size(*min_level_size);
|
||||
match kind {
|
||||
OperationKind::Insert(values) => {
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
for value in values {
|
||||
bitmap.insert(*value as u32);
|
||||
value_to_keys.entry(*value).or_default().push(key.clone());
|
||||
}
|
||||
index.insert(&mut txn, *field_id, &key.as_slice(), &bitmap);
|
||||
trivial_db.insert(*field_id, &key, &bitmap);
|
||||
}
|
||||
OperationKind::Delete(values) => {
|
||||
let values = RoaringBitmap::from_iter(values.iter().copied().map(|x| x as u32));
|
||||
let mut values_per_key = HashMap::new();
|
||||
|
||||
for value in values {
|
||||
if let Some(keys) = value_to_keys.get(&(value as u8)) {
|
||||
for key in keys {
|
||||
let values: &mut RoaringBitmap =
|
||||
values_per_key.entry(key).or_default();
|
||||
values.insert(value);
|
||||
}
|
||||
}
|
||||
}
|
||||
for (key, values) in values_per_key {
|
||||
index.delete(&mut txn, *field_id, &key.as_slice(), &values);
|
||||
trivial_db.delete(*field_id, &key, &values);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (field_id, values_field_id) in trivial_db.elements.iter() {
|
||||
let level0iter = index
|
||||
.content
|
||||
.as_polymorph()
|
||||
.prefix_iter::<_, Bytes, FacetGroupValueCodec>(&mut txn, &field_id.to_be_bytes())
|
||||
.unwrap();
|
||||
|
||||
for ((key, values), group) in values_field_id.iter().zip(level0iter) {
|
||||
let (group_key, group_values) = group.unwrap();
|
||||
let group_key =
|
||||
FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(group_key).unwrap();
|
||||
assert_eq!(key, &group_key.left_bound);
|
||||
assert_eq!(values, &group_values.bitmap);
|
||||
}
|
||||
}
|
||||
|
||||
for (field_id, values_field_id) in trivial_db.elements.iter() {
|
||||
let level0iter = index
|
||||
.content
|
||||
.as_polymorph()
|
||||
.prefix_iter::<_, Bytes, FacetGroupValueCodec>(&txn, &field_id.to_be_bytes())
|
||||
.unwrap();
|
||||
|
||||
for ((key, values), group) in values_field_id.iter().zip(level0iter) {
|
||||
let (group_key, group_values) = group.unwrap();
|
||||
let group_key =
|
||||
FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(group_key).unwrap();
|
||||
assert_eq!(key, &group_key.left_bound);
|
||||
assert_eq!(values, &group_values.bitmap);
|
||||
}
|
||||
index.verify_structure_validity(&txn, *field_id);
|
||||
}
|
||||
txn.abort().unwrap();
|
||||
}
|
||||
|
||||
/// Fuzz entry point: feeds generated `Operation` sequences to
/// `compare_with_trivial_database` and fails if the fuzzer reports a failure.
///
/// Coverage is only collected for functions of
/// `milli/src/update/facet/incremental.rs` whose name does not contain
/// `serde`, `tests::`, `fuzz::` or `display_bitmap`.
#[test]
#[no_coverage]
fn fuzz() {
    // A single temporary directory is shared by every iteration; each one
    // receives its own `Rc` handle to it.
    let tempdir = Rc::new(TempDir::new().unwrap());
    let shared_dir = Rc::clone(&tempdir);

    let outcome = fuzzcheck::fuzz_test(move |operations: &[Operation]| {
        compare_with_trivial_database(Rc::clone(&shared_dir), operations)
    })
    .default_mutator()
    .serde_serializer()
    .default_sensor_and_pool_with_custom_filter(|file, function| {
        file == std::path::Path::new("milli/src/update/facet/incremental.rs")
            && !function.contains("serde")
            && !function.contains("tests::")
            && !function.contains("fuzz::")
            && !function.contains("display_bitmap")
    })
    .arguments_from_cargo_fuzzcheck()
    .launch();

    assert!(!outcome.found_test_failure);
}
|
||||
}
|
||||
|
@ -346,35 +346,6 @@ pub(crate) mod test_helpers {
|
||||
for<'a> BoundCodec:
|
||||
BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>,
|
||||
{
|
||||
/// Opens a `FacetIndex` backed by the environment stored in `tempdir`
/// instead of creating a fresh temporary directory.
///
/// The three size parameters are sanitised before use:
/// - `group_size` is clamped to `2..=16`;
/// - `max_group_size` is raised to at least `2 * group_size` and then capped
///   at 16 (so the cap wins whenever `group_size > 8`);
/// - `min_level_size` is clamped to `1..=17`.
///
/// # Panics
///
/// Panics if the environment or its unnamed database cannot be opened.
#[cfg(all(test, fuzzing))]
pub fn open_from_tempdir(
    tempdir: Rc<tempfile::TempDir>,
    group_size: u8,
    max_group_size: u8,
    min_level_size: u8,
) -> FacetIndex<BoundCodec> {
    let group_size = group_size.clamp(2, 16);
    let max_group_size = max_group_size.max(group_size * 2).min(16);
    let min_level_size = min_level_size.clamp(1, 17);

    let mut options = heed::EnvOpenOptions::new();
    let options = options.map_size(4096 * 4 * 10 * 1000);
    // NOTE(review): no safety justification is recorded for setting this
    // environment flag — confirm the requirements of `flag` are met.
    unsafe {
        options.flag(heed::flags::Flags::MdbAlwaysFreePages);
    }
    let env = options.open(tempdir.path()).unwrap();
    let content = env.open_database(None).unwrap().unwrap();

    FacetIndex {
        content,
        group_size: Cell::new(group_size),
        max_group_size: Cell::new(max_group_size),
        min_level_size: Cell::new(min_level_size),
        // Keeps the directory alive for as long as the index exists.
        _tempdir: tempdir,
        env,
        _phantom: PhantomData,
    }
}
|
||||
pub fn new(
|
||||
group_size: u8,
|
||||
max_group_size: u8,
|
||||
@ -402,26 +373,6 @@ pub(crate) mod test_helpers {
|
||||
}
|
||||
}
|
||||
|
||||
/// Overrides the group size, clamping the requested value to `2..=64`.
#[cfg(all(test, fuzzing))]
pub fn set_group_size(&self, group_size: u8) {
    self.group_size.set(group_size.clamp(2, 64));
}
|
||||
/// Overrides the maximum group size, clamping the requested value to
/// `4..=128`, and raises the current group size to half of the new maximum
/// when it is smaller than that.
///
/// NOTE(review): the comment this replaces promised
/// `2 * group_size <= max_group_size`, but a group size *larger* than half of
/// the new maximum is left untouched, so that relation is not enforced here —
/// confirm which of the two is intended.
#[cfg(all(test, fuzzing))]
pub fn set_max_group_size(&self, max_group_size: u8) {
    let clamped = max_group_size.clamp(4, 128);
    self.max_group_size.set(clamped);

    let half = clamped / 2;
    if self.group_size.get() < half {
        self.group_size.set(half);
    }
}
|
||||
/// Overrides the minimum level size; a requested value of 0 is raised to 1,
/// and there is no upper bound.
#[cfg(all(test, fuzzing))]
pub fn set_min_level_size(&self, min_level_size: u8) {
    self.min_level_size.set(min_level_size.max(1));
}
|
||||
|
||||
pub fn insert<'a>(
|
||||
&self,
|
||||
wtxn: &'a mut RwTxn<'_>,
|
||||
|
Loading…
x
Reference in New Issue
Block a user