mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-11 05:54:30 +01:00
Remove fuzzing feature
This commit is contained in:
parent
3e3695445f
commit
5e8144b0e1
@ -1,4 +1,3 @@
|
|||||||
#![cfg_attr(all(test, fuzzing), feature(no_coverage))]
|
|
||||||
#![allow(clippy::type_complexity)]
|
#![allow(clippy::type_complexity)]
|
||||||
|
|
||||||
#[cfg(not(windows))]
|
#[cfg(not(windows))]
|
||||||
|
@ -1059,208 +1059,3 @@ mod tests {
|
|||||||
milli_snap!(format!("{index}"), "after_delete");
|
milli_snap!(format!("{index}"), "after_delete");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// fuzz tests
|
|
||||||
#[cfg(all(test, fuzzing))]
|
|
||||||
/**
|
|
||||||
Fuzz test for the incremental indexer.
|
|
||||||
|
|
||||||
The fuzz test uses fuzzcheck, a coverage-guided fuzzer.
|
|
||||||
See https://github.com/loiclec/fuzzcheck-rs and https://fuzzcheck.neocities.org
|
|
||||||
for more information.
|
|
||||||
|
|
||||||
It is only run when using the `cargo fuzzcheck` command line tool, which can be installed with:
|
|
||||||
```sh
|
|
||||||
cargo install cargo-fuzzcheck
|
|
||||||
```
|
|
||||||
To start the fuzz test, run (from the base folder or from milli/):
|
|
||||||
```sh
|
|
||||||
cargo fuzzcheck update::facet::incremental::fuzz::fuzz
|
|
||||||
```
|
|
||||||
and wait a couple minutes to make sure the code was thoroughly tested, then
|
|
||||||
hit `Ctrl-C` to stop the fuzzer. The corpus generated by the fuzzer is located in milli/fuzz.
|
|
||||||
|
|
||||||
To work on this module with rust-analyzer working properly, add the following to your .cargo/config.toml file:
|
|
||||||
```toml
|
|
||||||
[build]
|
|
||||||
rustflags = ["--cfg", "fuzzing"]
|
|
||||||
```
|
|
||||||
|
|
||||||
The fuzz test generates sequences of additions and deletions to the facet database and
|
|
||||||
ensures that:
|
|
||||||
1. its structure is still internally valid
|
|
||||||
2. its content is the same as a trivially correct implementation of the same database
|
|
||||||
*/
|
|
||||||
mod fuzz {
|
|
||||||
use std::collections::{BTreeMap, HashMap};
|
|
||||||
use std::iter::FromIterator;
|
|
||||||
use std::rc::Rc;
|
|
||||||
|
|
||||||
use fuzzcheck::mutators::integer::U8Mutator;
|
|
||||||
use fuzzcheck::mutators::integer_within_range::{U16WithinRangeMutator, U8WithinRangeMutator};
|
|
||||||
use fuzzcheck::mutators::vector::VecMutator;
|
|
||||||
use fuzzcheck::DefaultMutator;
|
|
||||||
use roaring::RoaringBitmap;
|
|
||||||
use tempfile::TempDir;
|
|
||||||
|
|
||||||
use super::*;
|
|
||||||
use crate::update::facet::test_helpers::FacetIndex;
|
|
||||||
#[derive(Default)]
|
|
||||||
pub struct TrivialDatabase<T> {
|
|
||||||
pub elements: BTreeMap<u16, BTreeMap<T, RoaringBitmap>>,
|
|
||||||
}
|
|
||||||
impl<T> TrivialDatabase<T>
|
|
||||||
where
|
|
||||||
T: Ord + Clone + Eq + std::fmt::Debug,
|
|
||||||
{
|
|
||||||
#[no_coverage]
|
|
||||||
pub fn insert(&mut self, field_id: u16, new_key: &T, new_values: &RoaringBitmap) {
|
|
||||||
if new_values.is_empty() {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
let values_field_id = self.elements.entry(field_id).or_default();
|
|
||||||
let values = values_field_id.entry(new_key.clone()).or_default();
|
|
||||||
*values |= new_values;
|
|
||||||
}
|
|
||||||
#[no_coverage]
|
|
||||||
pub fn delete(&mut self, field_id: u16, key: &T, values_to_remove: &RoaringBitmap) {
|
|
||||||
if let Some(values_field_id) = self.elements.get_mut(&field_id) {
|
|
||||||
if let Some(values) = values_field_id.get_mut(&key) {
|
|
||||||
*values -= values_to_remove;
|
|
||||||
if values.is_empty() {
|
|
||||||
values_field_id.remove(&key);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if values_field_id.is_empty() {
|
|
||||||
self.elements.remove(&field_id);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
#[derive(Clone, DefaultMutator, serde::Serialize, serde::Deserialize)]
|
|
||||||
struct Operation {
|
|
||||||
#[field_mutator(VecMutator<u8, U8Mutator> = { VecMutator::new(u8::default_mutator(), 0 ..= 5) })]
|
|
||||||
key: Vec<u8>,
|
|
||||||
#[field_mutator(U8WithinRangeMutator = { U8WithinRangeMutator::new(..32) })]
|
|
||||||
group_size: u8,
|
|
||||||
#[field_mutator(U8WithinRangeMutator = { U8WithinRangeMutator::new(..32) })]
|
|
||||||
max_group_size: u8,
|
|
||||||
#[field_mutator(U8WithinRangeMutator = { U8WithinRangeMutator::new(..32) })]
|
|
||||||
min_level_size: u8,
|
|
||||||
#[field_mutator(U16WithinRangeMutator = { U16WithinRangeMutator::new(..=3) })]
|
|
||||||
field_id: u16,
|
|
||||||
kind: OperationKind,
|
|
||||||
}
|
|
||||||
#[derive(Clone, DefaultMutator, serde::Serialize, serde::Deserialize)]
|
|
||||||
enum OperationKind {
|
|
||||||
Insert(
|
|
||||||
#[field_mutator(VecMutator<u8, U8Mutator> = { VecMutator::new(U8Mutator::default(), 0 ..= 10) })]
|
|
||||||
Vec<u8>,
|
|
||||||
),
|
|
||||||
Delete(
|
|
||||||
#[field_mutator(VecMutator<u8, U8Mutator> = { VecMutator::new(U8Mutator::default(), 0 ..= 10) })]
|
|
||||||
Vec<u8>,
|
|
||||||
),
|
|
||||||
}
|
|
||||||
|
|
||||||
#[no_coverage]
|
|
||||||
fn compare_with_trivial_database(tempdir: Rc<TempDir>, operations: &[Operation]) {
|
|
||||||
let index = FacetIndex::<BytesRefCodec>::open_from_tempdir(tempdir, 4, 8, 5); // dummy params, they'll be overwritten
|
|
||||||
let mut txn = index.env.write_txn().unwrap();
|
|
||||||
|
|
||||||
let mut trivial_db = TrivialDatabase::<Vec<u8>>::default();
|
|
||||||
let mut value_to_keys = HashMap::<u8, Vec<Vec<u8>>>::new();
|
|
||||||
for Operation { key, group_size, max_group_size, min_level_size, field_id, kind } in
|
|
||||||
operations
|
|
||||||
{
|
|
||||||
index.set_group_size(*group_size);
|
|
||||||
index.set_max_group_size(*max_group_size);
|
|
||||||
index.set_min_level_size(*min_level_size);
|
|
||||||
match kind {
|
|
||||||
OperationKind::Insert(values) => {
|
|
||||||
let mut bitmap = RoaringBitmap::new();
|
|
||||||
for value in values {
|
|
||||||
bitmap.insert(*value as u32);
|
|
||||||
value_to_keys.entry(*value).or_default().push(key.clone());
|
|
||||||
}
|
|
||||||
index.insert(&mut txn, *field_id, &key.as_slice(), &bitmap);
|
|
||||||
trivial_db.insert(*field_id, &key, &bitmap);
|
|
||||||
}
|
|
||||||
OperationKind::Delete(values) => {
|
|
||||||
let values = RoaringBitmap::from_iter(values.iter().copied().map(|x| x as u32));
|
|
||||||
let mut values_per_key = HashMap::new();
|
|
||||||
|
|
||||||
for value in values {
|
|
||||||
if let Some(keys) = value_to_keys.get(&(value as u8)) {
|
|
||||||
for key in keys {
|
|
||||||
let values: &mut RoaringBitmap =
|
|
||||||
values_per_key.entry(key).or_default();
|
|
||||||
values.insert(value);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
for (key, values) in values_per_key {
|
|
||||||
index.delete(&mut txn, *field_id, &key.as_slice(), &values);
|
|
||||||
trivial_db.delete(*field_id, &key, &values);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (field_id, values_field_id) in trivial_db.elements.iter() {
|
|
||||||
let level0iter = index
|
|
||||||
.content
|
|
||||||
.as_polymorph()
|
|
||||||
.prefix_iter::<_, Bytes, FacetGroupValueCodec>(&mut txn, &field_id.to_be_bytes())
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
for ((key, values), group) in values_field_id.iter().zip(level0iter) {
|
|
||||||
let (group_key, group_values) = group.unwrap();
|
|
||||||
let group_key =
|
|
||||||
FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(group_key).unwrap();
|
|
||||||
assert_eq!(key, &group_key.left_bound);
|
|
||||||
assert_eq!(values, &group_values.bitmap);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
for (field_id, values_field_id) in trivial_db.elements.iter() {
|
|
||||||
let level0iter = index
|
|
||||||
.content
|
|
||||||
.as_polymorph()
|
|
||||||
.prefix_iter::<_, Bytes, FacetGroupValueCodec>(&txn, &field_id.to_be_bytes())
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
for ((key, values), group) in values_field_id.iter().zip(level0iter) {
|
|
||||||
let (group_key, group_values) = group.unwrap();
|
|
||||||
let group_key =
|
|
||||||
FacetGroupKeyCodec::<BytesRefCodec>::bytes_decode(group_key).unwrap();
|
|
||||||
assert_eq!(key, &group_key.left_bound);
|
|
||||||
assert_eq!(values, &group_values.bitmap);
|
|
||||||
}
|
|
||||||
index.verify_structure_validity(&txn, *field_id);
|
|
||||||
}
|
|
||||||
txn.abort().unwrap();
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Entry point run by `cargo fuzzcheck`: generates sequences of `Operation`s
/// and feeds each one to `compare_with_trivial_database`, failing the test if
/// the fuzzer reports a test failure.
///
/// The sensor filter keeps only functions from `incremental.rs` whose names do
/// not contain any of the excluded fragments listed below.
#[test]
#[no_coverage]
fn fuzz() {
    // One temporary directory is shared by every iteration through the `Rc`.
    let tempdir = Rc::new(TempDir::new().unwrap());
    let shared_dir = Rc::clone(&tempdir);
    let result = fuzzcheck::fuzz_test(move |operations: &[Operation]| {
        compare_with_trivial_database(Rc::clone(&shared_dir), operations)
    })
    .default_mutator()
    .serde_serializer()
    .default_sensor_and_pool_with_custom_filter(|file, function| {
        let excluded = ["serde", "tests::", "fuzz::", "display_bitmap"];
        file == std::path::Path::new("milli/src/update/facet/incremental.rs")
            && !excluded.iter().any(|fragment| function.contains(*fragment))
    })
    .arguments_from_cargo_fuzzcheck()
    .launch();
    assert!(!result.found_test_failure);
}
|
|
||||||
}
|
|
||||||
|
@ -346,35 +346,6 @@ pub(crate) mod test_helpers {
|
|||||||
for<'a> BoundCodec:
|
for<'a> BoundCodec:
|
||||||
BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>,
|
BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>,
|
||||||
{
|
{
|
||||||
/// Builds a `FacetIndex` whose environment lives in an already existing
/// temporary directory (fuzzing builds only).
///
/// The three size parameters are normalised before being stored:
/// - `group_size` is clamped to `2..=16`;
/// - `max_group_size` is raised to at least `2 * group_size`, then capped at 16
///   (so the cap wins whenever the clamped `group_size` exceeds 8);
/// - `min_level_size` is clamped to `1..=17`.
///
/// Panics if the environment or its unnamed database cannot be opened.
#[cfg(all(test, fuzzing))]
pub fn open_from_tempdir(
    tempdir: Rc<tempfile::TempDir>,
    group_size: u8,
    max_group_size: u8,
    min_level_size: u8,
) -> FacetIndex<BoundCodec> {
    let group_size = group_size.clamp(2, 16);
    let max_group_size = max_group_size.max(group_size * 2).min(16);
    let min_level_size = min_level_size.clamp(1, 17);

    let mut options = heed::EnvOpenOptions::new();
    let options = options.map_size(4096 * 4 * 10 * 1000);
    // NOTE(review): the original gives no safety argument for this flag — confirm
    // against heed's documentation of `EnvOpenOptions::flag`.
    unsafe {
        options.flag(heed::flags::Flags::MdbAlwaysFreePages);
    }
    let env = options.open(tempdir.path()).unwrap();
    // The first `unwrap` handles the open error, the second a missing database.
    let content = env.open_database(None).unwrap().unwrap();

    FacetIndex {
        content,
        group_size: Cell::new(group_size),
        max_group_size: Cell::new(max_group_size),
        min_level_size: Cell::new(min_level_size),
        _tempdir: tempdir,
        env,
        _phantom: PhantomData,
    }
}
|
|
||||||
pub fn new(
|
pub fn new(
|
||||||
group_size: u8,
|
group_size: u8,
|
||||||
max_group_size: u8,
|
max_group_size: u8,
|
||||||
@ -402,26 +373,6 @@ pub(crate) mod test_helpers {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Overrides the group size, clamped to `2..=64` (fuzzing builds only).
#[cfg(all(test, fuzzing))]
pub fn set_group_size(&self, group_size: u8) {
    let clamped = group_size.clamp(2, 64);
    self.group_size.set(clamped);
}
|
|
||||||
/// Overrides the maximum group size, clamped to `4..=128`, and raises the
/// current group size to half of that maximum when it is smaller
/// (fuzzing builds only).
///
/// NOTE(review): the original comment stated `2*group_size <= x <= 128`, but
/// this code never lowers `group_size`, so a group size larger than half the
/// new maximum is left untouched — confirm which invariant is intended.
#[cfg(all(test, fuzzing))]
pub fn set_max_group_size(&self, max_group_size: u8) {
    let clamped = max_group_size.clamp(4, 128);
    self.max_group_size.set(clamped);

    let half = clamped / 2;
    if self.group_size.get() < half {
        self.group_size.set(half);
    }
}
|
|
||||||
/// Overrides the minimum level size; a value of 0 is raised to 1 and there is
/// no upper bound other than `u8::MAX` (fuzzing builds only).
#[cfg(all(test, fuzzing))]
pub fn set_min_level_size(&self, min_level_size: u8) {
    let at_least_one = min_level_size.max(1);
    self.min_level_size.set(at_least_one);
}
|
|
||||||
|
|
||||||
pub fn insert<'a>(
|
pub fn insert<'a>(
|
||||||
&self,
|
&self,
|
||||||
wtxn: &'a mut RwTxn<'_>,
|
wtxn: &'a mut RwTxn<'_>,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user