Use real delete function in facet indexing fuzz tests

The fuzz tests now delete multiple docids at once instead of one-by-one.
This commit is contained in:
Loïc Lecrenier 2022-10-12 10:23:40 +02:00
parent acc8caebe6
commit 2295e0e3ce
2 changed files with 39 additions and 28 deletions

View File

@ -1018,25 +1018,26 @@ mod tests {
txn.commit().unwrap(); txn.commit().unwrap();
milli_snap!(format!("{index}"), "after_delete"); milli_snap!(format!("{index}"), "after_delete");
} }
// fuzz tests
} }
// fuzz tests
#[cfg(all(test, fuzzing))] #[cfg(all(test, fuzzing))]
mod fuzz { mod fuzz {
use std::borrow::Cow; use std::borrow::Cow;
use std::collections::{BTreeMap, HashMap}; use std::collections::{BTreeMap, HashMap};
use std::convert::TryFrom; use std::convert::TryFrom;
use std::iter::FromIterator;
use std::rc::Rc; use std::rc::Rc;
use fuzzcheck::mutators::integer::U8Mutator;
use fuzzcheck::mutators::integer_within_range::{U16WithinRangeMutator, U8WithinRangeMutator}; use fuzzcheck::mutators::integer_within_range::{U16WithinRangeMutator, U8WithinRangeMutator};
use fuzzcheck::mutators::vector::VecMutator;
use fuzzcheck::DefaultMutator; use fuzzcheck::DefaultMutator;
use heed::BytesEncode; use heed::BytesEncode;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use tempfile::TempDir; use tempfile::TempDir;
use super::*; use super::*;
use crate::milli_snap;
use crate::update::facet::tests::FacetIndex; use crate::update::facet::tests::FacetIndex;
struct NEU16Codec; struct NEU16Codec;
@ -1074,10 +1075,10 @@ mod fuzz {
*values |= new_values; *values |= new_values;
} }
#[no_coverage] #[no_coverage]
pub fn delete(&mut self, field_id: u16, key: T, value: u32) { pub fn delete(&mut self, field_id: u16, key: T, values_to_remove: &RoaringBitmap) {
if let Some(values_field_id) = self.elements.get_mut(&field_id) { if let Some(values_field_id) = self.elements.get_mut(&field_id) {
if let Some(values) = values_field_id.get_mut(&key) { if let Some(values) = values_field_id.get_mut(&key) {
values.remove(value); *values -= values_to_remove;
if values.is_empty() { if values.is_empty() {
values_field_id.remove(&key); values_field_id.remove(&key);
} }
@ -1103,8 +1104,14 @@ mod fuzz {
} }
#[derive(Clone, DefaultMutator, serde::Serialize, serde::Deserialize)] #[derive(Clone, DefaultMutator, serde::Serialize, serde::Deserialize)]
enum OperationKind { enum OperationKind {
Insert(Vec<u8>), Insert(
Delete(u8), #[field_mutator(VecMutator<u8, U8Mutator> = { VecMutator::new(U8Mutator::default(), 0 ..= 10) })]
Vec<u8>,
),
Delete(
#[field_mutator(VecMutator<u8, U8Mutator> = { VecMutator::new(U8Mutator::default(), 0 ..= 10) })]
Vec<u8>,
),
} }
#[no_coverage] #[no_coverage]
@ -1131,13 +1138,23 @@ mod fuzz {
index.insert(&mut txn, *field_id, key, &bitmap); index.insert(&mut txn, *field_id, key, &bitmap);
trivial_db.insert(*field_id, *key, &bitmap); trivial_db.insert(*field_id, *key, &bitmap);
} }
OperationKind::Delete(value) => { OperationKind::Delete(values) => {
if let Some(keys) = value_to_keys.get(value) { let values = RoaringBitmap::from_iter(values.iter().copied().map(|x| x as u32));
for key in keys { let mut values_per_key = HashMap::new();
index.delete_single_docid(&mut txn, *field_id, key, *value as u32);
trivial_db.delete(*field_id, *key, *value as u32); for value in values {
if let Some(keys) = value_to_keys.get(&(value as u8)) {
for key in keys {
let values: &mut RoaringBitmap =
values_per_key.entry(key).or_default();
values.insert(value);
}
} }
} }
for (key, values) in values_per_key {
index.delete(&mut txn, *field_id, &key, &values);
trivial_db.delete(*field_id, *key, &values);
}
} }
} }
} }
@ -1221,7 +1238,7 @@ mod fuzz {
{"key":166, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[67]}}, {"key":166, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[67]}},
{"key":64, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[61]}}, {"key":64, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[61]}},
{"key":183, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[210]}}, {"key":183, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[210]}},
{"key":250, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Delete":50}} {"key":250, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Delete":[50]}}
] ]
"#; "#;
let operations: Vec<Operation<u16>> = serde_json::from_str(operations).unwrap(); let operations: Vec<Operation<u16>> = serde_json::from_str(operations).unwrap();
@ -1250,7 +1267,7 @@ mod fuzz {
{"key":200, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[5]}}, {"key":200, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[5]}},
{"key":93, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[98]}}, {"key":93, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[98]}},
{"key":162, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[5]}}, {"key":162, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[5]}},
{"key":80, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Delete":210}} {"key":80, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Delete":[210]}}
] ]
"#; "#;
let operations: Vec<Operation<u16>> = serde_json::from_str(operations).unwrap(); let operations: Vec<Operation<u16>> = serde_json::from_str(operations).unwrap();
@ -1285,7 +1302,7 @@ mod fuzz {
let operations = r#"[ let operations = r#"[
{"key":63499, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Insert":[87]}}, {"key":63499, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Insert":[87]}},
{"key":25374, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Insert":[14]}}, {"key":25374, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Insert":[14]}},
{"key":64481, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Delete":87}}, {"key":64481, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Delete":[87]}},
{"key":23038, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Insert":[173]}}, {"key":23038, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Insert":[173]}},
{"key":14862, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Insert":[8]}}, {"key":14862, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Insert":[8]}},
{"key":13145, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Insert":[5,64]}}, {"key":13145, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Insert":[5,64]}},
@ -1337,7 +1354,7 @@ mod fuzz {
"max_group_size":4, "max_group_size":4,
"min_level_size":25, "min_level_size":25,
"field_id":3, "field_id":3,
"kind":{"Delete":11} "kind":{"Delete":[11]}
} }
] ]
"#; "#;

View File

@ -76,13 +76,14 @@ pub const FACET_MAX_GROUP_SIZE: u8 = 8;
pub const FACET_GROUP_SIZE: u8 = 4; pub const FACET_GROUP_SIZE: u8 = 4;
pub const FACET_MIN_LEVEL_SIZE: u8 = 5; pub const FACET_MIN_LEVEL_SIZE: u8 = 5;
use std::fs::File;
use self::incremental::FacetsUpdateIncremental; use self::incremental::FacetsUpdateIncremental;
use super::FacetsUpdateBulk; use super::FacetsUpdateBulk;
use crate::facet::FacetType; use crate::facet::FacetType;
use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec}; use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::ByteSliceRefCodec; use crate::heed_codec::ByteSliceRefCodec;
use crate::{Index, Result}; use crate::{Index, Result};
use std::fs::File;
pub mod bulk; pub mod bulk;
pub mod delete; pub mod delete;
@ -153,6 +154,7 @@ impl<'i> FacetsUpdate<'i> {
pub(crate) mod tests { pub(crate) mod tests {
use std::cell::Cell; use std::cell::Cell;
use std::fmt::Display; use std::fmt::Display;
use std::iter::FromIterator;
use std::marker::PhantomData; use std::marker::PhantomData;
use std::rc::Rc; use std::rc::Rc;
@ -170,7 +172,7 @@ pub(crate) mod tests {
use crate::update::FacetsUpdateIncrementalInner; use crate::update::FacetsUpdateIncrementalInner;
use crate::CboRoaringBitmapCodec; use crate::CboRoaringBitmapCodec;
// A dummy index that only contains the facet database, used for testing /// A dummy index that only contains the facet database, used for testing
pub struct FacetIndex<BoundCodec> pub struct FacetIndex<BoundCodec>
where where
for<'a> BoundCodec: for<'a> BoundCodec:
@ -287,17 +289,9 @@ pub(crate) mod tests {
key: &'a <BoundCodec as BytesEncode<'a>>::EItem, key: &'a <BoundCodec as BytesEncode<'a>>::EItem,
docid: u32, docid: u32,
) { ) {
let update = FacetsUpdateIncrementalInner { self.delete(wtxn, field_id, key, &RoaringBitmap::from_iter(std::iter::once(docid)))
db: self.content,
group_size: self.group_size.get(),
min_level_size: self.min_level_size.get(),
max_group_size: self.max_group_size.get(),
};
let key_bytes = BoundCodec::bytes_encode(&key).unwrap();
let mut docids = RoaringBitmap::new();
docids.insert(docid);
update.delete(wtxn, field_id, &key_bytes, &docids).unwrap();
} }
pub fn delete<'a>( pub fn delete<'a>(
&self, &self,
wtxn: &'a mut RwTxn, wtxn: &'a mut RwTxn,