Use real delete function in facet indexing fuzz tests

By deleting multiple docids at once instead of one-by-one
This commit is contained in:
Loïc Lecrenier 2022-10-12 10:23:40 +02:00
parent acc8caebe6
commit 2295e0e3ce
2 changed files with 39 additions and 28 deletions

View File

@ -1018,25 +1018,26 @@ mod tests {
txn.commit().unwrap();
milli_snap!(format!("{index}"), "after_delete");
}
// fuzz tests
}
// fuzz tests
#[cfg(all(test, fuzzing))]
mod fuzz {
use std::borrow::Cow;
use std::collections::{BTreeMap, HashMap};
use std::convert::TryFrom;
use std::iter::FromIterator;
use std::rc::Rc;
use fuzzcheck::mutators::integer::U8Mutator;
use fuzzcheck::mutators::integer_within_range::{U16WithinRangeMutator, U8WithinRangeMutator};
use fuzzcheck::mutators::vector::VecMutator;
use fuzzcheck::DefaultMutator;
use heed::BytesEncode;
use roaring::RoaringBitmap;
use tempfile::TempDir;
use super::*;
use crate::milli_snap;
use crate::update::facet::tests::FacetIndex;
struct NEU16Codec;
@ -1074,10 +1075,10 @@ mod fuzz {
*values |= new_values;
}
#[no_coverage]
pub fn delete(&mut self, field_id: u16, key: T, value: u32) {
pub fn delete(&mut self, field_id: u16, key: T, values_to_remove: &RoaringBitmap) {
if let Some(values_field_id) = self.elements.get_mut(&field_id) {
if let Some(values) = values_field_id.get_mut(&key) {
values.remove(value);
*values -= values_to_remove;
if values.is_empty() {
values_field_id.remove(&key);
}
@ -1103,8 +1104,14 @@ mod fuzz {
}
#[derive(Clone, DefaultMutator, serde::Serialize, serde::Deserialize)]
enum OperationKind {
Insert(Vec<u8>),
Delete(u8),
Insert(
#[field_mutator(VecMutator<u8, U8Mutator> = { VecMutator::new(U8Mutator::default(), 0 ..= 10) })]
Vec<u8>,
),
Delete(
#[field_mutator(VecMutator<u8, U8Mutator> = { VecMutator::new(U8Mutator::default(), 0 ..= 10) })]
Vec<u8>,
),
}
#[no_coverage]
@ -1131,13 +1138,23 @@ mod fuzz {
index.insert(&mut txn, *field_id, key, &bitmap);
trivial_db.insert(*field_id, *key, &bitmap);
}
OperationKind::Delete(value) => {
if let Some(keys) = value_to_keys.get(value) {
for key in keys {
index.delete_single_docid(&mut txn, *field_id, key, *value as u32);
trivial_db.delete(*field_id, *key, *value as u32);
OperationKind::Delete(values) => {
let values = RoaringBitmap::from_iter(values.iter().copied().map(|x| x as u32));
let mut values_per_key = HashMap::new();
for value in values {
if let Some(keys) = value_to_keys.get(&(value as u8)) {
for key in keys {
let values: &mut RoaringBitmap =
values_per_key.entry(key).or_default();
values.insert(value);
}
}
}
for (key, values) in values_per_key {
index.delete(&mut txn, *field_id, &key, &values);
trivial_db.delete(*field_id, *key, &values);
}
}
}
}
@ -1221,7 +1238,7 @@ mod fuzz {
{"key":166, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[67]}},
{"key":64, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[61]}},
{"key":183, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[210]}},
{"key":250, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Delete":50}}
{"key":250, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Delete":[50]}}
]
"#;
let operations: Vec<Operation<u16>> = serde_json::from_str(operations).unwrap();
@ -1250,7 +1267,7 @@ mod fuzz {
{"key":200, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[5]}},
{"key":93, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[98]}},
{"key":162, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[5]}},
{"key":80, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Delete":210}}
{"key":80, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Delete":[210]}}
]
"#;
let operations: Vec<Operation<u16>> = serde_json::from_str(operations).unwrap();
@ -1285,7 +1302,7 @@ mod fuzz {
let operations = r#"[
{"key":63499, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Insert":[87]}},
{"key":25374, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Insert":[14]}},
{"key":64481, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Delete":87}},
{"key":64481, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Delete":[87]}},
{"key":23038, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Insert":[173]}},
{"key":14862, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Insert":[8]}},
{"key":13145, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Insert":[5,64]}},
@ -1337,7 +1354,7 @@ mod fuzz {
"max_group_size":4,
"min_level_size":25,
"field_id":3,
"kind":{"Delete":11}
"kind":{"Delete":[11]}
}
]
"#;

View File

@ -76,13 +76,14 @@ pub const FACET_MAX_GROUP_SIZE: u8 = 8;
pub const FACET_GROUP_SIZE: u8 = 4;
pub const FACET_MIN_LEVEL_SIZE: u8 = 5;
use std::fs::File;
use self::incremental::FacetsUpdateIncremental;
use super::FacetsUpdateBulk;
use crate::facet::FacetType;
use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::ByteSliceRefCodec;
use crate::{Index, Result};
use std::fs::File;
pub mod bulk;
pub mod delete;
@ -153,6 +154,7 @@ impl<'i> FacetsUpdate<'i> {
pub(crate) mod tests {
use std::cell::Cell;
use std::fmt::Display;
use std::iter::FromIterator;
use std::marker::PhantomData;
use std::rc::Rc;
@ -170,7 +172,7 @@ pub(crate) mod tests {
use crate::update::FacetsUpdateIncrementalInner;
use crate::CboRoaringBitmapCodec;
// A dummy index that only contains the facet database, used for testing
/// A dummy index that only contains the facet database, used for testing
pub struct FacetIndex<BoundCodec>
where
for<'a> BoundCodec:
@ -287,17 +289,9 @@ pub(crate) mod tests {
key: &'a <BoundCodec as BytesEncode<'a>>::EItem,
docid: u32,
) {
let update = FacetsUpdateIncrementalInner {
db: self.content,
group_size: self.group_size.get(),
min_level_size: self.min_level_size.get(),
max_group_size: self.max_group_size.get(),
};
let key_bytes = BoundCodec::bytes_encode(&key).unwrap();
let mut docids = RoaringBitmap::new();
docids.insert(docid);
update.delete(wtxn, field_id, &key_bytes, &docids).unwrap();
self.delete(wtxn, field_id, key, &RoaringBitmap::from_iter(std::iter::once(docid)))
}
pub fn delete<'a>(
&self,
wtxn: &'a mut RwTxn,