mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-29 16:24:26 +01:00
Fix bugs in incremental facet indexing with variable parameters
e.g. add one facet value incrementally with a group_size = X and then add another one with group_size = Y. It is not actually possible to do so with the public API of milli, but I wanted to make sure the algorithm worked well in those cases anyway. The bugs were found by fuzzing the code with fuzzcheck, which I've added to milli as a conditional dev-dependency, but it can be removed later.
This commit is contained in:
parent
de52a9bf75
commit
86d9f50b9c
2
.gitignore
vendored
2
.gitignore
vendored
@ -2,6 +2,8 @@
|
|||||||
/target
|
/target
|
||||||
/Cargo.lock
|
/Cargo.lock
|
||||||
|
|
||||||
|
milli/target/
|
||||||
|
|
||||||
# datasets
|
# datasets
|
||||||
*.csv
|
*.csv
|
||||||
*.mmdb
|
*.mmdb
|
||||||
|
@ -56,6 +56,9 @@ maplit = "1.0.2"
|
|||||||
md5 = "0.7.0"
|
md5 = "0.7.0"
|
||||||
rand = {version = "0.8.5", features = ["small_rng"] }
|
rand = {version = "0.8.5", features = ["small_rng"] }
|
||||||
|
|
||||||
|
[target.'cfg(fuzzing)'.dev-dependencies]
|
||||||
|
fuzzcheck = { path = "../../fuzzcheck-rs/fuzzcheck" }
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = [ "charabia/default" ]
|
default = [ "charabia/default" ]
|
||||||
|
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
#![cfg_attr(all(test, fuzzing), feature(no_coverage))]
|
||||||
|
|
||||||
#[macro_use]
|
#[macro_use]
|
||||||
pub mod documents;
|
pub mod documents;
|
||||||
|
|
||||||
|
@ -9,8 +9,7 @@ use super::{Criterion, CriterionParameters, CriterionResult};
|
|||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec};
|
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec};
|
||||||
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
|
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
|
||||||
use crate::search::facet::ascending_facet_sort;
|
use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
|
||||||
use crate::search::facet::descending_facet_sort;
|
|
||||||
use crate::search::query_tree::Operation;
|
use crate::search::query_tree::Operation;
|
||||||
use crate::{FieldId, Index, Result};
|
use crate::{FieldId, Index, Result};
|
||||||
|
|
||||||
|
@ -1,11 +1,13 @@
|
|||||||
|
use std::ops::ControlFlow;
|
||||||
|
|
||||||
|
use heed::Result;
|
||||||
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::{get_first_facet_value, get_highest_level};
|
use super::{get_first_facet_value, get_highest_level};
|
||||||
use crate::heed_codec::facet::{
|
use crate::heed_codec::facet::{
|
||||||
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec,
|
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec,
|
||||||
};
|
};
|
||||||
use crate::DocumentId;
|
use crate::DocumentId;
|
||||||
use heed::Result;
|
|
||||||
use roaring::RoaringBitmap;
|
|
||||||
use std::ops::ControlFlow;
|
|
||||||
|
|
||||||
/// Call the given closure on the facet distribution of the candidate documents.
|
/// Call the given closure on the facet distribution of the candidate documents.
|
||||||
///
|
///
|
||||||
|
@ -1,11 +1,12 @@
|
|||||||
pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET};
|
|
||||||
pub use self::filter::Filter;
|
|
||||||
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec};
|
|
||||||
pub use facet_sort_ascending::ascending_facet_sort;
|
pub use facet_sort_ascending::ascending_facet_sort;
|
||||||
pub use facet_sort_descending::descending_facet_sort;
|
pub use facet_sort_descending::descending_facet_sort;
|
||||||
use heed::types::{ByteSlice, DecodeIgnore};
|
use heed::types::{ByteSlice, DecodeIgnore};
|
||||||
use heed::{BytesDecode, RoTxn};
|
use heed::{BytesDecode, RoTxn};
|
||||||
|
|
||||||
|
pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET};
|
||||||
|
pub use self::filter::Filter;
|
||||||
|
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||||
|
|
||||||
mod facet_distribution;
|
mod facet_distribution;
|
||||||
mod facet_distribution_iter;
|
mod facet_distribution_iter;
|
||||||
mod facet_range_search;
|
mod facet_range_search;
|
||||||
|
@ -14,6 +14,7 @@ use crate::{CboRoaringBitmapCodec, FieldId, Index, Result};
|
|||||||
|
|
||||||
enum InsertionResult {
|
enum InsertionResult {
|
||||||
InPlace,
|
InPlace,
|
||||||
|
Expand,
|
||||||
Insert,
|
Insert,
|
||||||
}
|
}
|
||||||
enum DeletionResult {
|
enum DeletionResult {
|
||||||
@ -251,6 +252,7 @@ impl FacetsUpdateIncrementalInner {
|
|||||||
|
|
||||||
return Ok(InsertionResult::InPlace);
|
return Ok(InsertionResult::InPlace);
|
||||||
}
|
}
|
||||||
|
InsertionResult::Expand => {}
|
||||||
InsertionResult::Insert => {}
|
InsertionResult::Insert => {}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -258,7 +260,7 @@ impl FacetsUpdateIncrementalInner {
|
|||||||
// of a new key. Therefore, it may be the case that we need to modify the left bound of the
|
// of a new key. Therefore, it may be the case that we need to modify the left bound of the
|
||||||
// insertion key (see documentation of `find_insertion_key_value` for an example of when that
|
// insertion key (see documentation of `find_insertion_key_value` for an example of when that
|
||||||
// could happen).
|
// could happen).
|
||||||
let insertion_key = {
|
let (insertion_key, insertion_key_was_modified) = {
|
||||||
let mut new_insertion_key = insertion_key.clone();
|
let mut new_insertion_key = insertion_key.clone();
|
||||||
let mut key_should_be_modified = false;
|
let mut key_should_be_modified = false;
|
||||||
|
|
||||||
@ -271,7 +273,7 @@ impl FacetsUpdateIncrementalInner {
|
|||||||
assert!(is_deleted);
|
assert!(is_deleted);
|
||||||
self.db.put(txn, &new_insertion_key.as_ref(), &insertion_value)?;
|
self.db.put(txn, &new_insertion_key.as_ref(), &insertion_value)?;
|
||||||
}
|
}
|
||||||
new_insertion_key
|
(new_insertion_key, key_should_be_modified)
|
||||||
};
|
};
|
||||||
// Now we know that the insertion key contains the `facet_value`.
|
// Now we know that the insertion key contains the `facet_value`.
|
||||||
|
|
||||||
@ -280,20 +282,25 @@ impl FacetsUpdateIncrementalInner {
|
|||||||
// 2. Merge the previous docids with the new one
|
// 2. Merge the previous docids with the new one
|
||||||
let mut updated_value = insertion_value;
|
let mut updated_value = insertion_value;
|
||||||
|
|
||||||
|
if matches!(result, InsertionResult::Insert) {
|
||||||
updated_value.size += 1;
|
updated_value.size += 1;
|
||||||
|
}
|
||||||
|
|
||||||
if updated_value.size < max_group_size {
|
if updated_value.size < max_group_size {
|
||||||
updated_value.bitmap |= docids;
|
updated_value.bitmap |= docids;
|
||||||
self.db.put(txn, &insertion_key.as_ref(), &updated_value)?;
|
self.db.put(txn, &insertion_key.as_ref(), &updated_value)?;
|
||||||
|
if insertion_key_was_modified {
|
||||||
|
return Ok(InsertionResult::Expand);
|
||||||
|
} else {
|
||||||
return Ok(InsertionResult::InPlace);
|
return Ok(InsertionResult::InPlace);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// We've increased the group size of the value and realised it has become greater than or equal to `max_group_size`
|
// We've increased the group size of the value and realised it has become greater than or equal to `max_group_size`
|
||||||
// Therefore it must be split into two nodes.
|
// Therefore it must be split into two nodes.
|
||||||
|
|
||||||
let size_left = max_group_size / 2;
|
let size_left = updated_value.size / 2;
|
||||||
let size_right = max_group_size - size_left;
|
let size_right = updated_value.size - size_left;
|
||||||
|
|
||||||
let level_below = level - 1;
|
let level_below = level - 1;
|
||||||
|
|
||||||
@ -303,7 +310,8 @@ impl FacetsUpdateIncrementalInner {
|
|||||||
left_bound: insertion_key.left_bound.as_slice(),
|
left_bound: insertion_key.left_bound.as_slice(),
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut iter = self.db.range(&txn, &(start_key..))?.take(max_group_size as usize);
|
let mut iter =
|
||||||
|
self.db.range(&txn, &(start_key..))?.take((size_left as usize) + (size_right as usize));
|
||||||
|
|
||||||
let group_left = {
|
let group_left = {
|
||||||
let mut values_left = RoaringBitmap::new();
|
let mut values_left = RoaringBitmap::new();
|
||||||
@ -368,6 +376,7 @@ impl FacetsUpdateIncrementalInner {
|
|||||||
self.insert_in_level(txn, field_id, highest_level as u8, facet_value, docids)?;
|
self.insert_in_level(txn, field_id, highest_level as u8, facet_value, docids)?;
|
||||||
match result {
|
match result {
|
||||||
InsertionResult::InPlace => return Ok(()),
|
InsertionResult::InPlace => return Ok(()),
|
||||||
|
InsertionResult::Expand => return Ok(()),
|
||||||
InsertionResult::Insert => {}
|
InsertionResult::Insert => {}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -393,8 +402,11 @@ impl FacetsUpdateIncrementalInner {
|
|||||||
.as_polymorph()
|
.as_polymorph()
|
||||||
.prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(&txn, &highest_level_prefix)?;
|
.prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(&txn, &highest_level_prefix)?;
|
||||||
|
|
||||||
|
let nbr_new_groups = size_highest_level / self.group_size as usize;
|
||||||
|
let nbr_leftover_elements = size_highest_level % self.group_size as usize;
|
||||||
|
|
||||||
let mut to_add = vec![];
|
let mut to_add = vec![];
|
||||||
for _ in 0..self.min_level_size {
|
for _ in 0..nbr_new_groups {
|
||||||
let mut first_key = None;
|
let mut first_key = None;
|
||||||
let mut values = RoaringBitmap::new();
|
let mut values = RoaringBitmap::new();
|
||||||
for _ in 0..group_size {
|
for _ in 0..group_size {
|
||||||
@ -415,6 +427,30 @@ impl FacetsUpdateIncrementalInner {
|
|||||||
let value = FacetGroupValue { size: group_size as u8, bitmap: values };
|
let value = FacetGroupValue { size: group_size as u8, bitmap: values };
|
||||||
to_add.push((key.into_owned(), value));
|
to_add.push((key.into_owned(), value));
|
||||||
}
|
}
|
||||||
|
// now we add the rest of the level, in case its size is > group_size * min_level_size
|
||||||
|
// this can indeed happen if the min_level_size parameter changes between two calls to `insert`
|
||||||
|
if nbr_leftover_elements > 0 {
|
||||||
|
let mut first_key = None;
|
||||||
|
let mut values = RoaringBitmap::new();
|
||||||
|
for _ in 0..nbr_leftover_elements {
|
||||||
|
let (key_bytes, value_i) = groups_iter.next().unwrap()?;
|
||||||
|
let key_i = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes)
|
||||||
|
.ok_or(Error::Encoding)?;
|
||||||
|
|
||||||
|
if first_key.is_none() {
|
||||||
|
first_key = Some(key_i);
|
||||||
|
}
|
||||||
|
values |= value_i.bitmap;
|
||||||
|
}
|
||||||
|
let key = FacetGroupKey {
|
||||||
|
field_id,
|
||||||
|
level: highest_level + 1,
|
||||||
|
left_bound: first_key.unwrap().left_bound,
|
||||||
|
};
|
||||||
|
let value = FacetGroupValue { size: nbr_leftover_elements as u8, bitmap: values };
|
||||||
|
to_add.push((key.into_owned(), value));
|
||||||
|
}
|
||||||
|
|
||||||
drop(groups_iter);
|
drop(groups_iter);
|
||||||
for (key, value) in to_add {
|
for (key, value) in to_add {
|
||||||
self.db.put(txn, &key.as_ref(), &value)?;
|
self.db.put(txn, &key.as_ref(), &value)?;
|
||||||
@ -983,243 +1019,345 @@ mod tests {
|
|||||||
|
|
||||||
// fuzz tests
|
// fuzz tests
|
||||||
}
|
}
|
||||||
// #[cfg(all(test, fuzzing))]
|
#[cfg(all(test, fuzzing))]
|
||||||
// mod fuzz {
|
mod fuzz {
|
||||||
// use crate::codec::U16Codec;
|
use std::borrow::Cow;
|
||||||
|
use std::collections::{BTreeMap, HashMap};
|
||||||
|
use std::convert::TryFrom;
|
||||||
|
use std::rc::Rc;
|
||||||
|
|
||||||
// use super::tests::verify_structure_validity;
|
use fuzzcheck::mutators::integer_within_range::{U16WithinRangeMutator, U8WithinRangeMutator};
|
||||||
// use super::*;
|
use fuzzcheck::DefaultMutator;
|
||||||
// use fuzzcheck::mutators::integer_within_range::U16WithinRangeMutator;
|
use heed::BytesEncode;
|
||||||
// use fuzzcheck::DefaultMutator;
|
use roaring::RoaringBitmap;
|
||||||
// use roaring::RoaringBitmap;
|
use tempfile::TempDir;
|
||||||
// use std::collections::BTreeMap;
|
|
||||||
// use std::collections::HashMap;
|
|
||||||
|
|
||||||
// #[derive(Default)]
|
use super::*;
|
||||||
// pub struct TrivialDatabase<T> {
|
use crate::milli_snap;
|
||||||
// pub elements: BTreeMap<u16, BTreeMap<T, RoaringBitmap>>,
|
use crate::update::facet::tests::FacetIndex;
|
||||||
// }
|
|
||||||
// impl<T> TrivialDatabase<T>
|
|
||||||
// where
|
|
||||||
// T: Ord + Clone + Copy + Eq + std::fmt::Debug,
|
|
||||||
// {
|
|
||||||
// pub fn insert(&mut self, field_id: u16, new_key: T, new_values: &RoaringBitmap) {
|
|
||||||
// if new_values.is_empty() {
|
|
||||||
// return;
|
|
||||||
// }
|
|
||||||
// let values_field_id = self.elements.entry(field_id).or_default();
|
|
||||||
// let values = values_field_id.entry(new_key).or_default();
|
|
||||||
// *values |= new_values;
|
|
||||||
// }
|
|
||||||
// pub fn delete(&mut self, field_id: u16, key: T, value: u32) {
|
|
||||||
// if let Some(values_field_id) = self.elements.get_mut(&field_id) {
|
|
||||||
// if let Some(values) = values_field_id.get_mut(&key) {
|
|
||||||
// values.remove(value);
|
|
||||||
// if values.is_empty() {
|
|
||||||
// values_field_id.remove(&key);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// if values_field_id.is_empty() {
|
|
||||||
// self.elements.remove(&field_id);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// #[derive(Clone, DefaultMutator, serde::Serialize, serde::Deserialize)]
|
|
||||||
// struct Operation<Key> {
|
|
||||||
// key: Key,
|
|
||||||
// #[field_mutator(U16WithinRangeMutator = { U16WithinRangeMutator::new(..=3) })]
|
|
||||||
// field_id: u16,
|
|
||||||
// kind: OperationKind,
|
|
||||||
// }
|
|
||||||
// #[derive(Clone, DefaultMutator, serde::Serialize, serde::Deserialize)]
|
|
||||||
// enum OperationKind {
|
|
||||||
// Insert(Vec<u8>),
|
|
||||||
// Delete(u8),
|
|
||||||
// }
|
|
||||||
|
|
||||||
// fn compare_with_trivial_database(
|
struct NEU16Codec;
|
||||||
// tempdir: Rc<TempDir>,
|
impl<'a> BytesEncode<'a> for NEU16Codec {
|
||||||
// group_size: u8,
|
type EItem = u16;
|
||||||
// max_group_size: u8,
|
#[no_coverage]
|
||||||
// operations: &[Operation<u16>],
|
fn bytes_encode(item: &'a Self::EItem) -> Option<std::borrow::Cow<'a, [u8]>> {
|
||||||
// ) {
|
Some(Cow::Owned(item.to_be_bytes().to_vec()))
|
||||||
// let index = FacetIndex::<OrderedF64Codec>::open_from_tempdir(tempdir, group_size, max_group_size);
|
}
|
||||||
// let mut trivial_db = TrivialDatabase::<u16>::default();
|
}
|
||||||
// let mut value_to_keys = HashMap::<u8, Vec<u16>>::new();
|
impl<'a> BytesDecode<'a> for NEU16Codec {
|
||||||
|
type DItem = u16;
|
||||||
|
#[no_coverage]
|
||||||
|
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||||
|
let bytes = <[u8; 2]>::try_from(&bytes[0..=1]).unwrap();
|
||||||
|
Some(u16::from_be_bytes(bytes))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
#[derive(Default)]
|
||||||
|
pub struct TrivialDatabase<T> {
|
||||||
|
pub elements: BTreeMap<u16, BTreeMap<T, RoaringBitmap>>,
|
||||||
|
}
|
||||||
|
impl<T> TrivialDatabase<T>
|
||||||
|
where
|
||||||
|
T: Ord + Clone + Copy + Eq + std::fmt::Debug,
|
||||||
|
{
|
||||||
|
#[no_coverage]
|
||||||
|
pub fn insert(&mut self, field_id: u16, new_key: T, new_values: &RoaringBitmap) {
|
||||||
|
if new_values.is_empty() {
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
let values_field_id = self.elements.entry(field_id).or_default();
|
||||||
|
let values = values_field_id.entry(new_key).or_default();
|
||||||
|
*values |= new_values;
|
||||||
|
}
|
||||||
|
#[no_coverage]
|
||||||
|
pub fn delete(&mut self, field_id: u16, key: T, value: u32) {
|
||||||
|
if let Some(values_field_id) = self.elements.get_mut(&field_id) {
|
||||||
|
if let Some(values) = values_field_id.get_mut(&key) {
|
||||||
|
values.remove(value);
|
||||||
|
if values.is_empty() {
|
||||||
|
values_field_id.remove(&key);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if values_field_id.is_empty() {
|
||||||
|
self.elements.remove(&field_id);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#[derive(Clone, DefaultMutator, serde::Serialize, serde::Deserialize)]
|
||||||
|
struct Operation<Key> {
|
||||||
|
key: Key,
|
||||||
|
#[field_mutator(U8WithinRangeMutator = { U8WithinRangeMutator::new(..32) })]
|
||||||
|
group_size: u8,
|
||||||
|
#[field_mutator(U8WithinRangeMutator = { U8WithinRangeMutator::new(..32) })]
|
||||||
|
max_group_size: u8,
|
||||||
|
#[field_mutator(U8WithinRangeMutator = { U8WithinRangeMutator::new(..32) })]
|
||||||
|
min_level_size: u8,
|
||||||
|
#[field_mutator(U16WithinRangeMutator = { U16WithinRangeMutator::new(..=3) })]
|
||||||
|
field_id: u16,
|
||||||
|
kind: OperationKind,
|
||||||
|
}
|
||||||
|
#[derive(Clone, DefaultMutator, serde::Serialize, serde::Deserialize)]
|
||||||
|
enum OperationKind {
|
||||||
|
Insert(Vec<u8>),
|
||||||
|
Delete(u8),
|
||||||
|
}
|
||||||
|
|
||||||
|
#[no_coverage]
|
||||||
|
fn compare_with_trivial_database(tempdir: Rc<TempDir>, operations: &[Operation<u16>]) {
|
||||||
|
let index = FacetIndex::<NEU16Codec>::open_from_tempdir(tempdir, 4, 8, 5); // dummy params, they'll be overwritten
|
||||||
// let mut txn = index.env.write_txn().unwrap();
|
// let mut txn = index.env.write_txn().unwrap();
|
||||||
// for Operation { key, field_id, kind } in operations {
|
let mut txn = index.env.write_txn().unwrap();
|
||||||
// match kind {
|
|
||||||
// OperationKind::Insert(values) => {
|
|
||||||
// let mut bitmap = RoaringBitmap::new();
|
|
||||||
// for value in values {
|
|
||||||
// bitmap.insert(*value as u32);
|
|
||||||
// value_to_keys.entry(*value).or_default().push(*key);
|
|
||||||
// }
|
|
||||||
// index.insert(&mut txn, *field_id, key, &bitmap);
|
|
||||||
// trivial_db.insert(*field_id, *key, &bitmap);
|
|
||||||
// }
|
|
||||||
// OperationKind::Delete(value) => {
|
|
||||||
// if let Some(keys) = value_to_keys.get(value) {
|
|
||||||
// for key in keys {
|
|
||||||
// index.delete(&mut txn, *field_id, key, *value as u32);
|
|
||||||
// trivial_db.delete(*field_id, *key, *value as u32);
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// for (field_id, values_field_id) in trivial_db.elements.iter() {
|
|
||||||
// let level0iter = index
|
|
||||||
// .db
|
|
||||||
// .content
|
|
||||||
// .as_polymorph()
|
|
||||||
// .prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(
|
|
||||||
// &mut txn,
|
|
||||||
// &field_id.to_be_bytes(),
|
|
||||||
// )
|
|
||||||
// .unwrap();
|
|
||||||
|
|
||||||
// for ((key, values), group) in values_field_id.iter().zip(level0iter) {
|
let mut trivial_db = TrivialDatabase::<u16>::default();
|
||||||
// let (group_key, group_values) = group.unwrap();
|
let mut value_to_keys = HashMap::<u8, Vec<u16>>::new();
|
||||||
// let group_key = FacetGroupKeyCodec::<U16Codec>::bytes_decode(group_key).unwrap();
|
for Operation { key, group_size, max_group_size, min_level_size, field_id, kind } in
|
||||||
// assert_eq!(key, &group_key.left_bound);
|
operations
|
||||||
// assert_eq!(values, &group_values.bitmap);
|
{
|
||||||
// }
|
index.set_group_size(*group_size);
|
||||||
// }
|
index.set_max_group_size(*max_group_size);
|
||||||
|
index.set_min_level_size(*min_level_size);
|
||||||
|
match kind {
|
||||||
|
OperationKind::Insert(values) => {
|
||||||
|
let mut bitmap = RoaringBitmap::new();
|
||||||
|
for value in values {
|
||||||
|
bitmap.insert(*value as u32);
|
||||||
|
value_to_keys.entry(*value).or_default().push(*key);
|
||||||
|
}
|
||||||
|
index.insert(&mut txn, *field_id, key, &bitmap);
|
||||||
|
trivial_db.insert(*field_id, *key, &bitmap);
|
||||||
|
}
|
||||||
|
OperationKind::Delete(value) => {
|
||||||
|
if let Some(keys) = value_to_keys.get(value) {
|
||||||
|
for key in keys {
|
||||||
|
index.delete(&mut txn, *field_id, key, *value as u32);
|
||||||
|
trivial_db.delete(*field_id, *key, *value as u32);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// txn.commit().unwrap();
|
for (field_id, values_field_id) in trivial_db.elements.iter() {
|
||||||
// let mut txn = index.env.write_txn().unwrap();
|
let level0iter = index
|
||||||
// for (field_id, values_field_id) in trivial_db.elements.iter() {
|
.content
|
||||||
// let level0iter = index
|
.as_polymorph()
|
||||||
// .db
|
.prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(
|
||||||
// .content
|
&mut txn,
|
||||||
// .as_polymorph()
|
&field_id.to_be_bytes(),
|
||||||
// .prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(&txn, &field_id.to_be_bytes())
|
)
|
||||||
// .unwrap();
|
.unwrap();
|
||||||
|
|
||||||
// for ((key, values), group) in values_field_id.iter().zip(level0iter) {
|
for ((key, values), group) in values_field_id.iter().zip(level0iter) {
|
||||||
// let (group_key, group_values) = group.unwrap();
|
let (group_key, group_values) = group.unwrap();
|
||||||
// let group_key = FacetGroupKeyCodec::<U16Codec>::bytes_decode(group_key).unwrap();
|
let group_key = FacetGroupKeyCodec::<NEU16Codec>::bytes_decode(group_key).unwrap();
|
||||||
// assert_eq!(key, &group_key.left_bound);
|
assert_eq!(key, &group_key.left_bound);
|
||||||
// assert_eq!(values, &group_values.bitmap);
|
assert_eq!(values, &group_values.bitmap);
|
||||||
// }
|
}
|
||||||
// index.verify_structure_validity(*field_id);
|
}
|
||||||
// }
|
|
||||||
|
|
||||||
// index.db.content.clear(&mut txn).unwrap();
|
for (field_id, values_field_id) in trivial_db.elements.iter() {
|
||||||
// txn.commit().unwrap();
|
let level0iter = index
|
||||||
// }
|
.content
|
||||||
|
.as_polymorph()
|
||||||
|
.prefix_iter::<_, ByteSlice, FacetGroupValueCodec>(&txn, &field_id.to_be_bytes())
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
// #[test]
|
for ((key, values), group) in values_field_id.iter().zip(level0iter) {
|
||||||
// fn fuzz() {
|
let (group_key, group_values) = group.unwrap();
|
||||||
// let tempdir = Rc::new(TempDir::new().unwrap());
|
let group_key = FacetGroupKeyCodec::<NEU16Codec>::bytes_decode(group_key).unwrap();
|
||||||
// let tempdir_cloned = tempdir.clone();
|
assert_eq!(key, &group_key.left_bound);
|
||||||
// let result = fuzzcheck::fuzz_test(move |x: &(u8, u8, Vec<Operation<u16>>)| {
|
assert_eq!(values, &group_values.bitmap);
|
||||||
// compare_with_trivial_database(tempdir_cloned.clone(), x.0, x.1, &x.2)
|
}
|
||||||
// })
|
index.verify_structure_validity(&txn, *field_id);
|
||||||
// .default_mutator()
|
}
|
||||||
// .serde_serializer()
|
txn.abort().unwrap();
|
||||||
// .default_sensor_and_pool_with_custom_filter(|file, function| {
|
}
|
||||||
// if file.is_relative()
|
|
||||||
// && !function.contains("serde")
|
|
||||||
// && !function.contains("tests::")
|
|
||||||
// && !function.contains("fuzz::")
|
|
||||||
// && !function.contains("display_bitmap")
|
|
||||||
// {
|
|
||||||
// true
|
|
||||||
// } else {
|
|
||||||
// false
|
|
||||||
// }
|
|
||||||
// })
|
|
||||||
// .arguments_from_cargo_fuzzcheck()
|
|
||||||
// .launch();
|
|
||||||
// assert!(!result.found_test_failure);
|
|
||||||
// }
|
|
||||||
|
|
||||||
// #[test]
|
#[test]
|
||||||
// fn reproduce_bug() {
|
#[no_coverage]
|
||||||
// let operations = r#"
|
fn fuzz() {
|
||||||
// [
|
let tempdir = Rc::new(TempDir::new().unwrap());
|
||||||
// {"key":0, "field_id": 0, "kind":{"Insert":[109]}},
|
let tempdir_cloned = tempdir.clone();
|
||||||
// {"key":143, "field_id": 0, "kind":{"Insert":[243]}},
|
let result = fuzzcheck::fuzz_test(move |operations: &[Operation<u16>]| {
|
||||||
// {"key":90, "field_id": 0, "kind":{"Insert":[217]}},
|
compare_with_trivial_database(tempdir_cloned.clone(), operations)
|
||||||
// {"key":172, "field_id": 0, "kind":{"Insert":[94]}},
|
})
|
||||||
// {"key":27, "field_id": 0, "kind":{"Insert":[4]}},
|
.default_mutator()
|
||||||
// {"key":124, "field_id": 0, "kind":{"Insert":[0]}},
|
.serde_serializer()
|
||||||
// {"key":123, "field_id": 0, "kind":{"Insert":[0]}},
|
.default_sensor_and_pool_with_custom_filter(|file, function| {
|
||||||
// {"key":67, "field_id": 0, "kind":{"Insert":[109]}},
|
file == std::path::Path::new("milli/src/update/facet/incremental.rs")
|
||||||
// {"key":13, "field_id": 0, "kind":{"Insert":[0]}},
|
&& !function.contains("serde")
|
||||||
// {"key":162, "field_id": 0, "kind":{"Insert":[213]}},
|
&& !function.contains("tests::")
|
||||||
// {"key":235, "field_id": 0, "kind":{"Insert":[67]}},
|
&& !function.contains("fuzz::")
|
||||||
// {"key":251, "field_id": 0, "kind":{"Insert":[50]}},
|
&& !function.contains("display_bitmap")
|
||||||
// {"key":218, "field_id": 0, "kind":{"Insert":[164]}},
|
})
|
||||||
// {"key":166, "field_id": 0, "kind":{"Insert":[67]}},
|
.arguments_from_cargo_fuzzcheck()
|
||||||
// {"key":64, "field_id": 0, "kind":{"Insert":[61]}},
|
.launch();
|
||||||
// {"key":183, "field_id": 0, "kind":{"Insert":[210]}},
|
assert!(!result.found_test_failure);
|
||||||
// {"key":250, "field_id": 0, "kind":{"Delete":50}}
|
}
|
||||||
// ]
|
|
||||||
// "#;
|
|
||||||
// let operations: Vec<Operation<u16>> = serde_json::from_str(operations).unwrap();
|
|
||||||
// let tempdir = TempDir::new().unwrap();
|
|
||||||
// compare_with_trivial_database(Rc::new(tempdir), 4, 8, &operations);
|
|
||||||
// }
|
|
||||||
|
|
||||||
// #[test]
|
#[test]
|
||||||
// fn reproduce_bug2() {
|
#[no_coverage]
|
||||||
// let operations = r#"
|
fn reproduce_bug1() {
|
||||||
// [
|
let operations = r#"
|
||||||
// {"key":102, "field_id": 0, "kind":{"Insert":[122]}},
|
[
|
||||||
// {"key":73, "field_id": 0, "kind":{"Insert":[132]}},
|
{"key":0, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[109]}},
|
||||||
// {"key":20, "field_id": 0, "kind":{"Insert":[215]}},
|
{"key":143, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[243]}},
|
||||||
// {"key":39, "field_id": 0, "kind":{"Insert":[152]}},
|
{"key":90, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[217]}},
|
||||||
// {"key":151, "field_id": 0, "kind":{"Insert":[226]}},
|
{"key":172, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[94]}},
|
||||||
// {"key":17, "field_id": 0, "kind":{"Insert":[101]}},
|
{"key":27, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[4]}},
|
||||||
// {"key":74, "field_id": 0, "kind":{"Insert":[210]}},
|
{"key":124, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[0]}},
|
||||||
// {"key":2, "field_id": 0, "kind":{"Insert":[130]}},
|
{"key":123, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[0]}},
|
||||||
// {"key":64, "field_id": 0, "kind":{"Insert":[180]}},
|
{"key":67, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[109]}},
|
||||||
// {"key":83, "field_id": 0, "kind":{"Insert":[250]}},
|
{"key":13, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[0]}},
|
||||||
// {"key":80, "field_id": 0, "kind":{"Insert":[210]}},
|
{"key":162, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[213]}},
|
||||||
// {"key":113, "field_id": 0, "kind":{"Insert":[63]}},
|
{"key":235, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[67]}},
|
||||||
// {"key":201, "field_id": 0, "kind":{"Insert":[210]}},
|
{"key":251, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[50]}},
|
||||||
// {"key":200, "field_id": 0, "kind":{"Insert":[5]}},
|
{"key":218, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[164]}},
|
||||||
// {"key":93, "field_id": 0, "kind":{"Insert":[98]}},
|
{"key":166, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[67]}},
|
||||||
// {"key":162, "field_id": 0, "kind":{"Insert":[5]}},
|
{"key":64, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[61]}},
|
||||||
// {"key":80, "field_id": 0, "kind":{"Delete":210}}
|
{"key":183, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[210]}},
|
||||||
// ]
|
{"key":250, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Delete":50}}
|
||||||
// "#;
|
]
|
||||||
// let operations: Vec<Operation<u16>> = serde_json::from_str(operations).unwrap();
|
"#;
|
||||||
// let tempdir = TempDir::new().unwrap();
|
let operations: Vec<Operation<u16>> = serde_json::from_str(operations).unwrap();
|
||||||
// compare_with_trivial_database(Rc::new(tempdir), 4, 8, &operations);
|
let tempdir = TempDir::new().unwrap();
|
||||||
// }
|
compare_with_trivial_database(Rc::new(tempdir), &operations);
|
||||||
// #[test]
|
}
|
||||||
// fn reproduce_bug3() {
|
|
||||||
// let operations = r#"
|
|
||||||
// [
|
|
||||||
// {"key":27488, "field_id": 0, "kind":{"Insert":[206]}},
|
|
||||||
// {"key":64716, "field_id": 0, "kind":{"Insert":[216]}},
|
|
||||||
// {"key":60886, "field_id": 0, "kind":{"Insert":[206]}},
|
|
||||||
// {"key":59509, "field_id": 0, "kind":{"Insert":[187,231]}},
|
|
||||||
// {"key":55057, "field_id": 0, "kind":{"Insert":[37]}},
|
|
||||||
// {"key":45200, "field_id": 0, "kind":{"Insert":[206]}},
|
|
||||||
// {"key":55056, "field_id": 0, "kind":{"Insert":[37]}},
|
|
||||||
// {"key":63679, "field_id": 0, "kind":{"Insert":[206]}},
|
|
||||||
// {"key":52155, "field_id": 0, "kind":{"Insert":[74]}},
|
|
||||||
// {"key":20648, "field_id": 0, "kind":{"Insert":[47,138,157]}}
|
|
||||||
// ]
|
|
||||||
// "#;
|
|
||||||
// let operations: Vec<Operation<u16>> = serde_json::from_str(operations).unwrap();
|
|
||||||
// let tempdir = TempDir::new().unwrap();
|
|
||||||
// compare_with_trivial_database(Rc::new(tempdir), 0, 7, &operations);
|
|
||||||
// }
|
|
||||||
|
|
||||||
// #[test]
|
#[test]
|
||||||
// fn reproduce_bug4() {
|
#[no_coverage]
|
||||||
// let operations = r#"
|
fn reproduce_bug2() {
|
||||||
// [{"key":63499, "field_id": 0, "kind":{"Insert":[87]}},{"key":25374, "field_id": 0, "kind":{"Insert":[14]}},{"key":64481, "field_id": 0, "kind":{"Delete":87}},{"key":23038, "field_id": 0, "kind":{"Insert":[173]}},{"key":14862, "field_id": 0, "kind":{"Insert":[8]}},{"key":13145, "field_id": 0, "kind":{"Insert":[5,64]}},{"key":23446, "field_id": 0, "kind":{"Insert":[86,59]}},{"key":17972, "field_id": 0, "kind":{"Insert":[58,137]}},{"key":21273, "field_id": 0, "kind":{"Insert":[121,132,81,147]}},{"key":28264, "field_id": 0, "kind":{"Insert":[36]}},{"key":46659, "field_id": 0, "kind":{"Insert":[]}}]
|
let operations = r#"
|
||||||
// "#;
|
[
|
||||||
// let operations: Vec<Operation<u16>> = serde_json::from_str(operations).unwrap();
|
{"key":102, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[122]}},
|
||||||
// let tempdir = TempDir::new().unwrap();
|
{"key":73, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[132]}},
|
||||||
// compare_with_trivial_database(Rc::new(tempdir), 2, 1, &operations);
|
{"key":20, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[215]}},
|
||||||
// }
|
{"key":39, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[152]}},
|
||||||
// }
|
{"key":151, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[226]}},
|
||||||
|
{"key":17, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[101]}},
|
||||||
|
{"key":74, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[210]}},
|
||||||
|
{"key":2, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[130]}},
|
||||||
|
{"key":64, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[180]}},
|
||||||
|
{"key":83, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[250]}},
|
||||||
|
{"key":80, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[210]}},
|
||||||
|
{"key":113, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[63]}},
|
||||||
|
{"key":201, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[210]}},
|
||||||
|
{"key":200, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[5]}},
|
||||||
|
{"key":93, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[98]}},
|
||||||
|
{"key":162, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Insert":[5]}},
|
||||||
|
{"key":80, "field_id": 0, "group_size":4, "max_group_size":8, "min_level_size":5, "kind":{"Delete":210}}
|
||||||
|
]
|
||||||
|
"#;
|
||||||
|
let operations: Vec<Operation<u16>> = serde_json::from_str(operations).unwrap();
|
||||||
|
let tempdir = TempDir::new().unwrap();
|
||||||
|
compare_with_trivial_database(Rc::new(tempdir), &operations);
|
||||||
|
}
|
||||||
|
#[test]
|
||||||
|
#[no_coverage]
|
||||||
|
fn reproduce_bug3() {
|
||||||
|
let operations = r#"
|
||||||
|
[
|
||||||
|
{"key":27488, "field_id": 0, "group_size":0, "max_group_size":7, "min_level_size":0, "kind":{"Insert":[206]}},
|
||||||
|
{"key":64716, "field_id": 0, "group_size":0, "max_group_size":7, "min_level_size":0, "kind":{"Insert":[216]}},
|
||||||
|
{"key":60886, "field_id": 0, "group_size":0, "max_group_size":7, "min_level_size":0, "kind":{"Insert":[206]}},
|
||||||
|
{"key":59509, "field_id": 0, "group_size":0, "max_group_size":7, "min_level_size":0, "kind":{"Insert":[187,231]}},
|
||||||
|
{"key":55057, "field_id": 0, "group_size":0, "max_group_size":7, "min_level_size":0, "kind":{"Insert":[37]}},
|
||||||
|
{"key":45200, "field_id": 0, "group_size":0, "max_group_size":7, "min_level_size":0, "kind":{"Insert":[206]}},
|
||||||
|
{"key":55056, "field_id": 0, "group_size":0, "max_group_size":7, "min_level_size":0, "kind":{"Insert":[37]}},
|
||||||
|
{"key":63679, "field_id": 0, "group_size":0, "max_group_size":7, "min_level_size":0, "kind":{"Insert":[206]}},
|
||||||
|
{"key":52155, "field_id": 0, "group_size":0, "max_group_size":7, "min_level_size":0, "kind":{"Insert":[74]}},
|
||||||
|
{"key":20648, "field_id": 0, "group_size":0, "max_group_size":7, "min_level_size":0, "kind":{"Insert":[47,138,157]}}
|
||||||
|
]
|
||||||
|
"#;
|
||||||
|
let operations: Vec<Operation<u16>> = serde_json::from_str(operations).unwrap();
|
||||||
|
let tempdir = TempDir::new().unwrap();
|
||||||
|
compare_with_trivial_database(Rc::new(tempdir), &operations);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
#[no_coverage]
|
||||||
|
fn reproduce_bug4() {
|
||||||
|
let operations = r#"[
|
||||||
|
{"key":63499, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Insert":[87]}},
|
||||||
|
{"key":25374, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Insert":[14]}},
|
||||||
|
{"key":64481, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Delete":87}},
|
||||||
|
{"key":23038, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Insert":[173]}},
|
||||||
|
{"key":14862, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Insert":[8]}},
|
||||||
|
{"key":13145, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Insert":[5,64]}},
|
||||||
|
{"key":23446, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Insert":[86,59]}},
|
||||||
|
{"key":17972, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Insert":[58,137]}},
|
||||||
|
{"key":21273, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Insert":[121,132,81,147]}},
|
||||||
|
{"key":28264, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Insert":[36]}},
|
||||||
|
{"key":46659, "field_id": 0, "group_size":2, "max_group_size":1, "min_level_size":0, "kind":{"Insert":[]}}
|
||||||
|
]
|
||||||
|
"#;
|
||||||
|
let operations: Vec<Operation<u16>> = serde_json::from_str(operations).unwrap();
|
||||||
|
let tempdir = TempDir::new().unwrap();
|
||||||
|
compare_with_trivial_database(Rc::new(tempdir), &operations);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
#[no_coverage]
|
||||||
|
fn reproduce_bug5() {
|
||||||
|
let input = r#"
|
||||||
|
[
|
||||||
|
{
|
||||||
|
"key":3438,
|
||||||
|
"group_size":11,
|
||||||
|
"max_group_size":0,
|
||||||
|
"min_level_size":17,
|
||||||
|
"field_id":3,
|
||||||
|
"kind":{"Insert":[198]}
|
||||||
|
},
|
||||||
|
|
||||||
|
{
|
||||||
|
"key":47098,
|
||||||
|
"group_size":0,
|
||||||
|
"max_group_size":8,
|
||||||
|
"min_level_size":0,
|
||||||
|
"field_id":3,
|
||||||
|
"kind":{"Insert":[11]}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"key":22453,
|
||||||
|
"group_size":0,
|
||||||
|
"max_group_size":0,
|
||||||
|
"min_level_size":0,
|
||||||
|
"field_id":3,
|
||||||
|
"kind":{"Insert":[145]}
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"key":14105,
|
||||||
|
"group_size":14,
|
||||||
|
"max_group_size":4,
|
||||||
|
"min_level_size":25,
|
||||||
|
"field_id":3,
|
||||||
|
"kind":{"Delete":11}
|
||||||
|
}
|
||||||
|
]
|
||||||
|
"#;
|
||||||
|
let operations: Vec<Operation<u16>> = serde_json::from_str(input).unwrap();
|
||||||
|
let tmpdir = TempDir::new().unwrap();
|
||||||
|
compare_with_trivial_database(Rc::new(tmpdir), &operations);
|
||||||
|
}
|
||||||
|
|
||||||
|
#[test]
|
||||||
|
#[no_coverage]
|
||||||
|
fn reproduce_bug6() {
|
||||||
|
let input = r#"
|
||||||
|
[
|
||||||
|
{"key":45720,"group_size":1,"max_group_size":4,"min_level_size":0,"field_id":0,"kind":{"Insert":[120]}},
|
||||||
|
{"key":37463,"group_size":1,"max_group_size":4,"min_level_size":0,"field_id":0,"kind":{"Insert":[187]}},
|
||||||
|
{"key":21512,"group_size":23,"max_group_size":20,"min_level_size":23,"field_id":0,"kind":{"Insert":[181]}},
|
||||||
|
{"key":21511,"group_size":23,"max_group_size":20,"min_level_size":23,"field_id":0,"kind":{"Insert":[181]}},
|
||||||
|
{"key":37737,"group_size":12,"max_group_size":0,"min_level_size":6,"field_id":0,"kind":{"Insert":[181]}},
|
||||||
|
{"key":53042,"group_size":23,"max_group_size":20,"min_level_size":23,"field_id":0,"kind":{"Insert":[181]}}
|
||||||
|
]
|
||||||
|
"#;
|
||||||
|
let operations: Vec<Operation<u16>> = serde_json::from_str(input).unwrap();
|
||||||
|
let tmpdir = TempDir::new().unwrap();
|
||||||
|
compare_with_trivial_database(Rc::new(tmpdir), &operations);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@ -145,6 +145,7 @@ impl<'i> FacetsUpdate<'i> {
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
pub(crate) mod tests {
|
pub(crate) mod tests {
|
||||||
|
use std::cell::Cell;
|
||||||
use std::fmt::Display;
|
use std::fmt::Display;
|
||||||
use std::marker::PhantomData;
|
use std::marker::PhantomData;
|
||||||
use std::rc::Rc;
|
use std::rc::Rc;
|
||||||
@ -170,9 +171,9 @@ pub(crate) mod tests {
|
|||||||
{
|
{
|
||||||
pub env: Env,
|
pub env: Env,
|
||||||
pub content: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
pub content: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||||
pub group_size: u8,
|
pub group_size: Cell<u8>,
|
||||||
pub min_level_size: u8,
|
pub min_level_size: Cell<u8>,
|
||||||
pub max_group_size: u8,
|
pub max_group_size: Cell<u8>,
|
||||||
_tempdir: Rc<tempfile::TempDir>,
|
_tempdir: Rc<tempfile::TempDir>,
|
||||||
_phantom: PhantomData<BoundCodec>,
|
_phantom: PhantomData<BoundCodec>,
|
||||||
}
|
}
|
||||||
@ -189,9 +190,9 @@ pub(crate) mod tests {
|
|||||||
max_group_size: u8,
|
max_group_size: u8,
|
||||||
min_level_size: u8,
|
min_level_size: u8,
|
||||||
) -> FacetIndex<BoundCodec> {
|
) -> FacetIndex<BoundCodec> {
|
||||||
let group_size = std::cmp::min(127, std::cmp::max(group_size, 2)); // 2 <= x <= 127
|
let group_size = std::cmp::min(16, std::cmp::max(group_size, 2)); // 2 <= x <= 16
|
||||||
let max_group_size = std::cmp::min(127, std::cmp::max(group_size * 2, max_group_size)); // 2*group_size <= x <= 127
|
let max_group_size = std::cmp::min(16, std::cmp::max(group_size * 2, max_group_size)); // 2*group_size <= x <= 16
|
||||||
let min_level_size = std::cmp::max(1, min_level_size); // 1 <= x <= inf
|
let min_level_size = std::cmp::min(17, std::cmp::max(1, min_level_size)); // 1 <= x <= 17
|
||||||
|
|
||||||
let mut options = heed::EnvOpenOptions::new();
|
let mut options = heed::EnvOpenOptions::new();
|
||||||
let options = options.map_size(4096 * 4 * 10 * 100);
|
let options = options.map_size(4096 * 4 * 10 * 100);
|
||||||
@ -202,13 +203,11 @@ pub(crate) mod tests {
|
|||||||
let content = env.open_database(None).unwrap().unwrap();
|
let content = env.open_database(None).unwrap().unwrap();
|
||||||
|
|
||||||
FacetIndex {
|
FacetIndex {
|
||||||
db: Database {
|
|
||||||
content,
|
content,
|
||||||
group_size,
|
group_size: Cell::new(group_size),
|
||||||
max_group_size,
|
max_group_size: Cell::new(max_group_size),
|
||||||
min_level_size,
|
min_level_size: Cell::new(min_level_size),
|
||||||
_tempdir: tempdir,
|
_tempdir: tempdir,
|
||||||
},
|
|
||||||
env,
|
env,
|
||||||
_phantom: PhantomData,
|
_phantom: PhantomData,
|
||||||
}
|
}
|
||||||
@ -229,14 +228,32 @@ pub(crate) mod tests {
|
|||||||
|
|
||||||
FacetIndex {
|
FacetIndex {
|
||||||
content,
|
content,
|
||||||
group_size,
|
group_size: Cell::new(group_size),
|
||||||
max_group_size,
|
max_group_size: Cell::new(max_group_size),
|
||||||
min_level_size,
|
min_level_size: Cell::new(min_level_size),
|
||||||
_tempdir: Rc::new(tempdir),
|
_tempdir: Rc::new(tempdir),
|
||||||
env,
|
env,
|
||||||
_phantom: PhantomData,
|
_phantom: PhantomData,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn set_group_size(&self, group_size: u8) {
|
||||||
|
// 2 <= x <= 64
|
||||||
|
self.group_size.set(std::cmp::min(64, std::cmp::max(group_size, 2)));
|
||||||
|
}
|
||||||
|
pub fn set_max_group_size(&self, max_group_size: u8) {
|
||||||
|
// 2*group_size <= x <= 128
|
||||||
|
let max_group_size = std::cmp::max(4, std::cmp::min(128, max_group_size));
|
||||||
|
self.max_group_size.set(max_group_size);
|
||||||
|
if self.group_size.get() < max_group_size / 2 {
|
||||||
|
self.group_size.set(max_group_size / 2);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
pub fn set_min_level_size(&self, min_level_size: u8) {
|
||||||
|
// 1 <= x <= inf
|
||||||
|
self.min_level_size.set(std::cmp::max(1, min_level_size));
|
||||||
|
}
|
||||||
|
|
||||||
pub fn insert<'a>(
|
pub fn insert<'a>(
|
||||||
&self,
|
&self,
|
||||||
wtxn: &'a mut RwTxn,
|
wtxn: &'a mut RwTxn,
|
||||||
@ -246,9 +263,9 @@ pub(crate) mod tests {
|
|||||||
) {
|
) {
|
||||||
let update = FacetsUpdateIncrementalInner {
|
let update = FacetsUpdateIncrementalInner {
|
||||||
db: self.content,
|
db: self.content,
|
||||||
group_size: self.group_size,
|
group_size: self.group_size.get(),
|
||||||
min_level_size: self.min_level_size,
|
min_level_size: self.min_level_size.get(),
|
||||||
max_group_size: self.max_group_size,
|
max_group_size: self.max_group_size.get(),
|
||||||
};
|
};
|
||||||
let key_bytes = BoundCodec::bytes_encode(&key).unwrap();
|
let key_bytes = BoundCodec::bytes_encode(&key).unwrap();
|
||||||
update.insert(wtxn, field_id, &key_bytes, docids).unwrap();
|
update.insert(wtxn, field_id, &key_bytes, docids).unwrap();
|
||||||
@ -262,9 +279,9 @@ pub(crate) mod tests {
|
|||||||
) {
|
) {
|
||||||
let update = FacetsUpdateIncrementalInner {
|
let update = FacetsUpdateIncrementalInner {
|
||||||
db: self.content,
|
db: self.content,
|
||||||
group_size: self.group_size,
|
group_size: self.group_size.get(),
|
||||||
min_level_size: self.min_level_size,
|
min_level_size: self.min_level_size.get(),
|
||||||
max_group_size: self.max_group_size,
|
max_group_size: self.max_group_size.get(),
|
||||||
};
|
};
|
||||||
let key_bytes = BoundCodec::bytes_encode(&key).unwrap();
|
let key_bytes = BoundCodec::bytes_encode(&key).unwrap();
|
||||||
update.delete(wtxn, field_id, &key_bytes, value).unwrap();
|
update.delete(wtxn, field_id, &key_bytes, value).unwrap();
|
||||||
@ -296,8 +313,8 @@ pub(crate) mod tests {
|
|||||||
let update = FacetsUpdateBulkInner {
|
let update = FacetsUpdateBulkInner {
|
||||||
db: self.content,
|
db: self.content,
|
||||||
new_data: Some(reader),
|
new_data: Some(reader),
|
||||||
group_size: self.group_size,
|
group_size: self.group_size.get(),
|
||||||
min_level_size: self.min_level_size,
|
min_level_size: self.min_level_size.get(),
|
||||||
};
|
};
|
||||||
|
|
||||||
update.update(wtxn, field_ids, |_, _, _| Ok(())).unwrap();
|
update.update(wtxn, field_ids, |_, _, _| Ok(())).unwrap();
|
||||||
@ -341,7 +358,7 @@ pub(crate) mod tests {
|
|||||||
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes).unwrap()
|
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes).unwrap()
|
||||||
};
|
};
|
||||||
|
|
||||||
assert!(value.size > 0 && value.size < self.max_group_size);
|
assert!(value.size > 0);
|
||||||
|
|
||||||
let mut actual_size = 0;
|
let mut actual_size = 0;
|
||||||
let mut values_below = RoaringBitmap::new();
|
let mut values_below = RoaringBitmap::new();
|
||||||
|
Loading…
Reference in New Issue
Block a user