Disable incremental facet update as a stop gap

This commit is contained in:
Louis Dureuil 2024-02-13 14:53:52 +01:00
parent b26ddfcc3d
commit 3a97d30cd9
No known key found for this signature in database
9 changed files with 11 additions and 2169 deletions

View File

@ -222,72 +222,3 @@ where
Ok(ControlFlow::Continue(()))
}
}
#[cfg(test)]
mod tests {
use std::ops::ControlFlow;
use heed::BytesDecode;
use roaring::RoaringBitmap;
use super::lexicographically_iterate_over_facet_distribution;
use crate::heed_codec::facet::OrderedF64Codec;
use crate::milli_snap;
use crate::search::facet::tests::{get_random_looking_index, get_simple_index};
#[test]
fn filter_distribution_all() {
let indexes = [get_simple_index(), get_random_looking_index()];
for (i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let candidates = (0..=255).collect::<RoaringBitmap>();
let mut results = String::new();
lexicographically_iterate_over_facet_distribution(
&txn,
index.content,
0,
&candidates,
|facet, count, _| {
let facet = OrderedF64Codec::bytes_decode(facet).unwrap();
results.push_str(&format!("{facet}: {count}\n"));
Ok(ControlFlow::Continue(()))
},
)
.unwrap();
milli_snap!(results, i);
txn.commit().unwrap();
}
}
#[test]
fn filter_distribution_all_stop_early() {
let indexes = [get_simple_index(), get_random_looking_index()];
for (i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let candidates = (0..=255).collect::<RoaringBitmap>();
let mut results = String::new();
let mut nbr_facets = 0;
lexicographically_iterate_over_facet_distribution(
&txn,
index.content,
0,
&candidates,
|facet, count, _| {
let facet = OrderedF64Codec::bytes_decode(facet).unwrap();
if nbr_facets == 100 {
Ok(ControlFlow::Break(()))
} else {
nbr_facets += 1;
results.push_str(&format!("{facet}: {count}\n"));
Ok(ControlFlow::Continue(()))
}
},
)
.unwrap();
milli_snap!(results, i);
txn.commit().unwrap();
}
}
}

View File

@ -303,347 +303,3 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
Ok(())
}
}
#[cfg(test)]
mod tests {
use std::ops::Bound;
use roaring::RoaringBitmap;
use super::find_docids_of_facet_within_bounds;
use crate::heed_codec::facet::{FacetGroupKeyCodec, OrderedF64Codec};
use crate::milli_snap;
use crate::search::facet::tests::{
get_random_looking_index, get_random_looking_index_with_multiple_field_ids,
get_simple_index, get_simple_index_with_multiple_field_ids,
};
use crate::snapshot_tests::display_bitmap;
#[test]
fn random_looking_index_snap() {
let index = get_random_looking_index();
milli_snap!(format!("{index}"), @"3256c76a7c1b768a013e78d5fa6e9ff9");
}
#[test]
fn random_looking_index_with_multiple_field_ids_snap() {
let index = get_random_looking_index_with_multiple_field_ids();
milli_snap!(format!("{index}"), @"c3e5fe06a8f1c404ed4935b32c90a89b");
}
#[test]
fn simple_index_snap() {
let index = get_simple_index();
milli_snap!(format!("{index}"), @"5dbfa134cc44abeb3ab6242fc182e48e");
}
#[test]
fn simple_index_with_multiple_field_ids_snap() {
let index = get_simple_index_with_multiple_field_ids();
milli_snap!(format!("{index}"), @"a4893298218f682bc76357f46777448c");
}
#[test]
fn filter_range_increasing() {
let indexes = [
get_simple_index(),
get_random_looking_index(),
get_simple_index_with_multiple_field_ids(),
get_random_looking_index_with_multiple_field_ids(),
];
for (i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let mut results = String::new();
for i in 0..=255 {
let i = i as f64;
let start = Bound::Included(0.);
let end = Bound::Included(i);
let mut docids = RoaringBitmap::new();
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0,
&start,
&end,
&mut docids,
)
.unwrap();
#[allow(clippy::format_push_string)]
results.push_str(&format!("0 <= . <= {i} : {}\n", display_bitmap(&docids)));
}
milli_snap!(results, format!("included_{i}"));
let mut results = String::new();
for i in 0..=255 {
let i = i as f64;
let start = Bound::Excluded(0.);
let end = Bound::Excluded(i);
let mut docids = RoaringBitmap::new();
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0,
&start,
&end,
&mut docids,
)
.unwrap();
#[allow(clippy::format_push_string)]
results.push_str(&format!("0 < . < {i} : {}\n", display_bitmap(&docids)));
}
milli_snap!(results, format!("excluded_{i}"));
txn.commit().unwrap();
}
}
#[test]
fn filter_range_decreasing() {
let indexes = [
get_simple_index(),
get_random_looking_index(),
get_simple_index_with_multiple_field_ids(),
get_random_looking_index_with_multiple_field_ids(),
];
for (i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let mut results = String::new();
for i in (0..=255).rev() {
let i = i as f64;
let start = Bound::Included(i);
let end = Bound::Included(255.);
let mut docids = RoaringBitmap::new();
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0,
&start,
&end,
&mut docids,
)
.unwrap();
results.push_str(&format!("{i} <= . <= 255 : {}\n", display_bitmap(&docids)));
}
milli_snap!(results, format!("included_{i}"));
let mut results = String::new();
for i in (0..=255).rev() {
let i = i as f64;
let start = Bound::Excluded(i);
let end = Bound::Excluded(255.);
let mut docids = RoaringBitmap::new();
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0,
&start,
&end,
&mut docids,
)
.unwrap();
results.push_str(&format!("{i} < . < 255 : {}\n", display_bitmap(&docids)));
}
milli_snap!(results, format!("excluded_{i}"));
txn.commit().unwrap();
}
}
#[test]
fn filter_range_pinch() {
let indexes = [
get_simple_index(),
get_random_looking_index(),
get_simple_index_with_multiple_field_ids(),
get_random_looking_index_with_multiple_field_ids(),
];
for (i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let mut results = String::new();
for i in (0..=128).rev() {
let i = i as f64;
let start = Bound::Included(i);
let end = Bound::Included(255. - i);
let mut docids = RoaringBitmap::new();
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0,
&start,
&end,
&mut docids,
)
.unwrap();
results.push_str(&format!(
"{i} <= . <= {r} : {docids}\n",
r = 255. - i,
docids = display_bitmap(&docids)
));
}
milli_snap!(results, format!("included_{i}"));
let mut results = String::new();
for i in (0..=128).rev() {
let i = i as f64;
let start = Bound::Excluded(i);
let end = Bound::Excluded(255. - i);
let mut docids = RoaringBitmap::new();
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0,
&start,
&end,
&mut docids,
)
.unwrap();
results.push_str(&format!(
"{i} < . < {r} {docids}\n",
r = 255. - i,
docids = display_bitmap(&docids)
));
}
milli_snap!(results, format!("excluded_{i}"));
txn.commit().unwrap();
}
}
#[test]
fn filter_range_unbounded() {
let indexes = [
get_simple_index(),
get_random_looking_index(),
get_simple_index_with_multiple_field_ids(),
get_random_looking_index_with_multiple_field_ids(),
];
for (i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let mut results = String::new();
for i in 0..=255 {
let i = i as f64;
let start = Bound::Included(i);
let end = Bound::Unbounded;
let mut docids = RoaringBitmap::new();
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0,
&start,
&end,
&mut docids,
)
.unwrap();
#[allow(clippy::format_push_string)]
results.push_str(&format!(">= {i}: {}\n", display_bitmap(&docids)));
}
milli_snap!(results, format!("start_from_included_{i}"));
let mut results = String::new();
for i in 0..=255 {
let i = i as f64;
let start = Bound::Unbounded;
let end = Bound::Included(i);
let mut docids = RoaringBitmap::new();
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0,
&start,
&end,
&mut docids,
)
.unwrap();
#[allow(clippy::format_push_string)]
results.push_str(&format!("<= {i}: {}\n", display_bitmap(&docids)));
}
milli_snap!(results, format!("end_at_included_{i}"));
let mut docids = RoaringBitmap::new();
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0,
&Bound::Unbounded,
&Bound::Unbounded,
&mut docids,
)
.unwrap();
milli_snap!(
&format!("all field_id 0: {}\n", display_bitmap(&docids)),
format!("unbounded_field_id_0_{i}")
);
let mut docids = RoaringBitmap::new();
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
1,
&Bound::Unbounded,
&Bound::Unbounded,
&mut docids,
)
.unwrap();
milli_snap!(
&format!("all field_id 1: {}\n", display_bitmap(&docids)),
format!("unbounded_field_id_1_{i}")
);
drop(txn);
}
}
#[test]
fn filter_range_exact() {
let indexes = [
get_simple_index(),
get_random_looking_index(),
get_simple_index_with_multiple_field_ids(),
get_random_looking_index_with_multiple_field_ids(),
];
for (i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let mut results_0 = String::new();
let mut results_1 = String::new();
for i in 0..=255 {
let i = i as f64;
let start = Bound::Included(i);
let end = Bound::Included(i);
let mut docids = RoaringBitmap::new();
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0,
&start,
&end,
&mut docids,
)
.unwrap();
#[allow(clippy::format_push_string)]
results_0.push_str(&format!("{i}: {}\n", display_bitmap(&docids)));
let mut docids = RoaringBitmap::new();
find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
1,
&start,
&end,
&mut docids,
)
.unwrap();
#[allow(clippy::format_push_string)]
results_1.push_str(&format!("{i}: {}\n", display_bitmap(&docids)));
}
milli_snap!(results_0, format!("field_id_0_exact_{i}"));
milli_snap!(results_1, format!("field_id_1_exact_{i}"));
drop(txn);
}
}
}

View File

@ -112,119 +112,3 @@ impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> {
}
}
}
#[cfg(test)]
mod tests {
use roaring::RoaringBitmap;
use crate::milli_snap;
use crate::search::facet::facet_sort_ascending::ascending_facet_sort;
use crate::search::facet::tests::{
get_random_looking_index, get_random_looking_string_index_with_multiple_field_ids,
get_simple_index, get_simple_string_index_with_multiple_field_ids,
};
use crate::snapshot_tests::display_bitmap;
#[test]
fn filter_sort_ascending() {
let indexes = [get_simple_index(), get_random_looking_index()];
for (i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let candidates = (200..=300).collect::<RoaringBitmap>();
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 0, candidates).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
milli_snap!(results, i);
txn.commit().unwrap();
}
}
#[test]
fn filter_sort_ascending_multiple_field_ids() {
let indexes = [
get_simple_string_index_with_multiple_field_ids(),
get_random_looking_string_index_with_multiple_field_ids(),
];
for (i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let candidates = (200..=300).collect::<RoaringBitmap>();
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
milli_snap!(results, format!("{i}-0"));
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 1, candidates).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
milli_snap!(results, format!("{i}-1"));
txn.commit().unwrap();
}
}
#[test]
fn filter_sort_ascending_with_no_candidates() {
let indexes = [
get_simple_string_index_with_multiple_field_ids(),
get_random_looking_string_index_with_multiple_field_ids(),
];
for (_i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let candidates = RoaringBitmap::new();
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
assert!(results.is_empty());
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 1, candidates).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
assert!(results.is_empty());
txn.commit().unwrap();
}
}
#[test]
fn filter_sort_ascending_with_inexisting_field_id() {
let indexes = [
get_simple_string_index_with_multiple_field_ids(),
get_random_looking_string_index_with_multiple_field_ids(),
];
for (_i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let candidates = RoaringBitmap::new();
let mut results = String::new();
let iter = ascending_facet_sort(&txn, index.content, 3, candidates.clone()).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
assert!(results.is_empty());
txn.commit().unwrap();
}
}
}

View File

@ -117,128 +117,3 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
}
}
}
#[cfg(test)]
mod tests {
use roaring::RoaringBitmap;
use crate::heed_codec::facet::FacetGroupKeyCodec;
use crate::heed_codec::BytesRefCodec;
use crate::milli_snap;
use crate::search::facet::facet_sort_descending::descending_facet_sort;
use crate::search::facet::tests::{
get_random_looking_index, get_random_looking_string_index_with_multiple_field_ids,
get_simple_index, get_simple_index_with_multiple_field_ids,
get_simple_string_index_with_multiple_field_ids,
};
use crate::snapshot_tests::display_bitmap;
#[test]
fn filter_sort_descending() {
let indexes = [
get_simple_index(),
get_random_looking_index(),
get_simple_index_with_multiple_field_ids(),
];
for (i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let candidates = (200..=300).collect::<RoaringBitmap>();
let mut results = String::new();
let db = index.content.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
milli_snap!(results, i);
txn.commit().unwrap();
}
}
#[test]
fn filter_sort_descending_multiple_field_ids() {
let indexes = [
get_simple_string_index_with_multiple_field_ids(),
get_random_looking_string_index_with_multiple_field_ids(),
];
for (i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let candidates = (200..=300).collect::<RoaringBitmap>();
let mut results = String::new();
let db = index.content.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>();
let iter = descending_facet_sort(&txn, db, 0, candidates.clone()).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
milli_snap!(results, format!("{i}-0"));
let mut results = String::new();
let iter = descending_facet_sort(&txn, db, 1, candidates).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
milli_snap!(results, format!("{i}-1"));
txn.commit().unwrap();
}
}
#[test]
fn filter_sort_ascending_with_no_candidates() {
let indexes = [
get_simple_string_index_with_multiple_field_ids(),
get_random_looking_string_index_with_multiple_field_ids(),
];
for (_i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let candidates = RoaringBitmap::new();
let mut results = String::new();
let iter = descending_facet_sort(&txn, index.content, 0, candidates.clone()).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
assert!(results.is_empty());
let mut results = String::new();
let iter = descending_facet_sort(&txn, index.content, 1, candidates).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
assert!(results.is_empty());
txn.commit().unwrap();
}
}
#[test]
fn filter_sort_ascending_with_inexisting_field_id() {
let indexes = [
get_simple_string_index_with_multiple_field_ids(),
get_random_looking_string_index_with_multiple_field_ids(),
];
for (_i, index) in indexes.iter().enumerate() {
let txn = index.env.read_txn().unwrap();
let candidates = RoaringBitmap::new();
let mut results = String::new();
let iter = descending_facet_sort(&txn, index.content, 3, candidates.clone()).unwrap();
for el in iter {
let (docids, _) = el.unwrap();
results.push_str(&display_bitmap(&docids));
results.push('\n');
}
assert!(results.is_empty());
txn.commit().unwrap();
}
}
}

View File

@ -116,109 +116,3 @@ pub(crate) fn get_highest_level<'t>(
})
.unwrap_or(0))
}
#[cfg(test)]
pub(crate) mod tests {
use rand::{Rng, SeedableRng};
use roaring::RoaringBitmap;
use crate::heed_codec::facet::OrderedF64Codec;
use crate::heed_codec::StrRefCodec;
use crate::update::facet::test_helpers::FacetIndex;
pub fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
let mut txn = index.env.write_txn().unwrap();
for i in 0..256u16 {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(i as u32);
index.insert(&mut txn, 0, &(i as f64), &bitmap);
}
txn.commit().unwrap();
index
}
pub fn get_random_looking_index() -> FacetIndex<OrderedF64Codec> {
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
let mut txn = index.env.write_txn().unwrap();
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
for (_i, key) in std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).enumerate() {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(key);
bitmap.insert(key + 100);
index.insert(&mut txn, 0, &(key as f64), &bitmap);
}
txn.commit().unwrap();
index
}
pub fn get_simple_index_with_multiple_field_ids() -> FacetIndex<OrderedF64Codec> {
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
let mut txn = index.env.write_txn().unwrap();
for fid in 0..2 {
for i in 0..256u16 {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(i as u32);
index.insert(&mut txn, fid, &(i as f64), &bitmap);
}
}
txn.commit().unwrap();
index
}
pub fn get_random_looking_index_with_multiple_field_ids() -> FacetIndex<OrderedF64Codec> {
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
let mut txn = index.env.write_txn().unwrap();
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
let keys =
std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::<Vec<u32>>();
for fid in 0..2 {
for (_i, &key) in keys.iter().enumerate() {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(key);
bitmap.insert(key + 100);
index.insert(&mut txn, fid, &(key as f64), &bitmap);
}
}
txn.commit().unwrap();
index
}
pub fn get_simple_string_index_with_multiple_field_ids() -> FacetIndex<StrRefCodec> {
let index = FacetIndex::<StrRefCodec>::new(4, 8, 5);
let mut txn = index.env.write_txn().unwrap();
for fid in 0..2 {
for i in 0..256u16 {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(i as u32);
if i % 2 == 0 {
index.insert(&mut txn, fid, &format!("{i}").as_str(), &bitmap);
} else {
index.insert(&mut txn, fid, &"", &bitmap);
}
}
}
txn.commit().unwrap();
index
}
pub fn get_random_looking_string_index_with_multiple_field_ids() -> FacetIndex<StrRefCodec> {
let index = FacetIndex::<StrRefCodec>::new(4, 8, 5);
let mut txn = index.env.write_txn().unwrap();
let mut rng = rand::rngs::SmallRng::from_seed([0; 32]);
let keys =
std::iter::from_fn(|| Some(rng.gen_range(0..256))).take(128).collect::<Vec<u32>>();
for fid in 0..2 {
for (_i, &key) in keys.iter().enumerate() {
let mut bitmap = RoaringBitmap::new();
bitmap.insert(key);
bitmap.insert(key + 100);
if key % 2 == 0 {
index.insert(&mut txn, fid, &format!("{key}").as_str(), &bitmap);
} else {
index.insert(&mut txn, fid, &"", &bitmap);
}
}
}
txn.commit().unwrap();
index
}
}

View File

@ -407,54 +407,6 @@ mod tests {
test("large_group_small_min_level", 16, 2);
test("odd_group_odd_min_level", 7, 3);
}
#[test]
fn insert_delete_field_insert() {
let test = |name: &str, group_size: u8, min_level_size: u8| {
let index =
FacetIndex::<OrderedF64Codec>::new(group_size, 0 /*NA*/, min_level_size);
let mut wtxn = index.env.write_txn().unwrap();
let mut elements = Vec::<((u16, f64), RoaringBitmap)>::new();
for i in 0..100u32 {
// field id = 0, left_bound = i, docids = [i]
elements.push(((0, i as f64), once(i).collect()));
}
for i in 0..100u32 {
// field id = 1, left_bound = i, docids = [i]
elements.push(((1, i as f64), once(i).collect()));
}
index.bulk_insert(&mut wtxn, &[0, 1], elements.iter());
index.verify_structure_validity(&wtxn, 0);
index.verify_structure_validity(&wtxn, 1);
// delete all the elements for the facet id 0
for i in 0..100u32 {
index.delete_single_docid(&mut wtxn, 0, &(i as f64), i);
}
index.verify_structure_validity(&wtxn, 0);
index.verify_structure_validity(&wtxn, 1);
let mut elements = Vec::<((u16, f64), RoaringBitmap)>::new();
// then add some elements again for the facet id 1
for i in 0..110u32 {
// field id = 1, left_bound = i, docids = [i]
elements.push(((1, i as f64), once(i).collect()));
}
index.verify_structure_validity(&wtxn, 0);
index.verify_structure_validity(&wtxn, 1);
index.bulk_insert(&mut wtxn, &[0, 1], elements.iter());
wtxn.commit().unwrap();
milli_snap!(format!("{index}"), name);
};
test("default", 4, 5);
test("small_group_small_min_level", 2, 2);
test("small_group_large_min_level", 2, 128);
test("large_group_small_min_level", 16, 2);
test("odd_group_odd_min_level", 7, 3);
}
#[test]
fn bug_3165() {

File diff suppressed because it is too large Load Diff

View File

@ -72,7 +72,6 @@ two methods.
Related PR: https://github.com/meilisearch/milli/pull/619
*/
pub const FACET_MAX_GROUP_SIZE: u8 = 8;
pub const FACET_GROUP_SIZE: u8 = 4;
pub const FACET_MIN_LEVEL_SIZE: u8 = 5;
@ -88,17 +87,14 @@ use heed::BytesEncode;
use log::debug;
use time::OffsetDateTime;
use self::incremental::FacetsUpdateIncremental;
use super::FacetsUpdateBulk;
use crate::facet::FacetType;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::BytesRefCodec;
use crate::heed_codec::facet::FacetGroupKey;
use crate::update::index_documents::create_sorter;
use crate::update::merge_btreeset_string;
use crate::{BEU16StrCodec, Index, Result, MAX_FACET_VALUE_LENGTH};
pub mod bulk;
pub mod incremental;
/// A builder used to add new elements to the `facet_id_string_docids` or `facet_id_f64_docids` databases.
///
@ -106,11 +102,9 @@ pub mod incremental;
/// a bulk update method or an incremental update method.
pub struct FacetsUpdate<'i> {
index: &'i Index,
database: heed::Database<FacetGroupKeyCodec<BytesRefCodec>, FacetGroupValueCodec>,
facet_type: FacetType,
delta_data: grenad::Reader<BufReader<File>>,
group_size: u8,
max_group_size: u8,
min_level_size: u8,
}
impl<'i> FacetsUpdate<'i> {
@ -119,19 +113,9 @@ impl<'i> FacetsUpdate<'i> {
facet_type: FacetType,
delta_data: grenad::Reader<BufReader<File>>,
) -> Self {
let database = match facet_type {
FacetType::String => {
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>()
}
FacetType::Number => {
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<BytesRefCodec>>()
}
};
Self {
index,
database,
group_size: FACET_GROUP_SIZE,
max_group_size: FACET_MAX_GROUP_SIZE,
min_level_size: FACET_MIN_LEVEL_SIZE,
facet_type,
delta_data,
@ -145,30 +129,16 @@ impl<'i> FacetsUpdate<'i> {
debug!("Computing and writing the facet values levels docids into LMDB on disk...");
self.index.set_updated_at(wtxn, &OffsetDateTime::now_utc())?;
// See self::comparison_bench::benchmark_facet_indexing
if self.delta_data.len() >= (self.database.len(wtxn)? / 50) {
let field_ids =
self.index.faceted_fields_ids(wtxn)?.iter().copied().collect::<Vec<_>>();
let bulk_update = FacetsUpdateBulk::new(
self.index,
field_ids,
self.facet_type,
self.delta_data,
self.group_size,
self.min_level_size,
);
bulk_update.execute(wtxn)?;
} else {
let incremental_update = FacetsUpdateIncremental::new(
self.index,
self.facet_type,
self.delta_data,
self.group_size,
self.min_level_size,
self.max_group_size,
);
incremental_update.execute(wtxn)?;
}
let field_ids = self.index.faceted_fields_ids(wtxn)?.iter().copied().collect::<Vec<_>>();
let bulk_update = FacetsUpdateBulk::new(
self.index,
field_ids,
self.facet_type,
self.delta_data,
self.group_size,
self.min_level_size,
);
bulk_update.execute(wtxn)?;
// We clear the list of normalized-for-search facets
// and the previous FSTs to compute everything from scratch
@ -264,7 +234,6 @@ impl<'i> FacetsUpdate<'i> {
pub(crate) mod test_helpers {
use std::cell::Cell;
use std::fmt::Display;
use std::iter::FromIterator;
use std::marker::PhantomData;
use std::rc::Rc;
@ -280,7 +249,6 @@ pub(crate) mod test_helpers {
use crate::search::facet::get_highest_level;
use crate::snapshot_tests::display_bitmap;
use crate::update::del_add::{DelAdd, KvWriterDelAdd};
use crate::update::FacetsUpdateIncrementalInner;
use crate::CboRoaringBitmapCodec;
/// Utility function to generate a string whose position in a lexicographically
@ -396,49 +364,6 @@ pub(crate) mod test_helpers {
self.min_level_size.set(std::cmp::max(1, min_level_size));
}
pub fn insert<'a>(
&self,
wtxn: &'a mut RwTxn,
field_id: u16,
key: &'a <BoundCodec as BytesEncode<'a>>::EItem,
docids: &RoaringBitmap,
) {
let update = FacetsUpdateIncrementalInner {
db: self.content,
group_size: self.group_size.get(),
min_level_size: self.min_level_size.get(),
max_group_size: self.max_group_size.get(),
};
let key_bytes = BoundCodec::bytes_encode(key).unwrap();
update.insert(wtxn, field_id, &key_bytes, docids).unwrap();
}
pub fn delete_single_docid<'a>(
&self,
wtxn: &'a mut RwTxn,
field_id: u16,
key: &'a <BoundCodec as BytesEncode<'a>>::EItem,
docid: u32,
) {
self.delete(wtxn, field_id, key, &RoaringBitmap::from_iter(std::iter::once(docid)))
}
pub fn delete<'a>(
&self,
wtxn: &'a mut RwTxn,
field_id: u16,
key: &'a <BoundCodec as BytesEncode<'a>>::EItem,
docids: &RoaringBitmap,
) {
let update = FacetsUpdateIncrementalInner {
db: self.content,
group_size: self.group_size.get(),
min_level_size: self.min_level_size.get(),
max_group_size: self.max_group_size.get(),
};
let key_bytes = BoundCodec::bytes_encode(key).unwrap();
update.delete(wtxn, field_id, &key_bytes, docids).unwrap();
}
pub fn bulk_insert<'a, 'b>(
&self,
wtxn: &'a mut RwTxn,
@ -555,63 +480,3 @@ pub(crate) mod test_helpers {
}
}
}
#[allow(unused)]
#[cfg(test)]
mod comparison_bench {
use std::iter::once;
use rand::Rng;
use roaring::RoaringBitmap;
use super::test_helpers::FacetIndex;
use crate::heed_codec::facet::OrderedF64Codec;
// This is a simple test to get an intuition on the relative speed
// of the incremental vs. bulk indexer.
//
// The benchmark shows the worst-case scenario for the incremental indexer, since
// each facet value contains only one document ID.
//
// In that scenario, it appears that the incremental indexer is about 50 times slower than the
// bulk indexer.
// #[test]
fn benchmark_facet_indexing() {
let mut facet_value = 0;
let mut r = rand::thread_rng();
for i in 1..=20 {
let size = 50_000 * i;
let index = FacetIndex::<OrderedF64Codec>::new(4, 8, 5);
let mut txn = index.env.write_txn().unwrap();
let mut elements = Vec::<((u16, f64), RoaringBitmap)>::new();
for i in 0..size {
// field id = 0, left_bound = i, docids = [i]
elements.push(((0, facet_value as f64), once(i).collect()));
facet_value += 1;
}
let timer = std::time::Instant::now();
index.bulk_insert(&mut txn, &[0], elements.iter());
let time_spent = timer.elapsed().as_millis();
println!("bulk {size} : {time_spent}ms");
txn.commit().unwrap();
for nbr_doc in [1, 100, 1000, 10_000] {
let mut txn = index.env.write_txn().unwrap();
let timer = std::time::Instant::now();
//
// insert one document
//
for _ in 0..nbr_doc {
index.insert(&mut txn, 0, &r.gen(), &once(1).collect());
}
let time_spent = timer.elapsed().as_millis();
println!(" add {nbr_doc} : {time_spent}ms");
txn.abort();
}
}
}
}

View File

@ -1,7 +1,6 @@
pub use self::available_documents_ids::AvailableDocumentsIds;
pub use self::clear_documents::ClearDocuments;
pub use self::facet::bulk::FacetsUpdateBulk;
pub use self::facet::incremental::FacetsUpdateIncrementalInner;
pub use self::index_documents::{
merge_btreeset_string, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps,
DocumentAdditionResult, DocumentId, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod,