mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-11 14:04:31 +01:00
Rename facet codecs and refine FacetsUpdate API
This commit is contained in:
parent
485a72306d
commit
330c9eb1b2
@ -4,7 +4,9 @@ use heed::Result;
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::{get_first_facet_value, get_highest_level};
|
use super::{get_first_facet_value, get_highest_level};
|
||||||
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKey, FacetGroupValueCodec, FacetGroupKeyCodec};
|
use crate::heed_codec::facet::{
|
||||||
|
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec,
|
||||||
|
};
|
||||||
|
|
||||||
pub fn iterate_over_facet_distribution<'t, CB>(
|
pub fn iterate_over_facet_distribution<'t, CB>(
|
||||||
rtxn: &'t heed::RoTxn<'t>,
|
rtxn: &'t heed::RoTxn<'t>,
|
||||||
@ -78,7 +80,8 @@ where
|
|||||||
if level == 0 {
|
if level == 0 {
|
||||||
return self.iterate_level_0(candidates, starting_bound, group_size);
|
return self.iterate_level_0(candidates, starting_bound, group_size);
|
||||||
}
|
}
|
||||||
let starting_key = FacetGroupKey { field_id: self.field_id, level, left_bound: starting_bound };
|
let starting_key =
|
||||||
|
FacetGroupKey { field_id: self.field_id, level, left_bound: starting_bound };
|
||||||
let iter = self.db.range(&self.rtxn, &(&starting_key..)).unwrap().take(group_size);
|
let iter = self.db.range(&self.rtxn, &(&starting_key..)).unwrap().take(group_size);
|
||||||
|
|
||||||
for el in iter {
|
for el in iter {
|
||||||
@ -109,16 +112,14 @@ where
|
|||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
use std::ops::ControlFlow;
|
use super::iterate_over_facet_distribution;
|
||||||
|
use crate::heed_codec::facet::OrderedF64Codec;
|
||||||
|
use crate::milli_snap;
|
||||||
|
use crate::search::facet::test::FacetIndex;
|
||||||
use heed::BytesDecode;
|
use heed::BytesDecode;
|
||||||
use rand::{Rng, SeedableRng};
|
use rand::{Rng, SeedableRng};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
use std::ops::ControlFlow;
|
||||||
use super::iterate_over_facet_distribution;
|
|
||||||
use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec;
|
|
||||||
use crate::milli_snap;
|
|
||||||
use crate::search::facet::test::FacetIndex;
|
|
||||||
|
|
||||||
fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
|
fn get_simple_index() -> FacetIndex<OrderedF64Codec> {
|
||||||
let index = FacetIndex::<OrderedF64Codec>::new(4, 8);
|
let index = FacetIndex::<OrderedF64Codec>::new(4, 8);
|
||||||
|
@ -4,7 +4,9 @@ use heed::BytesEncode;
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
|
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
|
||||||
use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef};
|
use crate::heed_codec::facet::{
|
||||||
|
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec,
|
||||||
|
};
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
|
||||||
pub fn find_docids_of_facet_within_bounds<'t, BoundCodec>(
|
pub fn find_docids_of_facet_within_bounds<'t, BoundCodec>(
|
||||||
@ -117,7 +119,8 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
|
|||||||
return self.run_level_0(starting_left_bound, group_size);
|
return self.run_level_0(starting_left_bound, group_size);
|
||||||
}
|
}
|
||||||
|
|
||||||
let left_key = FacetGroupKey { field_id: self.field_id, level, left_bound: starting_left_bound };
|
let left_key =
|
||||||
|
FacetGroupKey { field_id: self.field_id, level, left_bound: starting_left_bound };
|
||||||
let mut iter = self.db.range(&self.rtxn, &(left_key..))?.take(group_size);
|
let mut iter = self.db.range(&self.rtxn, &(left_key..))?.take(group_size);
|
||||||
|
|
||||||
let (mut previous_key, mut previous_value) = iter.next().unwrap()?;
|
let (mut previous_key, mut previous_value) = iter.next().unwrap()?;
|
||||||
@ -258,8 +261,8 @@ mod tests {
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::find_docids_of_facet_within_bounds;
|
use super::find_docids_of_facet_within_bounds;
|
||||||
use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec;
|
|
||||||
use crate::heed_codec::facet::FacetGroupKeyCodec;
|
use crate::heed_codec::facet::FacetGroupKeyCodec;
|
||||||
|
use crate::heed_codec::facet::OrderedF64Codec;
|
||||||
use crate::milli_snap;
|
use crate::milli_snap;
|
||||||
use crate::search::facet::test::FacetIndex;
|
use crate::search::facet::test::FacetIndex;
|
||||||
use crate::snapshot_tests::display_bitmap;
|
use crate::snapshot_tests::display_bitmap;
|
||||||
|
@ -3,7 +3,7 @@ use roaring::RoaringBitmap;
|
|||||||
|
|
||||||
use super::{get_first_facet_value, get_highest_level};
|
use super::{get_first_facet_value, get_highest_level};
|
||||||
use crate::heed_codec::facet::{
|
use crate::heed_codec::facet::{
|
||||||
FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef,
|
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub fn ascending_facet_sort<'t>(
|
pub fn ascending_facet_sort<'t>(
|
||||||
@ -86,7 +86,7 @@ mod tests {
|
|||||||
use rand::{Rng, SeedableRng};
|
use rand::{Rng, SeedableRng};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec;
|
use crate::heed_codec::facet::OrderedF64Codec;
|
||||||
use crate::milli_snap;
|
use crate::milli_snap;
|
||||||
use crate::search::facet::facet_sort_ascending::ascending_facet_sort;
|
use crate::search::facet::facet_sort_ascending::ascending_facet_sort;
|
||||||
use crate::search::facet::test::FacetIndex;
|
use crate::search::facet::test::FacetIndex;
|
||||||
|
@ -5,7 +5,7 @@ use roaring::RoaringBitmap;
|
|||||||
|
|
||||||
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
|
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
|
||||||
use crate::heed_codec::facet::{
|
use crate::heed_codec::facet::{
|
||||||
FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef,
|
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
||||||
};
|
};
|
||||||
|
|
||||||
pub fn descending_facet_sort<'t>(
|
pub fn descending_facet_sort<'t>(
|
||||||
@ -37,7 +37,9 @@ struct DescendingFacetSort<'t> {
|
|||||||
field_id: u16,
|
field_id: u16,
|
||||||
stack: Vec<(
|
stack: Vec<(
|
||||||
RoaringBitmap,
|
RoaringBitmap,
|
||||||
std::iter::Take<heed::RoRevRange<'t, FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>>,
|
std::iter::Take<
|
||||||
|
heed::RoRevRange<'t, FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||||
|
>,
|
||||||
Bound<&'t [u8]>,
|
Bound<&'t [u8]>,
|
||||||
)>,
|
)>,
|
||||||
}
|
}
|
||||||
@ -72,7 +74,8 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
|
|||||||
if level == 0 {
|
if level == 0 {
|
||||||
return Some(Ok(bitmap));
|
return Some(Ok(bitmap));
|
||||||
}
|
}
|
||||||
let starting_key_below = FacetGroupKey { field_id, level: level - 1, left_bound };
|
let starting_key_below =
|
||||||
|
FacetGroupKey { field_id, level: level - 1, left_bound };
|
||||||
|
|
||||||
let end_key_kelow = match *right_bound {
|
let end_key_kelow = match *right_bound {
|
||||||
Bound::Included(right) => Bound::Included(FacetGroupKey {
|
Bound::Included(right) => Bound::Included(FacetGroupKey {
|
||||||
@ -89,15 +92,17 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
|
|||||||
};
|
};
|
||||||
let prev_right_bound = *right_bound;
|
let prev_right_bound = *right_bound;
|
||||||
*right_bound = Bound::Excluded(left_bound);
|
*right_bound = Bound::Excluded(left_bound);
|
||||||
let iter =
|
let iter = match self
|
||||||
match self.db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>().rev_range(
|
.db
|
||||||
|
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
||||||
|
.rev_range(
|
||||||
&self.rtxn,
|
&self.rtxn,
|
||||||
&(Bound::Included(starting_key_below), end_key_kelow),
|
&(Bound::Included(starting_key_below), end_key_kelow),
|
||||||
) {
|
) {
|
||||||
Ok(iter) => iter,
|
Ok(iter) => iter,
|
||||||
Err(e) => return Some(Err(e.into())),
|
Err(e) => return Some(Err(e.into())),
|
||||||
}
|
}
|
||||||
.take(group_size as usize);
|
.take(group_size as usize);
|
||||||
|
|
||||||
self.stack.push((bitmap, iter, prev_right_bound));
|
self.stack.push((bitmap, iter, prev_right_bound));
|
||||||
continue 'outer;
|
continue 'outer;
|
||||||
@ -114,8 +119,8 @@ mod tests {
|
|||||||
use rand::{Rng, SeedableRng};
|
use rand::{Rng, SeedableRng};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec;
|
use crate::heed_codec::facet::OrderedF64Codec;
|
||||||
use crate::heed_codec::facet::{FacetGroupKeyCodec, ByteSliceRef};
|
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec};
|
||||||
use crate::milli_snap;
|
use crate::milli_snap;
|
||||||
use crate::search::facet::facet_sort_descending::descending_facet_sort;
|
use crate::search::facet::facet_sort_descending::descending_facet_sort;
|
||||||
use crate::search::facet::test::FacetIndex;
|
use crate::search::facet::test::FacetIndex;
|
||||||
|
@ -1,30 +1,24 @@
|
|||||||
|
use crate::facet::FacetType;
|
||||||
|
use crate::heed_codec::facet::{
|
||||||
|
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
||||||
|
};
|
||||||
|
use crate::update::index_documents::{create_writer, writer_into_reader};
|
||||||
|
use crate::{CboRoaringBitmapCodec, FieldId, Index, Result};
|
||||||
|
use grenad::CompressionType;
|
||||||
|
use heed::types::ByteSlice;
|
||||||
|
use heed::{BytesEncode, Error, RoTxn, RwTxn};
|
||||||
|
use log::debug;
|
||||||
|
use roaring::RoaringBitmap;
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::cmp;
|
use std::cmp;
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::num::NonZeroUsize;
|
|
||||||
|
|
||||||
use grenad::CompressionType;
|
|
||||||
use heed::types::{ByteSlice, DecodeIgnore};
|
|
||||||
use heed::{BytesDecode, BytesEncode, Error, RoTxn, RwTxn};
|
|
||||||
use log::debug;
|
|
||||||
use roaring::RoaringBitmap;
|
|
||||||
use time::OffsetDateTime;
|
use time::OffsetDateTime;
|
||||||
|
|
||||||
use crate::error::InternalError;
|
|
||||||
use crate::facet::FacetType;
|
|
||||||
use crate::heed_codec::facet::{
|
|
||||||
FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef,
|
|
||||||
};
|
|
||||||
use crate::update::index_documents::{
|
|
||||||
create_writer, valid_lmdb_key, write_into_lmdb_database, writer_into_reader,
|
|
||||||
};
|
|
||||||
use crate::{CboRoaringBitmapCodec, FieldId, Index, Result};
|
|
||||||
|
|
||||||
pub struct FacetsUpdateBulk<'i> {
|
pub struct FacetsUpdateBulk<'i> {
|
||||||
index: &'i Index,
|
index: &'i Index,
|
||||||
database: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
database: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||||
level_group_size: usize,
|
level_group_size: u8,
|
||||||
min_level_size: usize,
|
min_level_size: u8,
|
||||||
facet_type: FacetType,
|
facet_type: FacetType,
|
||||||
// None if level 0 does not need to be updated
|
// None if level 0 does not need to be updated
|
||||||
new_data: Option<grenad::Reader<File>>,
|
new_data: Option<grenad::Reader<File>>,
|
||||||
@ -39,9 +33,9 @@ impl<'i> FacetsUpdateBulk<'i> {
|
|||||||
FacetsUpdateBulk {
|
FacetsUpdateBulk {
|
||||||
index,
|
index,
|
||||||
database: match facet_type {
|
database: match facet_type {
|
||||||
FacetType::String => {
|
FacetType::String => index
|
||||||
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
.facet_id_string_docids
|
||||||
}
|
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
|
||||||
FacetType::Number => {
|
FacetType::Number => {
|
||||||
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
||||||
}
|
}
|
||||||
@ -60,9 +54,9 @@ impl<'i> FacetsUpdateBulk<'i> {
|
|||||||
FacetsUpdateBulk {
|
FacetsUpdateBulk {
|
||||||
index,
|
index,
|
||||||
database: match facet_type {
|
database: match facet_type {
|
||||||
FacetType::String => {
|
FacetType::String => index
|
||||||
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
.facet_id_string_docids
|
||||||
}
|
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
|
||||||
FacetType::Number => {
|
FacetType::Number => {
|
||||||
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
||||||
}
|
}
|
||||||
@ -77,14 +71,14 @@ impl<'i> FacetsUpdateBulk<'i> {
|
|||||||
/// The number of elements from the level below that are represented by a single element in the level above
|
/// The number of elements from the level below that are represented by a single element in the level above
|
||||||
///
|
///
|
||||||
/// This setting is always greater than or equal to 2.
|
/// This setting is always greater than or equal to 2.
|
||||||
pub fn level_group_size(&mut self, value: NonZeroUsize) -> &mut Self {
|
pub fn level_group_size(mut self, value: u8) -> Self {
|
||||||
self.level_group_size = cmp::max(value.get(), 2);
|
self.level_group_size = cmp::max(value, 2);
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The minimum number of elements that a level is allowed to have.
|
/// The minimum number of elements that a level is allowed to have.
|
||||||
pub fn min_level_size(&mut self, value: NonZeroUsize) -> &mut Self {
|
pub fn min_level_size(mut self, value: u8) -> Self {
|
||||||
self.min_level_size = value.get();
|
self.min_level_size = cmp::max(value, 1);
|
||||||
self
|
self
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -109,8 +103,6 @@ impl<'i> FacetsUpdateBulk<'i> {
|
|||||||
}
|
}
|
||||||
self.update_level0(wtxn)?;
|
self.update_level0(wtxn)?;
|
||||||
|
|
||||||
// let mut nested_wtxn = self.index.env.nested_write_txn(wtxn)?;
|
|
||||||
|
|
||||||
for &field_id in faceted_fields.iter() {
|
for &field_id in faceted_fields.iter() {
|
||||||
let (level_readers, all_docids) = self.compute_levels_for_field_id(field_id, &wtxn)?;
|
let (level_readers, all_docids) = self.compute_levels_for_field_id(field_id, &wtxn)?;
|
||||||
|
|
||||||
@ -119,10 +111,6 @@ impl<'i> FacetsUpdateBulk<'i> {
|
|||||||
for level_reader in level_readers {
|
for level_reader in level_readers {
|
||||||
let mut cursor = level_reader.into_cursor()?;
|
let mut cursor = level_reader.into_cursor()?;
|
||||||
while let Some((k, v)) = cursor.move_on_next()? {
|
while let Some((k, v)) = cursor.move_on_next()? {
|
||||||
let key = FacetGroupKeyCodec::<DecodeIgnore>::bytes_decode(k).unwrap();
|
|
||||||
let value = FacetGroupValueCodec::bytes_decode(v).unwrap();
|
|
||||||
println!("inserting {key:?} {value:?}");
|
|
||||||
|
|
||||||
self.database.remap_types::<ByteSlice, ByteSlice>().put(wtxn, k, v)?;
|
self.database.remap_types::<ByteSlice, ByteSlice>().put(wtxn, k, v)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -141,14 +129,12 @@ impl<'i> FacetsUpdateBulk<'i> {
|
|||||||
let mut database = self.database.iter_mut(wtxn)?.remap_types::<ByteSlice, ByteSlice>();
|
let mut database = self.database.iter_mut(wtxn)?.remap_types::<ByteSlice, ByteSlice>();
|
||||||
let mut cursor = new_data.into_cursor()?;
|
let mut cursor = new_data.into_cursor()?;
|
||||||
while let Some((key, value)) = cursor.move_on_next()? {
|
while let Some((key, value)) = cursor.move_on_next()? {
|
||||||
if valid_lmdb_key(key) {
|
buffer.clear();
|
||||||
buffer.clear();
|
// the group size for level 0
|
||||||
// the group size for level 0
|
buffer.push(1);
|
||||||
buffer.push(1);
|
// then we extend the buffer with the docids bitmap
|
||||||
// then we extend the buffer with the docids bitmap
|
buffer.extend_from_slice(value);
|
||||||
buffer.extend_from_slice(value);
|
unsafe { database.append(key, &buffer)? };
|
||||||
unsafe { database.append(key, &buffer)? };
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
let mut buffer = Vec::new();
|
let mut buffer = Vec::new();
|
||||||
@ -156,25 +142,24 @@ impl<'i> FacetsUpdateBulk<'i> {
|
|||||||
|
|
||||||
let mut cursor = new_data.into_cursor()?;
|
let mut cursor = new_data.into_cursor()?;
|
||||||
while let Some((key, value)) = cursor.move_on_next()? {
|
while let Some((key, value)) = cursor.move_on_next()? {
|
||||||
if valid_lmdb_key(key) {
|
// the value is a CboRoaringBitmap, but I still need to prepend the
|
||||||
buffer.clear();
|
// group size for level 0 (= 1) to it
|
||||||
// the group size for level 0
|
buffer.clear();
|
||||||
buffer.push(1);
|
buffer.push(1);
|
||||||
// then we extend the buffer with the docids bitmap
|
// then we extend the buffer with the docids bitmap
|
||||||
match database.get(wtxn, key)? {
|
match database.get(wtxn, key)? {
|
||||||
Some(prev_value) => {
|
Some(prev_value) => {
|
||||||
let old_bitmap = &prev_value[1..];
|
let old_bitmap = &prev_value[1..];
|
||||||
CboRoaringBitmapCodec::merge_into(
|
CboRoaringBitmapCodec::merge_into(
|
||||||
&[Cow::Borrowed(value), Cow::Borrowed(old_bitmap)],
|
&[Cow::Borrowed(value), Cow::Borrowed(old_bitmap)],
|
||||||
&mut buffer,
|
&mut buffer,
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
None => {
|
None => {
|
||||||
buffer.extend_from_slice(value);
|
buffer.extend_from_slice(value);
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
database.put(wtxn, key, &buffer)?;
|
database.put(wtxn, key, &buffer)?;
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -186,7 +171,7 @@ impl<'i> FacetsUpdateBulk<'i> {
|
|||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
txn: &RoTxn,
|
txn: &RoTxn,
|
||||||
) -> Result<(Vec<grenad::Reader<File>>, RoaringBitmap)> {
|
) -> Result<(Vec<grenad::Reader<File>>, RoaringBitmap)> {
|
||||||
// TODO: first check whether there is anything in level 0
|
// TODO: first check whether there is anything in level 0?
|
||||||
let algo = ComputeHigherLevels {
|
let algo = ComputeHigherLevels {
|
||||||
rtxn: txn,
|
rtxn: txn,
|
||||||
db: &self.database,
|
db: &self.database,
|
||||||
@ -212,8 +197,8 @@ struct ComputeHigherLevels<'t> {
|
|||||||
rtxn: &'t heed::RoTxn<'t>,
|
rtxn: &'t heed::RoTxn<'t>,
|
||||||
db: &'t heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
db: &'t heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
level_group_size: usize,
|
level_group_size: u8,
|
||||||
min_level_size: usize,
|
min_level_size: u8,
|
||||||
}
|
}
|
||||||
impl<'t> ComputeHigherLevels<'t> {
|
impl<'t> ComputeHigherLevels<'t> {
|
||||||
fn read_level_0(
|
fn read_level_0(
|
||||||
@ -248,7 +233,7 @@ impl<'t> ComputeHigherLevels<'t> {
|
|||||||
}
|
}
|
||||||
bitmaps.push(docids);
|
bitmaps.push(docids);
|
||||||
|
|
||||||
if bitmaps.len() == self.level_group_size {
|
if bitmaps.len() == self.level_group_size as usize {
|
||||||
handle_group(&bitmaps, left_bound)?;
|
handle_group(&bitmaps, left_bound)?;
|
||||||
first_iteration_for_new_group = true;
|
first_iteration_for_new_group = true;
|
||||||
bitmaps.clear();
|
bitmaps.clear();
|
||||||
@ -265,9 +250,8 @@ impl<'t> ComputeHigherLevels<'t> {
|
|||||||
/// Compute the content of the database levels from its level 0 for the given field id.
|
/// Compute the content of the database levels from its level 0 for the given field id.
|
||||||
///
|
///
|
||||||
/// ## Returns:
|
/// ## Returns:
|
||||||
/// 1. a vector of grenad::Reader. The reader at index `i` corresponds to the elements of level `i + 1`
|
/// A vector of grenad::Reader. The reader at index `i` corresponds to the elements of level `i + 1`
|
||||||
/// that must be inserted into the database.
|
/// that must be inserted into the database.
|
||||||
/// 2. a roaring bitmap of all the document ids present in the database
|
|
||||||
fn compute_higher_levels(
|
fn compute_higher_levels(
|
||||||
&self,
|
&self,
|
||||||
level: u8,
|
level: u8,
|
||||||
@ -302,7 +286,7 @@ impl<'t> ComputeHigherLevels<'t> {
|
|||||||
left_bounds.push(left_bound);
|
left_bounds.push(left_bound);
|
||||||
|
|
||||||
bitmaps.push(combined_bitmap);
|
bitmaps.push(combined_bitmap);
|
||||||
if bitmaps.len() != self.level_group_size {
|
if bitmaps.len() != self.level_group_size as usize {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
let left_bound = left_bounds.first().unwrap();
|
let left_bound = left_bounds.first().unwrap();
|
||||||
@ -312,8 +296,8 @@ impl<'t> ComputeHigherLevels<'t> {
|
|||||||
bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..))
|
bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..))
|
||||||
{
|
{
|
||||||
let key = FacetGroupKey { field_id: self.field_id, level, left_bound };
|
let key = FacetGroupKey { field_id: self.field_id, level, left_bound };
|
||||||
let key =
|
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_encode(&key)
|
||||||
FacetGroupKeyCodec::<ByteSliceRef>::bytes_encode(&key).ok_or(Error::Encoding)?;
|
.ok_or(Error::Encoding)?;
|
||||||
let value = FacetGroupValue { size: group_size, bitmap };
|
let value = FacetGroupValue { size: group_size, bitmap };
|
||||||
let value =
|
let value =
|
||||||
FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?;
|
FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?;
|
||||||
@ -330,8 +314,8 @@ impl<'t> ComputeHigherLevels<'t> {
|
|||||||
bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..))
|
bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..))
|
||||||
{
|
{
|
||||||
let key = FacetGroupKey { field_id: self.field_id, level, left_bound };
|
let key = FacetGroupKey { field_id: self.field_id, level, left_bound };
|
||||||
let key =
|
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_encode(&key)
|
||||||
FacetGroupKeyCodec::<ByteSliceRef>::bytes_encode(&key).ok_or(Error::Encoding)?;
|
.ok_or(Error::Encoding)?;
|
||||||
let value = FacetGroupValue { size: group_size, bitmap };
|
let value = FacetGroupValue { size: group_size, bitmap };
|
||||||
let value = FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?;
|
let value = FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?;
|
||||||
cur_writer.insert(key, value)?;
|
cur_writer.insert(key, value)?;
|
||||||
@ -340,6 +324,10 @@ impl<'t> ComputeHigherLevels<'t> {
|
|||||||
}
|
}
|
||||||
if cur_writer_len > self.min_level_size {
|
if cur_writer_len > self.min_level_size {
|
||||||
sub_writers.push(writer_into_reader(cur_writer)?);
|
sub_writers.push(writer_into_reader(cur_writer)?);
|
||||||
|
} else {
|
||||||
|
if !bitmaps.is_empty() {
|
||||||
|
handle_group(&bitmaps, left_bounds.first().unwrap())?;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
return Ok(sub_writers);
|
return Ok(sub_writers);
|
||||||
}
|
}
|
||||||
|
@ -3,7 +3,7 @@ use heed::{BytesDecode, Error, RoTxn, RwTxn};
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::heed_codec::facet::{
|
use crate::heed_codec::facet::{
|
||||||
FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef,
|
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
||||||
};
|
};
|
||||||
use crate::search::facet::get_highest_level;
|
use crate::search::facet::get_highest_level;
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
@ -20,14 +20,26 @@ enum DeletionResult {
|
|||||||
|
|
||||||
pub struct FacetsUpdateIncremental {
|
pub struct FacetsUpdateIncremental {
|
||||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||||
group_size: usize,
|
group_size: u8,
|
||||||
min_level_size: usize,
|
min_level_size: u8,
|
||||||
max_group_size: usize,
|
max_group_size: u8,
|
||||||
}
|
}
|
||||||
impl FacetsUpdateIncremental {
|
impl FacetsUpdateIncremental {
|
||||||
pub fn new(db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>) -> Self {
|
pub fn new(db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>) -> Self {
|
||||||
Self { db, group_size: 4, min_level_size: 5, max_group_size: 8 }
|
Self { db, group_size: 4, min_level_size: 5, max_group_size: 8 }
|
||||||
}
|
}
|
||||||
|
pub fn group_size(mut self, size: u8) -> Self {
|
||||||
|
self.group_size = size;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
pub fn min_level_size(mut self, size: u8) -> Self {
|
||||||
|
self.min_level_size = size;
|
||||||
|
self
|
||||||
|
}
|
||||||
|
pub fn max_group_size(mut self, size: u8) -> Self {
|
||||||
|
self.max_group_size = size;
|
||||||
|
self
|
||||||
|
}
|
||||||
}
|
}
|
||||||
impl FacetsUpdateIncremental {
|
impl FacetsUpdateIncremental {
|
||||||
fn find_insertion_key_value(
|
fn find_insertion_key_value(
|
||||||
@ -178,12 +190,7 @@ impl FacetsUpdateIncremental {
|
|||||||
let mut updated_value = self.db.get(&txn, &insertion_key.as_ref())?.unwrap();
|
let mut updated_value = self.db.get(&txn, &insertion_key.as_ref())?.unwrap();
|
||||||
|
|
||||||
updated_value.size += 1;
|
updated_value.size += 1;
|
||||||
if updated_value.size as usize == max_group_size {
|
if updated_value.size == max_group_size {
|
||||||
// need to split it
|
|
||||||
// recompute left element and right element
|
|
||||||
// replace current group by left element
|
|
||||||
// add one more group to the right
|
|
||||||
|
|
||||||
let size_left = max_group_size / 2;
|
let size_left = max_group_size / 2;
|
||||||
let size_right = max_group_size - size_left;
|
let size_right = max_group_size - size_left;
|
||||||
|
|
||||||
@ -201,7 +208,7 @@ impl FacetsUpdateIncremental {
|
|||||||
)?
|
)?
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let mut iter = self.db.range(&txn, &(start_key..))?.take(max_group_size);
|
let mut iter = self.db.range(&txn, &(start_key..))?.take(max_group_size as usize);
|
||||||
|
|
||||||
let group_left = {
|
let group_left = {
|
||||||
let mut values_left = RoaringBitmap::new();
|
let mut values_left = RoaringBitmap::new();
|
||||||
@ -234,8 +241,11 @@ impl FacetsUpdateIncremental {
|
|||||||
values_right |= &value.bitmap;
|
values_right |= &value.bitmap;
|
||||||
}
|
}
|
||||||
|
|
||||||
let key =
|
let key = FacetGroupKey {
|
||||||
FacetGroupKey { field_id, level, left_bound: right_start_key.unwrap().to_vec() };
|
field_id,
|
||||||
|
level,
|
||||||
|
left_bound: right_start_key.unwrap().to_vec(),
|
||||||
|
};
|
||||||
let value = FacetGroupValue { size: size_right as u8, bitmap: values_right };
|
let value = FacetGroupValue { size: size_right as u8, bitmap: values_right };
|
||||||
(key, value)
|
(key, value)
|
||||||
};
|
};
|
||||||
@ -288,7 +298,7 @@ impl FacetsUpdateIncremental {
|
|||||||
.prefix_iter::<_, ByteSlice, ByteSlice>(&txn, &highest_level_prefix)?
|
.prefix_iter::<_, ByteSlice, ByteSlice>(&txn, &highest_level_prefix)?
|
||||||
.count();
|
.count();
|
||||||
|
|
||||||
if size_highest_level < self.group_size * self.min_level_size {
|
if size_highest_level < self.group_size as usize * self.min_level_size as usize {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -438,7 +448,7 @@ impl FacetsUpdateIncremental {
|
|||||||
.as_polymorph()
|
.as_polymorph()
|
||||||
.prefix_iter::<_, ByteSlice, ByteSlice>(&txn, &highest_level_prefix)?
|
.prefix_iter::<_, ByteSlice, ByteSlice>(&txn, &highest_level_prefix)?
|
||||||
.count()
|
.count()
|
||||||
>= self.group_size
|
>= self.min_level_size as usize
|
||||||
{
|
{
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
@ -450,7 +460,9 @@ impl FacetsUpdateIncremental {
|
|||||||
while let Some(el) = iter.next() {
|
while let Some(el) = iter.next() {
|
||||||
let (k, _) = el?;
|
let (k, _) = el?;
|
||||||
to_delete.push(
|
to_delete.push(
|
||||||
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(k).ok_or(Error::Encoding)?.into_owned(),
|
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(k)
|
||||||
|
.ok_or(Error::Encoding)?
|
||||||
|
.into_owned(),
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
drop(iter);
|
drop(iter);
|
||||||
@ -469,9 +481,9 @@ mod tests {
|
|||||||
use rand::{Rng, SeedableRng};
|
use rand::{Rng, SeedableRng};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec;
|
use crate::heed_codec::facet::OrderedF64Codec;
|
||||||
use crate::heed_codec::facet::str_ref::StrRefCodec;
|
use crate::heed_codec::facet::StrRefCodec;
|
||||||
use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKeyCodec, ByteSliceRef};
|
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||||
use crate::milli_snap;
|
use crate::milli_snap;
|
||||||
use crate::search::facet::get_highest_level;
|
use crate::search::facet::get_highest_level;
|
||||||
use crate::search::facet::test::FacetIndex;
|
use crate::search::facet::test::FacetIndex;
|
||||||
|
@ -4,7 +4,6 @@ use crate::{
|
|||||||
heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec},
|
heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec},
|
||||||
CboRoaringBitmapCodec, FieldId, Index, Result,
|
CboRoaringBitmapCodec, FieldId, Index, Result,
|
||||||
};
|
};
|
||||||
use grenad::CompressionType;
|
|
||||||
use heed::BytesDecode;
|
use heed::BytesDecode;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use std::{collections::HashMap, fs::File};
|
use std::{collections::HashMap, fs::File};
|
||||||
@ -42,26 +41,17 @@ impl<'i> FacetsUpdate<'i> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// /// The number of elements from the level below that are represented by a single element in the level above
|
|
||||||
// ///
|
|
||||||
// /// This setting is always greater than or equal to 2.
|
|
||||||
// pub fn level_group_size(&mut self, value: u8) -> &mut Self {
|
|
||||||
// self.level_group_size = std::cmp::max(value, 2);
|
|
||||||
// self
|
|
||||||
// }
|
|
||||||
|
|
||||||
// /// The minimum number of elements that a level is allowed to have.
|
|
||||||
// pub fn min_level_size(&mut self, value: u8) -> &mut Self {
|
|
||||||
// self.min_level_size = std::cmp::max(value, 1);
|
|
||||||
// self
|
|
||||||
// }
|
|
||||||
|
|
||||||
pub fn execute(self, wtxn: &mut heed::RwTxn) -> Result<()> {
|
pub fn execute(self, wtxn: &mut heed::RwTxn) -> Result<()> {
|
||||||
|
// here, come up with a better condition!
|
||||||
if self.database.is_empty(wtxn)? {
|
if self.database.is_empty(wtxn)? {
|
||||||
let bulk_update = FacetsUpdateBulk::new(self.index, self.facet_type, self.new_data);
|
let bulk_update = FacetsUpdateBulk::new(self.index, self.facet_type, self.new_data)
|
||||||
|
.level_group_size(self.level_group_size)
|
||||||
|
.min_level_size(self.min_level_size);
|
||||||
bulk_update.execute(wtxn)?;
|
bulk_update.execute(wtxn)?;
|
||||||
} else {
|
} else {
|
||||||
let indexer = FacetsUpdateIncremental::new(self.database);
|
let indexer = FacetsUpdateIncremental::new(self.database)
|
||||||
|
.max_group_size(self.max_level_group_size)
|
||||||
|
.min_level_size(self.min_level_size);
|
||||||
|
|
||||||
let mut new_faceted_docids = HashMap::<FieldId, RoaringBitmap>::default();
|
let mut new_faceted_docids = HashMap::<FieldId, RoaringBitmap>::default();
|
||||||
|
|
||||||
|
@ -16,8 +16,4 @@ source: milli/src/update/facet/incremental.rs
|
|||||||
0 0 k12 1 "[12, ]"
|
0 0 k12 1 "[12, ]"
|
||||||
0 0 k13 1 "[13, ]"
|
0 0 k13 1 "[13, ]"
|
||||||
0 0 k14 1 "[14, ]"
|
0 0 k14 1 "[14, ]"
|
||||||
0 1 k0 4 "[0, 1, 2, 3, ]"
|
|
||||||
0 1 k4 4 "[4, 5, 6, 7, ]"
|
|
||||||
0 1 k8 4 "[8, 9, 10, 11, ]"
|
|
||||||
0 1 k12 3 "[12, 13, 14, ]"
|
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user