mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-29 16:24:26 +01:00
Introduce the CboRoaringBitmapCodec merge_deladd_into and use it
This commit is contained in:
parent
2d3f15f82c
commit
560e8f5613
@ -6,6 +6,7 @@ use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::heed_codec::BytesDecodeOwned;
|
||||
use crate::update::del_add::{DelAdd, KvReaderDelAdd};
|
||||
|
||||
/// This is the limit where using a byteorder became less size efficient
|
||||
/// than using a direct roaring encoding, it is also the point where we are able
|
||||
@ -99,6 +100,28 @@ impl CboRoaringBitmapCodec {
|
||||
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Merges a DelAdd delta into a CboRoaringBitmap.
|
||||
pub fn merge_deladd_into(
|
||||
deladd: KvReaderDelAdd<'_>,
|
||||
previous: &[u8],
|
||||
buffer: &mut Vec<u8>,
|
||||
) -> io::Result<()> {
|
||||
// Deserialize the bitmap that is already there
|
||||
let mut previous = Self::deserialize_from(previous)?;
|
||||
|
||||
// Remove integers we no more want in the previous bitmap
|
||||
if let Some(value) = deladd.get(DelAdd::Deletion) {
|
||||
previous -= Self::deserialize_from(value)?;
|
||||
}
|
||||
|
||||
// Insert the new integers we want in the previous bitmap
|
||||
if let Some(value) = deladd.get(DelAdd::Addition) {
|
||||
previous |= Self::deserialize_from(value)?;
|
||||
}
|
||||
|
||||
previous.serialize_into(buffer)
|
||||
}
|
||||
}
|
||||
|
||||
impl heed::BytesDecode<'_> for CboRoaringBitmapCodec {
|
||||
|
@ -134,7 +134,7 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
wtxn,
|
||||
index_is_empty,
|
||||
deladd_serialize_add_side,
|
||||
merge_cbo_roaring_bitmaps,
|
||||
merge_deladd_cbo_roaring_bitmaps,
|
||||
)?;
|
||||
is_merged_database = true;
|
||||
}
|
||||
@ -153,7 +153,7 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
wtxn,
|
||||
index_is_empty,
|
||||
deladd_serialize_add_side,
|
||||
merge_cbo_roaring_bitmaps,
|
||||
merge_deladd_cbo_roaring_bitmaps,
|
||||
)?;
|
||||
|
||||
let exact_word_docids_iter = unsafe { as_cloneable_grenad(&exact_word_docids_reader) }?;
|
||||
@ -163,7 +163,7 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
wtxn,
|
||||
index_is_empty,
|
||||
deladd_serialize_add_side,
|
||||
merge_cbo_roaring_bitmaps,
|
||||
merge_deladd_cbo_roaring_bitmaps,
|
||||
)?;
|
||||
|
||||
let word_fid_docids_iter = unsafe { as_cloneable_grenad(&word_fid_docids_reader) }?;
|
||||
@ -173,7 +173,7 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
wtxn,
|
||||
index_is_empty,
|
||||
deladd_serialize_add_side,
|
||||
merge_cbo_roaring_bitmaps,
|
||||
merge_deladd_cbo_roaring_bitmaps,
|
||||
)?;
|
||||
|
||||
// create fst from word docids
|
||||
@ -195,7 +195,7 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
wtxn,
|
||||
index_is_empty,
|
||||
deladd_serialize_add_side,
|
||||
merge_cbo_roaring_bitmaps,
|
||||
merge_deladd_cbo_roaring_bitmaps,
|
||||
)?;
|
||||
is_merged_database = true;
|
||||
}
|
||||
@ -216,7 +216,7 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
wtxn,
|
||||
index_is_empty,
|
||||
deladd_serialize_add_side,
|
||||
merge_cbo_roaring_bitmaps,
|
||||
merge_deladd_cbo_roaring_bitmaps,
|
||||
)?;
|
||||
is_merged_database = true;
|
||||
}
|
||||
@ -227,7 +227,7 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
wtxn,
|
||||
index_is_empty,
|
||||
deladd_serialize_add_side,
|
||||
merge_cbo_roaring_bitmaps,
|
||||
merge_deladd_cbo_roaring_bitmaps,
|
||||
)?;
|
||||
is_merged_database = true;
|
||||
}
|
||||
@ -238,7 +238,7 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
wtxn,
|
||||
index_is_empty,
|
||||
deladd_serialize_add_side,
|
||||
merge_cbo_roaring_bitmaps,
|
||||
merge_deladd_cbo_roaring_bitmaps,
|
||||
)?;
|
||||
is_merged_database = true;
|
||||
}
|
||||
@ -249,7 +249,7 @@ pub(crate) fn write_typed_chunk_into_index(
|
||||
wtxn,
|
||||
index_is_empty,
|
||||
deladd_serialize_add_side,
|
||||
merge_cbo_roaring_bitmaps,
|
||||
merge_deladd_cbo_roaring_bitmaps,
|
||||
)?;
|
||||
is_merged_database = true;
|
||||
}
|
||||
@ -388,17 +388,6 @@ fn merge_word_docids_reader_into_fst(
|
||||
Ok(builder.into_set())
|
||||
}
|
||||
|
||||
fn merge_cbo_roaring_bitmaps(
|
||||
new_value: &[u8],
|
||||
db_value: &[u8],
|
||||
buffer: &mut Vec<u8>,
|
||||
) -> Result<()> {
|
||||
Ok(CboRoaringBitmapCodec::merge_into(
|
||||
&[Cow::Borrowed(db_value), Cow::Borrowed(new_value)],
|
||||
buffer,
|
||||
)?)
|
||||
}
|
||||
|
||||
/// A function that extracts and returns the Add side of a DelAdd obkv.
|
||||
/// This is useful when there is no previous value in the database and
|
||||
/// therefore we don't need to do a diff with what's already there.
|
||||
@ -409,6 +398,22 @@ fn deladd_serialize_add_side<'a>(obkv: &'a [u8], _buffer: &mut Vec<u8>) -> Resul
|
||||
Ok(KvReaderDelAdd::new(obkv).get(DelAdd::Addition).unwrap_or_default())
|
||||
}
|
||||
|
||||
/// A function that merges a DelAdd of bitmao into an already existing bitmap.
|
||||
///
|
||||
/// The first argument is the DelAdd obkv of CboRoaringBitmaps and
|
||||
/// the second one is the CboRoaringBitmap to merge into.
|
||||
fn merge_deladd_cbo_roaring_bitmaps(
|
||||
deladd_obkv: &[u8],
|
||||
previous: &[u8],
|
||||
buffer: &mut Vec<u8>,
|
||||
) -> Result<()> {
|
||||
Ok(CboRoaringBitmapCodec::merge_deladd_into(
|
||||
KvReaderDelAdd::new(deladd_obkv),
|
||||
previous,
|
||||
buffer,
|
||||
)?)
|
||||
}
|
||||
|
||||
/// Write provided entries in database using serialize_value function.
|
||||
/// merge_values function is used if an entry already exists in the database.
|
||||
fn write_entries_into_database<R, K, V, FS, FM>(
|
||||
|
Loading…
Reference in New Issue
Block a user