Actually delete documents from DB when the merge function says so

This commit is contained in:
Louis Dureuil 2023-10-26 18:06:41 +02:00
parent 8e0d9c9a5e
commit 6260cff65f
No known key found for this signature in database
3 changed files with 24 additions and 32 deletions

View File

@ -102,11 +102,11 @@ impl CboRoaringBitmapCodec {
}
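/// Merges a DelAdd delta into a CboRoaringBitmap.
///
/// Returns `Ok(None)` when the merged bitmap is empty; otherwise the bitmap is
/// serialized into `buffer` and the returned slice borrows from that buffer.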
pub fn merge_deladd_into(
pub fn merge_deladd_into<'a>(
deladd: KvReaderDelAdd<'_>,
previous: &[u8],
buffer: &mut Vec<u8>,
) -> io::Result<()> {
buffer: &'a mut Vec<u8>,
) -> io::Result<Option<&'a [u8]>> {
// Deserialize the bitmap that is already there
let mut previous = Self::deserialize_from(previous)?;
@ -120,7 +120,12 @@ impl CboRoaringBitmapCodec {
previous |= Self::deserialize_from(value)?;
}
previous.serialize_into(buffer)
if previous.is_empty() {
return Ok(None);
}
Self::serialize_into(&previous, buffer);
Ok(Some(&buffer[..]))
}
}

View File

@ -390,22 +390,7 @@ where
return Err(Error::InternalError(InternalError::AbortedIndexation));
}
let typed_chunk = match result? {
TypedChunk::WordDocids {
word_docids_reader,
exact_word_docids_reader,
word_fid_docids_reader,
} => TypedChunk::WordDocids {
word_docids_reader,
exact_word_docids_reader,
word_fid_docids_reader,
},
TypedChunk::WordPairProximityDocids(chunk) => {
TypedChunk::WordPairProximityDocids(chunk)
}
TypedChunk::WordPositionDocids(chunk) => TypedChunk::WordPositionDocids(chunk),
otherwise => otherwise,
};
let typed_chunk = result?;
// FIXME: return newly added as well as newly deleted documents
let (docids, is_merged_database) =

View File

@ -484,11 +484,11 @@ fn deladd_serialize_add_side<'a>(obkv: &'a [u8], _buffer: &mut Vec<u8>) -> Resul
///
/// The first argument is the DelAdd obkv of CboRoaringBitmaps and
/// the second one is the CboRoaringBitmap to merge into.
///
/// The result comes from `CboRoaringBitmapCodec::merge_deladd_into`.
fn merge_deladd_cbo_roaring_bitmaps(
fn merge_deladd_cbo_roaring_bitmaps<'a>(
deladd_obkv: &[u8],
previous: &[u8],
buffer: &mut Vec<u8>,
) -> Result<()> {
buffer: &'a mut Vec<u8>,
) -> Result<Option<&'a [u8]>> {
Ok(CboRoaringBitmapCodec::merge_deladd_into(
KvReaderDelAdd::new(deladd_obkv),
previous,
@ -509,7 +509,7 @@ fn write_entries_into_database<R, K, V, FS, FM>(
where
R: io::Read + io::Seek,
FS: for<'a> Fn(&'a [u8], &'a mut Vec<u8>) -> Result<&'a [u8]>,
FM: Fn(&[u8], &[u8], &mut Vec<u8>) -> Result<()>,
FM: for<'a> Fn(&[u8], &[u8], &'a mut Vec<u8>) -> Result<Option<&'a [u8]>>,
{
puffin::profile_function!(format!("number of entries: {}", data.len()));
@ -521,17 +521,19 @@ where
if valid_lmdb_key(key) {
buffer.clear();
let value = if index_is_empty {
serialize_value(value, &mut buffer)?
Some(serialize_value(value, &mut buffer)?)
} else {
match database.get(wtxn, key)? {
Some(prev_value) => {
merge_values(value, prev_value, &mut buffer)?;
&buffer[..]
}
None => serialize_value(value, &mut buffer)?,
Some(prev_value) => merge_values(value, prev_value, &mut buffer)?,
None => Some(serialize_value(value, &mut buffer)?),
}
};
database.put(wtxn, key, value)?;
match value {
Some(value) => database.put(wtxn, key, value)?,
None => {
database.delete(wtxn, key)?;
}
}
}
}
@ -553,7 +555,7 @@ fn append_entries_into_database<R, K, V, FS, FM>(
where
R: io::Read + io::Seek,
FS: for<'a> Fn(&'a [u8], &'a mut Vec<u8>) -> Result<&'a [u8]>,
FM: Fn(&[u8], &[u8], &mut Vec<u8>) -> Result<()>,
FM: for<'a> Fn(&[u8], &[u8], &'a mut Vec<u8>) -> Result<Option<&'a [u8]>>,
K: for<'a> heed::BytesDecode<'a>,
{
puffin::profile_function!(format!("number of entries: {}", data.len()));