Actually delete documents from DB when the merge function says so

This commit is contained in:
Louis Dureuil 2023-10-26 18:06:41 +02:00
parent 8e0d9c9a5e
commit 6260cff65f
No known key found for this signature in database
3 changed files with 24 additions and 32 deletions

View File

@ -102,11 +102,11 @@ impl CboRoaringBitmapCodec {
} }
/// Merges a DelAdd delta into a CboRoaringBitmap. /// Merges a DelAdd delta into a CboRoaringBitmap.
pub fn merge_deladd_into( pub fn merge_deladd_into<'a>(
deladd: KvReaderDelAdd<'_>, deladd: KvReaderDelAdd<'_>,
previous: &[u8], previous: &[u8],
buffer: &mut Vec<u8>, buffer: &'a mut Vec<u8>,
) -> io::Result<()> { ) -> io::Result<Option<&'a [u8]>> {
// Deserialize the bitmap that is already there // Deserialize the bitmap that is already there
let mut previous = Self::deserialize_from(previous)?; let mut previous = Self::deserialize_from(previous)?;
@ -120,7 +120,12 @@ impl CboRoaringBitmapCodec {
previous |= Self::deserialize_from(value)?; previous |= Self::deserialize_from(value)?;
} }
previous.serialize_into(buffer) if previous.is_empty() {
return Ok(None);
}
Self::serialize_into(&previous, buffer);
Ok(Some(&buffer[..]))
} }
} }

View File

@ -390,22 +390,7 @@ where
return Err(Error::InternalError(InternalError::AbortedIndexation)); return Err(Error::InternalError(InternalError::AbortedIndexation));
} }
let typed_chunk = match result? { let typed_chunk = result?;
TypedChunk::WordDocids {
word_docids_reader,
exact_word_docids_reader,
word_fid_docids_reader,
} => TypedChunk::WordDocids {
word_docids_reader,
exact_word_docids_reader,
word_fid_docids_reader,
},
TypedChunk::WordPairProximityDocids(chunk) => {
TypedChunk::WordPairProximityDocids(chunk)
}
TypedChunk::WordPositionDocids(chunk) => TypedChunk::WordPositionDocids(chunk),
otherwise => otherwise,
};
// FIXME: return newly added as well as newly deleted documents // FIXME: return newly added as well as newly deleted documents
let (docids, is_merged_database) = let (docids, is_merged_database) =

View File

@ -484,11 +484,11 @@ fn deladd_serialize_add_side<'a>(obkv: &'a [u8], _buffer: &mut Vec<u8>) -> Resul
/// ///
/// The first argument is the DelAdd obkv of CboRoaringBitmaps and /// The first argument is the DelAdd obkv of CboRoaringBitmaps and
/// the second one is the CboRoaringBitmap to merge into. /// the second one is the CboRoaringBitmap to merge into.
fn merge_deladd_cbo_roaring_bitmaps( fn merge_deladd_cbo_roaring_bitmaps<'a>(
deladd_obkv: &[u8], deladd_obkv: &[u8],
previous: &[u8], previous: &[u8],
buffer: &mut Vec<u8>, buffer: &'a mut Vec<u8>,
) -> Result<()> { ) -> Result<Option<&'a [u8]>> {
Ok(CboRoaringBitmapCodec::merge_deladd_into( Ok(CboRoaringBitmapCodec::merge_deladd_into(
KvReaderDelAdd::new(deladd_obkv), KvReaderDelAdd::new(deladd_obkv),
previous, previous,
@ -509,7 +509,7 @@ fn write_entries_into_database<R, K, V, FS, FM>(
where where
R: io::Read + io::Seek, R: io::Read + io::Seek,
FS: for<'a> Fn(&'a [u8], &'a mut Vec<u8>) -> Result<&'a [u8]>, FS: for<'a> Fn(&'a [u8], &'a mut Vec<u8>) -> Result<&'a [u8]>,
FM: Fn(&[u8], &[u8], &mut Vec<u8>) -> Result<()>, FM: for<'a> Fn(&[u8], &[u8], &'a mut Vec<u8>) -> Result<Option<&'a [u8]>>,
{ {
puffin::profile_function!(format!("number of entries: {}", data.len())); puffin::profile_function!(format!("number of entries: {}", data.len()));
@ -521,17 +521,19 @@ where
if valid_lmdb_key(key) { if valid_lmdb_key(key) {
buffer.clear(); buffer.clear();
let value = if index_is_empty { let value = if index_is_empty {
serialize_value(value, &mut buffer)? Some(serialize_value(value, &mut buffer)?)
} else { } else {
match database.get(wtxn, key)? { match database.get(wtxn, key)? {
Some(prev_value) => { Some(prev_value) => merge_values(value, prev_value, &mut buffer)?,
merge_values(value, prev_value, &mut buffer)?; None => Some(serialize_value(value, &mut buffer)?),
&buffer[..]
}
None => serialize_value(value, &mut buffer)?,
} }
}; };
database.put(wtxn, key, value)?; match value {
Some(value) => database.put(wtxn, key, value)?,
None => {
database.delete(wtxn, key)?;
}
}
} }
} }
@ -553,7 +555,7 @@ fn append_entries_into_database<R, K, V, FS, FM>(
where where
R: io::Read + io::Seek, R: io::Read + io::Seek,
FS: for<'a> Fn(&'a [u8], &'a mut Vec<u8>) -> Result<&'a [u8]>, FS: for<'a> Fn(&'a [u8], &'a mut Vec<u8>) -> Result<&'a [u8]>,
FM: Fn(&[u8], &[u8], &mut Vec<u8>) -> Result<()>, FM: for<'a> Fn(&[u8], &[u8], &'a mut Vec<u8>) -> Result<Option<&'a [u8]>>,
K: for<'a> heed::BytesDecode<'a>, K: for<'a> heed::BytesDecode<'a>,
{ {
puffin::profile_function!(format!("number of entries: {}", data.len())); puffin::profile_function!(format!("number of entries: {}", data.len()));