From 6260cff65ff435aae61878c45275e0d8922546c9 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 26 Oct 2023 18:06:41 +0200 Subject: [PATCH] Actually delete documents from DB when the merge function says so --- .../cbo_roaring_bitmap_codec.rs | 13 +++++++--- milli/src/update/index_documents/mod.rs | 17 +----------- .../src/update/index_documents/typed_chunk.rs | 26 ++++++++++--------- 3 files changed, 24 insertions(+), 32 deletions(-) diff --git a/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs b/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs index 117da1308..f635e55af 100644 --- a/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs +++ b/milli/src/heed_codec/roaring_bitmap/cbo_roaring_bitmap_codec.rs @@ -102,11 +102,11 @@ impl CboRoaringBitmapCodec { } /// Merges a DelAdd delta into a CboRoaringBitmap. - pub fn merge_deladd_into( + pub fn merge_deladd_into<'a>( deladd: KvReaderDelAdd<'_>, previous: &[u8], - buffer: &mut Vec, - ) -> io::Result<()> { + buffer: &'a mut Vec, + ) -> io::Result> { // Deserialize the bitmap that is already there let mut previous = Self::deserialize_from(previous)?; @@ -120,7 +120,12 @@ impl CboRoaringBitmapCodec { previous |= Self::deserialize_from(value)?; } - previous.serialize_into(buffer) + if previous.is_empty() { + return Ok(None); + } + + Self::serialize_into(&previous, buffer); + Ok(Some(&buffer[..])) } } diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index b439ca409..45ceec7b0 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -390,22 +390,7 @@ where return Err(Error::InternalError(InternalError::AbortedIndexation)); } - let typed_chunk = match result? { - TypedChunk::WordDocids { - word_docids_reader, - exact_word_docids_reader, - word_fid_docids_reader, - } => TypedChunk::WordDocids { - word_docids_reader, - exact_word_docids_reader, - word_fid_docids_reader, - }, - TypedChunk::WordPairProximityDocids(chunk) => { - TypedChunk::WordPairProximityDocids(chunk) - } - TypedChunk::WordPositionDocids(chunk) => TypedChunk::WordPositionDocids(chunk), - otherwise => otherwise, - }; + let typed_chunk = result?; // FIXME: return newly added as well as newly deleted documents let (docids, is_merged_database) = diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index 1f1ac4adf..8257f7c93 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -484,11 +484,11 @@ fn deladd_serialize_add_side<'a>(obkv: &'a [u8], _buffer: &mut Vec) -> Resul /// /// The first argument is the DelAdd obkv of CboRoaringBitmaps and /// the second one is the CboRoaringBitmap to merge into. -fn merge_deladd_cbo_roaring_bitmaps( +fn merge_deladd_cbo_roaring_bitmaps<'a>( deladd_obkv: &[u8], previous: &[u8], - buffer: &mut Vec, -) -> Result<()> { + buffer: &'a mut Vec, +) -> Result> { Ok(CboRoaringBitmapCodec::merge_deladd_into( KvReaderDelAdd::new(deladd_obkv), previous, @@ -509,7 +509,7 @@ fn write_entries_into_database( where R: io::Read + io::Seek, FS: for<'a> Fn(&'a [u8], &'a mut Vec) -> Result<&'a [u8]>, - FM: Fn(&[u8], &[u8], &mut Vec) -> Result<()>, + FM: for<'a> Fn(&[u8], &[u8], &'a mut Vec) -> Result>, { puffin::profile_function!(format!("number of entries: {}", data.len())); @@ -521,17 +521,19 @@ where if valid_lmdb_key(key) { buffer.clear(); let value = if index_is_empty { - serialize_value(value, &mut buffer)? + Some(serialize_value(value, &mut buffer)?) } else { match database.get(wtxn, key)? { - Some(prev_value) => { - merge_values(value, prev_value, &mut buffer)?; - &buffer[..] - } - None => serialize_value(value, &mut buffer)?, + Some(prev_value) => merge_values(value, prev_value, &mut buffer)?, + None => Some(serialize_value(value, &mut buffer)?), } }; - database.put(wtxn, key, value)?; + match value { + Some(value) => database.put(wtxn, key, value)?, + None => { + database.delete(wtxn, key)?; + } + } } } @@ -553,7 +555,7 @@ fn append_entries_into_database( where R: io::Read + io::Seek, FS: for<'a> Fn(&'a [u8], &'a mut Vec) -> Result<&'a [u8]>, - FM: Fn(&[u8], &[u8], &mut Vec) -> Result<()>, + FM: for<'a> Fn(&[u8], &[u8], &'a mut Vec) -> Result>, K: for<'a> heed::BytesDecode<'a>, { puffin::profile_function!(format!("number of entries: {}", data.len()));