5413: Make sure to delete useless prefixes r=ManyTheFish a=Kerollmops

We discovered a bug where the new indexer was still writing empty roaring bitmaps instead of deleting the prefix entry from the prefix database.

Co-authored-by: Kerollmops <clement@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2025-03-12 10:54:04 +00:00 committed by GitHub
commit 1cd00f37c0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -205,15 +205,22 @@ impl WordPrefixIntegerDocids {
let (ref mut index, ref mut file, ref mut buffer) = *refmut;
for (&pos, bitmaps_bytes) in frozen.bitmaps(prefix).unwrap() {
let output = bitmaps_bytes
.iter()
.map(|bytes| CboRoaringBitmapCodec::deserialize_from(bytes))
.union()?;
buffer.clear();
CboRoaringBitmapCodec::serialize_into_vec(&output, buffer);
index.push(PrefixIntegerEntry { prefix, pos, serialized_length: buffer.len() });
file.write_all(buffer)?;
if bitmaps_bytes.is_empty() {
index.push(PrefixIntegerEntry { prefix, pos, serialized_length: None });
} else {
let output = bitmaps_bytes
.iter()
.map(|bytes| CboRoaringBitmapCodec::deserialize_from(bytes))
.union()?;
buffer.clear();
CboRoaringBitmapCodec::serialize_into_vec(&output, buffer);
index.push(PrefixIntegerEntry {
prefix,
pos,
serialized_length: Some(buffer.len()),
});
file.write_all(buffer)?;
}
}
Result::Ok(())
@ -230,14 +237,24 @@ impl WordPrefixIntegerDocids {
file.rewind()?;
let mut file = BufReader::new(file);
for PrefixIntegerEntry { prefix, pos, serialized_length } in index {
buffer.resize(serialized_length, 0);
file.read_exact(&mut buffer)?;
key_buffer.clear();
key_buffer.extend_from_slice(prefix.as_bytes());
key_buffer.push(0);
key_buffer.extend_from_slice(&pos.to_be_bytes());
self.prefix_database.remap_data_type::<Bytes>().put(wtxn, &key_buffer, &buffer)?;
match serialized_length {
Some(serialized_length) => {
buffer.resize(serialized_length, 0);
file.read_exact(&mut buffer)?;
self.prefix_database.remap_data_type::<Bytes>().put(
wtxn,
&key_buffer,
&buffer,
)?;
}
None => {
self.prefix_database.delete(wtxn, &key_buffer)?;
}
}
}
}
@ -249,7 +266,7 @@ impl WordPrefixIntegerDocids {
struct PrefixIntegerEntry<'a> {
prefix: &'a str,
pos: u16,
serialized_length: usize,
serialized_length: Option<usize>,
}
/// TODO doc