5413: Make sure to delete useless prefixes r=ManyTheFish a=Kerollmops

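We discovered a bug where the new indexer was still writing empty roaring bitmaps instead of deleting the prefix entry from the prefix database. With this change, a prefix/position pair that has no bitmaps left is recorded without a serialized length, and its key is deleted from the prefix database rather than written.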

Co-authored-by: Kerollmops <clement@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2025-03-12 10:54:04 +00:00 committed by GitHub
commit 1cd00f37c0
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194

View File

@ -205,16 +205,23 @@ impl WordPrefixIntegerDocids {
let (ref mut index, ref mut file, ref mut buffer) = *refmut; let (ref mut index, ref mut file, ref mut buffer) = *refmut;
for (&pos, bitmaps_bytes) in frozen.bitmaps(prefix).unwrap() { for (&pos, bitmaps_bytes) in frozen.bitmaps(prefix).unwrap() {
if bitmaps_bytes.is_empty() {
index.push(PrefixIntegerEntry { prefix, pos, serialized_length: None });
} else {
let output = bitmaps_bytes let output = bitmaps_bytes
.iter() .iter()
.map(|bytes| CboRoaringBitmapCodec::deserialize_from(bytes)) .map(|bytes| CboRoaringBitmapCodec::deserialize_from(bytes))
.union()?; .union()?;
buffer.clear(); buffer.clear();
CboRoaringBitmapCodec::serialize_into_vec(&output, buffer); CboRoaringBitmapCodec::serialize_into_vec(&output, buffer);
index.push(PrefixIntegerEntry { prefix, pos, serialized_length: buffer.len() }); index.push(PrefixIntegerEntry {
prefix,
pos,
serialized_length: Some(buffer.len()),
});
file.write_all(buffer)?; file.write_all(buffer)?;
} }
}
Result::Ok(()) Result::Ok(())
})?; })?;
@ -230,14 +237,24 @@ impl WordPrefixIntegerDocids {
file.rewind()?; file.rewind()?;
let mut file = BufReader::new(file); let mut file = BufReader::new(file);
for PrefixIntegerEntry { prefix, pos, serialized_length } in index { for PrefixIntegerEntry { prefix, pos, serialized_length } in index {
buffer.resize(serialized_length, 0);
file.read_exact(&mut buffer)?;
key_buffer.clear(); key_buffer.clear();
key_buffer.extend_from_slice(prefix.as_bytes()); key_buffer.extend_from_slice(prefix.as_bytes());
key_buffer.push(0); key_buffer.push(0);
key_buffer.extend_from_slice(&pos.to_be_bytes()); key_buffer.extend_from_slice(&pos.to_be_bytes());
self.prefix_database.remap_data_type::<Bytes>().put(wtxn, &key_buffer, &buffer)?; match serialized_length {
Some(serialized_length) => {
buffer.resize(serialized_length, 0);
file.read_exact(&mut buffer)?;
self.prefix_database.remap_data_type::<Bytes>().put(
wtxn,
&key_buffer,
&buffer,
)?;
}
None => {
self.prefix_database.delete(wtxn, &key_buffer)?;
}
}
} }
} }
@ -249,7 +266,7 @@ impl WordPrefixIntegerDocids {
struct PrefixIntegerEntry<'a> { struct PrefixIntegerEntry<'a> {
prefix: &'a str, prefix: &'a str,
pos: u16, pos: u16,
serialized_length: usize, serialized_length: Option<usize>,
} }
/// TODO doc /// TODO doc