mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 14:54:27 +01:00
Merge #712
712: Fix bulk facet indexing bug r=Kerollmops a=loiclec # Pull Request ## Related issue Fixes (partially, until merged into meilisearch) https://github.com/meilisearch/meilisearch/issues/3165 ## What does this PR do? Fixes a bug where indexing certain numbers of filterable attribute values in bulk led to corrupted facet databases. This was due to a lossy integer conversion which would ultimately prevent entire levels of the facet database to be written into LMDB. More specifically, this change was made: ```diff - if cur_writer_len as u8 >= self.min_level_size { + if cur_writer_len >= self.min_level_size as usize { ``` I also checked other comparisons to `min_level_size` and other conversions such as `x as u8` in this part of the codebase. Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
This commit is contained in:
commit
e1612fcb01
@ -288,6 +288,8 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
|
||||
for bitmap in sub_bitmaps {
|
||||
combined_bitmap |= bitmap;
|
||||
}
|
||||
// The conversion of sub_bitmaps.len() to a u8 will always be correct
|
||||
// since its length is bounded by max_group_size, which is a u8.
|
||||
group_sizes.push(sub_bitmaps.len() as u8);
|
||||
left_bounds.push(left_bound);
|
||||
|
||||
@ -340,7 +342,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
|
||||
}
|
||||
}
|
||||
// if we inserted enough elements to reach the minimum level size, then we push the writer
|
||||
if cur_writer_len as u8 >= self.min_level_size {
|
||||
if cur_writer_len >= self.min_level_size as usize {
|
||||
sub_writers.push(writer_into_reader(cur_writer)?);
|
||||
} else {
|
||||
// otherwise, if there are still leftover elements, we give them to the level above
|
||||
@ -357,11 +359,15 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
|
||||
mod tests {
|
||||
use std::iter::once;
|
||||
|
||||
use big_s::S;
|
||||
use maplit::hashset;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::documents::documents_batch_reader_from_objects;
|
||||
use crate::heed_codec::facet::OrderedF64Codec;
|
||||
use crate::milli_snap;
|
||||
use crate::index::tests::TempIndex;
|
||||
use crate::update::facet::tests::FacetIndex;
|
||||
use crate::{db_snap, milli_snap};
|
||||
|
||||
#[test]
|
||||
fn insert() {
|
||||
@ -443,4 +449,46 @@ mod tests {
|
||||
test("large_group_small_min_level", 16, 2);
|
||||
test("odd_group_odd_min_level", 7, 3);
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bug_3165() {
|
||||
// Indexing a number of facet values that falls within certains ranges (e.g. 22_540 qualifies)
|
||||
// would lead to a facet DB which was missing some levels.
|
||||
// That was because before writing a level into the database, we would
|
||||
// check that its size was higher than the minimum level size using
|
||||
// a lossy integer conversion: `level_size as u8 >= min_level_size`.
|
||||
//
|
||||
// This missing level in the facet DBs would make the incremental indexer
|
||||
// (and other search algorithms) crash.
|
||||
//
|
||||
// https://github.com/meilisearch/meilisearch/issues/3165
|
||||
let index = TempIndex::new_with_map_size(4096 * 1000 * 100);
|
||||
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_primary_key("id".to_owned());
|
||||
settings.set_filterable_fields(hashset! { S("id") });
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
let mut documents = vec![];
|
||||
for i in 0..=22_540 {
|
||||
documents.push(
|
||||
serde_json::json! {
|
||||
{
|
||||
"id": i as u64,
|
||||
}
|
||||
}
|
||||
.as_object()
|
||||
.unwrap()
|
||||
.clone(),
|
||||
);
|
||||
}
|
||||
|
||||
let documents = documents_batch_reader_from_objects(documents);
|
||||
index.add_documents(documents).unwrap();
|
||||
|
||||
db_snap!(index, facet_id_f64_docids, "initial", @"c34f499261f3510d862fa0283bbe843a");
|
||||
db_snap!(index, number_faceted_documents_ids, "initial", @"01594fecbb316798ce3651d6730a4521");
|
||||
}
|
||||
}
|
||||
|
@ -436,6 +436,8 @@ impl FacetsUpdateIncrementalInner {
|
||||
level: highest_level + 1,
|
||||
left_bound: first_key.unwrap().left_bound,
|
||||
};
|
||||
// Note: nbr_leftover_elements can be casted to a u8 since it is bounded by `max_group_size`
|
||||
// when it is created above.
|
||||
let value = FacetGroupValue { size: nbr_leftover_elements as u8, bitmap: values };
|
||||
to_add.push((key.into_owned(), value));
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user