From d23c250ad5928878a6db1f61272a6fec7093a1dd Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 13 Jul 2021 20:04:48 +0200 Subject: [PATCH] Fix a bound error in the facet string range construction --- milli/src/search/facet/facet_distribution.rs | 4 +- milli/src/search/facet/facet_string.rs | 51 +++++++++----------- 2 files changed, 26 insertions(+), 29 deletions(-) diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index 6382e15e1..fef4ecc87 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -22,7 +22,7 @@ const MAX_VALUES_BY_FACET: usize = 1000; /// Threshold on the number of candidates that will make /// the system to choose between one algorithm or another. -const CANDIDATES_THRESHOLD: u64 = 1000; +const CANDIDATES_THRESHOLD: u64 = 35_000; pub struct FacetDistribution<'a> { facets: Option>, @@ -80,7 +80,7 @@ impl<'a> FacetDistribution<'a> { { let mut key_buffer: Vec<_> = field_id.to_be_bytes().iter().copied().collect(); - for docid in candidates.into_iter().take(CANDIDATES_THRESHOLD as usize) { + for docid in candidates.into_iter() { key_buffer.truncate(mem::size_of::()); key_buffer.extend_from_slice(&docid.to_be_bytes()); let iter = db diff --git a/milli/src/search/facet/facet_string.rs b/milli/src/search/facet/facet_string.rs index e1fe6ab74..f0b527104 100644 --- a/milli/src/search/facet/facet_string.rs +++ b/milli/src/search/facet/facet_string.rs @@ -220,36 +220,34 @@ impl<'t> FacetStringLevelZeroRange<'t> { left: Bound<&str>, right: Bound<&str>, ) -> heed::Result> { - fn encode_bound<'a>( - buffer: &'a mut Vec, - field_id: FieldId, - bound: Bound<&str>, - ) -> Bound<&'a [u8]> { - match bound { - Included(value) => { - buffer.extend_from_slice(&field_id.to_be_bytes()); - buffer.push(0); - buffer.extend_from_slice(value.as_bytes()); - Included(&buffer[..]) - } - Excluded(value) => { - buffer.extend_from_slice(&field_id.to_be_bytes()); - buffer.push(0); - buffer.extend_from_slice(value.as_bytes()); - Excluded(&buffer[..]) - } - Unbounded => { - buffer.extend_from_slice(&field_id.to_be_bytes()); - buffer.push(1); // we must only get the level 0 - Excluded(&buffer[..]) - } - } + fn encode_value<'a>(buffer: &'a mut Vec, field_id: FieldId, value: &str) -> &'a [u8] { + buffer.extend_from_slice(&field_id.to_be_bytes()); + buffer.push(0); + buffer.extend_from_slice(value.as_bytes()); + &buffer[..] } let mut left_buffer = Vec::new(); + let left_bound = match left { + Included(value) => Included(encode_value(&mut left_buffer, field_id, value)), + Excluded(value) => Excluded(encode_value(&mut left_buffer, field_id, value)), + Unbounded => { + left_buffer.extend_from_slice(&field_id.to_be_bytes()); + left_buffer.push(0); + Included(&left_buffer[..]) + } + }; + let mut right_buffer = Vec::new(); - let left_bound = encode_bound(&mut left_buffer, field_id, left); - let right_bound = encode_bound(&mut right_buffer, field_id, right); + let right_bound = match right { + Included(value) => Included(encode_value(&mut right_buffer, field_id, value)), + Excluded(value) => Excluded(encode_value(&mut right_buffer, field_id, value)), + Unbounded => { + right_buffer.extend_from_slice(&field_id.to_be_bytes()); + right_buffer.push(1); // we must only get the level 0 + Excluded(&right_buffer[..]) + } + }; let iter = db .remap_key_type::() @@ -290,7 +288,6 @@ impl<'t> FacetStringIter<'t> { field_id: FieldId, documents_ids: RoaringBitmap, ) -> heed::Result> { - // TODO make sure that we change the database before using it, or merging the PR. let db = index.facet_id_string_docids.remap_types::(); let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0); let highest_iter = match NonZeroU8::new(highest_level) {