diff --git a/milli/src/heed_codec/facet/facet_string_zero_bounds_value_codec.rs b/milli/src/heed_codec/facet/facet_string_zero_bounds_value_codec.rs index 3c2ce4657..6161118b6 100644 --- a/milli/src/heed_codec/facet/facet_string_zero_bounds_value_codec.rs +++ b/milli/src/heed_codec/facet/facet_string_zero_bounds_value_codec.rs @@ -16,7 +16,7 @@ where type DItem = (Option<(&'a str, &'a str)>, C::DItem); fn bytes_decode(bytes: &'a [u8]) -> Option { - let (contains_bounds, tail_bytes) = bytes.split_first()?; + let (contains_bounds, bytes) = bytes.split_first()?; if *contains_bounds != 0 { let (left_len, bytes) = try_split_at(bytes, 2)?; @@ -33,7 +33,7 @@ where C::bytes_decode(bytes).map(|item| (Some((left, right)), item)) } else { - C::bytes_decode(tail_bytes).map(|item| (None, item)) + C::bytes_decode(bytes).map(|item| (None, item)) } } } @@ -49,11 +49,21 @@ where match bounds { Some((left, right)) => { + bytes.push(u8::max_value()); + + if left.is_empty() || right.is_empty() { + return None; + } + let left_len: u16 = left.len().try_into().ok()?; let right_len: u16 = right.len().try_into().ok()?; + bytes.extend_from_slice(&left_len.to_be_bytes()); bytes.extend_from_slice(&right_len.to_be_bytes()); + bytes.extend_from_slice(left.as_bytes()); + bytes.extend_from_slice(right.as_bytes()); + let value_bytes = C::bytes_encode(&value)?; bytes.extend_from_slice(&value_bytes[..]); @@ -78,3 +88,35 @@ fn try_split_at(slice: &[u8], mid: usize) -> Option<(&[u8], &[u8])> { None } } + +#[cfg(test)] +mod tests { + use heed::types::Unit; + use heed::{BytesDecode, BytesEncode}; + use roaring::RoaringBitmap; + + use super::*; + use crate::CboRoaringBitmapCodec; + + #[test] + fn deserialize_roaring_bitmaps() { + let bounds = Some(("abc", "def")); + let docids: RoaringBitmap = (0..100).chain(3500..4398).collect(); + let key = (bounds, docids.clone()); + let bytes = + FacetStringZeroBoundsValueCodec::::bytes_encode(&key).unwrap(); + let (out_bounds, out_docids) = + FacetStringZeroBoundsValueCodec::::bytes_decode(&bytes).unwrap(); + assert_eq!((out_bounds, out_docids), (bounds, docids)); + } + + #[test] + fn deserialize_unit() { + let bounds = Some(("abc", "def")); + let key = (bounds, ()); + let bytes = FacetStringZeroBoundsValueCodec::::bytes_encode(&key).unwrap(); + let (out_bounds, out_unit) = + FacetStringZeroBoundsValueCodec::::bytes_decode(&bytes).unwrap(); + assert_eq!((out_bounds, out_unit), (bounds, ())); + } +} diff --git a/milli/src/update/index_documents/store.rs b/milli/src/update/index_documents/store.rs index f0225ff43..4c1071aab 100644 --- a/milli/src/update/index_documents/store.rs +++ b/milli/src/update/index_documents/store.rs @@ -286,6 +286,10 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { value: String, id: DocumentId, ) -> Result<()> { + if value.is_empty() { + return Ok(()); + } + let sorter = &mut self.field_id_docid_facet_strings_sorter; Self::write_field_id_docid_facet_string_value(sorter, field_id, id, &value)?;