diff --git a/milli/src/heed_codec/facet/facet_level_value_u32_codec.rs b/milli/src/heed_codec/facet/facet_level_value_u32_codec.rs index 6b51b306e..597335b6e 100644 --- a/milli/src/heed_codec/facet/facet_level_value_u32_codec.rs +++ b/milli/src/heed_codec/facet/facet_level_value_u32_codec.rs @@ -2,7 +2,7 @@ use std::borrow::Cow; use std::convert::TryInto; use std::num::NonZeroU8; -use crate::FieldId; +use crate::{try_split_array_at, FieldId}; /// A codec that stores the field id, level 1 and higher and the groups ids. /// @@ -13,12 +13,13 @@ impl<'a> heed::BytesDecode<'a> for FacetLevelValueU32Codec { type DItem = (FieldId, NonZeroU8, u32, u32); fn bytes_decode(bytes: &'a [u8]) -> Option { - let (field_id, bytes) = bytes.split_first()?; + let (field_id_bytes, bytes) = try_split_array_at(bytes)?; + let field_id = u16::from_be_bytes(field_id_bytes); let (level, bytes) = bytes.split_first()?; let level = NonZeroU8::new(*level)?; - let left = bytes[16..20].try_into().ok().map(u32::from_be_bytes)?; - let right = bytes[20..].try_into().ok().map(u32::from_be_bytes)?; - Some((*field_id, level, left, right)) + let left = bytes[8..12].try_into().ok().map(u32::from_be_bytes)?; + let right = bytes[12..].try_into().ok().map(u32::from_be_bytes)?; + Some((field_id, level, left, right)) } } @@ -42,8 +43,8 @@ impl heed::BytesEncode<'_> for FacetLevelValueU32Codec { let bytes = right.to_be_bytes(); buffer[12..].copy_from_slice(&bytes[..]); - let mut bytes = Vec::with_capacity(buffer.len() + 2); - bytes.push(*field_id); + let mut bytes = Vec::with_capacity(buffer.len() + 2 + 1); + bytes.extend_from_slice(&field_id.to_be_bytes()); bytes.push(level.get()); bytes.extend_from_slice(&buffer); diff --git a/milli/src/heed_codec/facet/facet_string_level_zero_codec.rs b/milli/src/heed_codec/facet/facet_string_level_zero_codec.rs index 1c0c4be93..009c6454a 100644 --- a/milli/src/heed_codec/facet/facet_string_level_zero_codec.rs +++ b/milli/src/heed_codec/facet/facet_string_level_zero_codec.rs 
@@ -1,7 +1,7 @@ use std::borrow::Cow; use std::str; -use crate::FieldId; +use crate::{try_split_array_at, FieldId}; /// A codec that stores the field id, level 0, and facet string. /// @@ -16,7 +16,7 @@ pub struct FacetStringLevelZeroCodec; impl FacetStringLevelZeroCodec { pub fn serialize_into(field_id: FieldId, value: &str, out: &mut Vec) { out.reserve(value.len() + 2); - out.push(field_id); + out.extend_from_slice(&field_id.to_be_bytes()); out.push(0); // the level zero (for LMDB ordering only) out.extend_from_slice(value.as_bytes()); } @@ -26,7 +26,8 @@ impl<'a> heed::BytesDecode<'a> for FacetStringLevelZeroCodec { type DItem = (FieldId, &'a str); fn bytes_decode(bytes: &'a [u8]) -> Option { - let (field_id, bytes) = bytes.split_first()?; + let (field_id_bytes, bytes) = try_split_array_at(bytes)?; + let field_id = u16::from_be_bytes(field_id_bytes); let (level, bytes) = bytes.split_first()?; if *level != 0 { @@ -34,7 +35,7 @@ impl<'a> heed::BytesDecode<'a> for FacetStringLevelZeroCodec { } let value = str::from_utf8(bytes).ok()?; - Some((*field_id, value)) + Some((field_id, value)) } } diff --git a/milli/src/heed_codec/facet/mod.rs b/milli/src/heed_codec/facet/mod.rs index 90dc79134..ecab7eb7c 100644 --- a/milli/src/heed_codec/facet/mod.rs +++ b/milli/src/heed_codec/facet/mod.rs @@ -2,7 +2,6 @@ mod facet_level_value_f64_codec; mod facet_level_value_u32_codec; mod facet_string_level_zero_codec; mod facet_string_zero_bounds_value_codec; -mod facet_value_string_codec; mod field_doc_id_facet_f64_codec; mod field_doc_id_facet_string_codec; @@ -10,6 +9,5 @@ pub use self::facet_level_value_f64_codec::FacetLevelValueF64Codec; pub use self::facet_level_value_u32_codec::FacetLevelValueU32Codec; pub use self::facet_string_level_zero_codec::FacetStringLevelZeroCodec; pub use self::facet_string_zero_bounds_value_codec::FacetStringZeroBoundsValueCodec; -pub use self::facet_value_string_codec::FacetValueStringCodec; pub use 
self::field_doc_id_facet_f64_codec::FieldDocIdFacetF64Codec; pub use self::field_doc_id_facet_string_codec::FieldDocIdFacetStringCodec; diff --git a/milli/src/index.rs b/milli/src/index.rs index 099a5891d..b2be10767 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -11,7 +11,7 @@ use roaring::RoaringBitmap; use crate::error::{FieldIdMapMissingEntry, InternalError, UserError}; use crate::fields_ids_map::FieldsIdsMap; use crate::heed_codec::facet::{ - FacetLevelValueF64Codec, FacetValueStringCodec, FieldDocIdFacetF64Codec, + FacetLevelValueF64Codec, FacetStringLevelZeroCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, }; use crate::{ @@ -91,7 +91,7 @@ pub struct Index { /// Maps the facet field id, level and the number with the docids that corresponds to it. pub facet_id_f64_docids: Database, /// Maps the facet field id and the string with the docids that corresponds to it. - pub facet_id_string_docids: Database, + pub facet_id_string_docids: Database, /// Maps the document id, the facet field id and the numbers. 
pub field_id_docid_facet_f64s: Database, diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index 080fd9af7..ceefe785b 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -1,6 +1,6 @@ use std::collections::{BTreeMap, HashSet}; use std::ops::Bound::Unbounded; -use std::{cmp, fmt}; +use std::{cmp, fmt, mem}; use heed::types::{ByteSlice, Unit}; use heed::{BytesDecode, Database}; @@ -8,7 +8,7 @@ use roaring::RoaringBitmap; use crate::error::{FieldIdMapMissingEntry, UserError}; use crate::facet::FacetType; -use crate::heed_codec::facet::FacetValueStringCodec; +use crate::heed_codec::facet::FacetStringLevelZeroCodec; use crate::search::facet::{FacetNumberIter, FacetNumberRange}; use crate::{DocumentId, FieldId, Index, Result}; @@ -81,7 +81,7 @@ impl<'a> FacetDistribution<'a> { let mut key_buffer: Vec<_> = field_id.to_be_bytes().iter().copied().collect(); for docid in candidates.into_iter().take(CANDIDATES_THRESHOLD as usize) { - key_buffer.truncate(1); + key_buffer.truncate(mem::size_of::()); key_buffer.extend_from_slice(&docid.to_be_bytes()); let iter = db .remap_key_type::() @@ -158,7 +158,7 @@ impl<'a> FacetDistribution<'a> { .facet_id_string_docids .remap_key_type::() .prefix_iter(self.rtxn, &field_id.to_be_bytes())? 
- .remap_key_type::(); + .remap_key_type::(); for result in iter { let ((_, value), docids) = result?; diff --git a/milli/src/search/facet/facet_string.rs b/milli/src/search/facet/facet_string.rs index 09781f883..d0d9b54eb 100644 --- a/milli/src/search/facet/facet_string.rs +++ b/milli/src/search/facet/facet_string.rs @@ -130,7 +130,7 @@ use std::ops::Bound; use std::ops::Bound::{Excluded, Included, Unbounded}; use either::{Either, Left, Right}; -use heed::types::{ByteSlice, DecodeIgnore, Str}; +use heed::types::{ByteSlice, DecodeIgnore}; use heed::{Database, LazyDecode, RoRange}; use roaring::RoaringBitmap; @@ -298,10 +298,10 @@ impl<'t> FacetStringIter<'t> { ) -> heed::Result> { Ok(db .remap_types::() - .prefix_iter(rtxn, &[fid][..])? // the field id is the first bit + .prefix_iter(rtxn, &fid.to_be_bytes())? // the field id is the first two bytes .last() .transpose()? - .map(|(key_bytes, _)| key_bytes[1])) // the level is the second bit + .map(|(key_bytes, _)| key_bytes[2])) // the level is the third byte } } diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index 875fe3b27..c5ecb5a79 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -17,7 +17,7 @@ use self::Operator::*; use super::parser::{FilterParser, Rule, PREC_CLIMBER}; use super::FacetNumberRange; use crate::error::UserError; -use crate::heed_codec::facet::{FacetLevelValueF64Codec, FacetValueStringCodec}; +use crate::heed_codec::facet::{FacetLevelValueF64Codec, FacetStringLevelZeroCodec}; use crate::{CboRoaringBitmapCodec, FieldId, FieldsIdsMap, Index, Result}; #[derive(Debug, Clone, PartialEq)] @@ -363,7 +363,7 @@ impl FilterCondition { rtxn: &heed::RoTxn, index: &Index, numbers_db: heed::Database, - strings_db: heed::Database, + strings_db: heed::Database, field_id: FieldId, operator: &Operator, ) -> Result { diff --git a/milli/src/update/index_documents/store.rs 
b/milli/src/update/index_documents/store.rs index ebf365f44..f0225ff43 100644 --- a/milli/src/update/index_documents/store.rs +++ b/milli/src/update/index_documents/store.rs @@ -26,7 +26,7 @@ use super::merge_function::{ use super::{create_sorter, create_writer, writer_into_reader, MergeFn}; use crate::error::{Error, InternalError, SerializationError}; use crate::heed_codec::facet::{ - FacetLevelValueF64Codec, FacetValueStringCodec, FieldDocIdFacetF64Codec, + FacetLevelValueF64Codec, FacetStringLevelZeroCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, }; use crate::heed_codec::{BoRoaringBitmapCodec, CboRoaringBitmapCodec}; @@ -522,7 +522,7 @@ impl<'s, A: AsRef<[u8]>> Store<'s, A> { key_buffer.clear(); data_buffer.clear(); - FacetValueStringCodec::serialize_into(field_id, &value, &mut key_buffer); + FacetStringLevelZeroCodec::serialize_into(field_id, &value, &mut key_buffer); CboRoaringBitmapCodec::serialize_into(&docids, &mut data_buffer); if lmdb_key_valid_size(&key_buffer) {