From 485a72306d6e599f5d602887a0fa02822087527d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Mon, 5 Sep 2022 13:01:36 +0200 Subject: [PATCH] Refactor facet-related codecs --- milli/src/heed_codec/facet/mod.rs | 134 +++++++++++++++--- milli/src/heed_codec/facet/new/mod.rs | 120 ---------------- .../facet/{new => }/ordered_f64_codec.rs | 0 .../src/heed_codec/facet/{new => }/str_ref.rs | 0 milli/src/index.rs | 17 +-- milli/src/search/criteria/asc_desc.rs | 6 +- milli/src/search/distinct/facet_distinct.rs | 6 +- milli/src/search/facet/facet_distribution.rs | 15 +- .../search/facet/facet_distribution_iter.rs | 16 +-- milli/src/search/facet/facet_range_search.rs | 32 ++--- .../src/search/facet/facet_sort_ascending.rs | 20 +-- .../src/search/facet/facet_sort_descending.rs | 34 ++--- milli/src/search/facet/filter.rs | 16 ++- milli/src/search/facet/mod.rs | 22 +-- milli/src/snapshot_tests.rs | 6 +- milli/src/update/delete_documents.rs | 6 +- milli/src/update/facet/bulk.rs | 32 ++--- milli/src/update/facet/incremental.rs | 52 +++---- milli/src/update/facet/mod.rs | 27 ++-- .../extract/extract_facet_number_docids.rs | 8 +- .../extract/extract_facet_string_docids.rs | 8 +- .../word_pair_proximity_docids.hash.snap | 4 + 22 files changed, 280 insertions(+), 301 deletions(-) delete mode 100644 milli/src/heed_codec/facet/new/mod.rs rename milli/src/heed_codec/facet/{new => }/ordered_f64_codec.rs (100%) rename milli/src/heed_codec/facet/{new => }/str_ref.rs (100%) create mode 100644 milli/src/update/snapshots/word_prefix_pair_proximity_docids.rs/test_update/initial/word_pair_proximity_docids.hash.snap diff --git a/milli/src/heed_codec/facet/mod.rs b/milli/src/heed_codec/facet/mod.rs index e145e311e..299aeceb4 100644 --- a/milli/src/heed_codec/facet/mod.rs +++ b/milli/src/heed_codec/facet/mod.rs @@ -1,25 +1,19 @@ -// mod facet_level_value_f64_codec; -// mod facet_level_value_u32_codec; -// mod facet_string_level_zero_codec; -// mod facet_string_level_zero_value_codec; -// mod facet_string_zero_bounds_value_codec; mod field_doc_id_facet_f64_codec; mod field_doc_id_facet_string_codec; +mod ordered_f64_codec; +mod str_ref; -pub mod new; - -use heed::types::OwnedType; - -// pub use self::facet_level_value_f64_codec::FacetLevelValueF64Codec; -// pub use self::facet_level_value_u32_codec::FacetLevelValueU32Codec; -// pub use self::facet_string_level_zero_codec::FacetStringLevelZeroCodec; -// pub use self::facet_string_level_zero_value_codec::{ -// decode_prefix_string, encode_prefix_string, FacetStringLevelZeroValueCodec, -// }; -// pub use self::facet_string_zero_bounds_value_codec::FacetStringZeroBoundsValueCodec; pub use self::field_doc_id_facet_f64_codec::FieldDocIdFacetF64Codec; pub use self::field_doc_id_facet_string_codec::FieldDocIdFacetStringCodec; -use crate::BEU16; +pub use self::ordered_f64_codec::OrderedF64Codec; +pub use self::str_ref::StrRefCodec; +use crate::{CboRoaringBitmapCodec, BEU16}; +use heed::types::OwnedType; +use heed::{BytesDecode, BytesEncode}; +use roaring::RoaringBitmap; +use std::borrow::Cow; +use std::convert::TryFrom; +use std::marker::PhantomData; pub type FieldIdCodec = OwnedType; @@ -32,3 +26,109 @@ pub fn try_split_at(slice: &[u8], mid: usize) -> Option<(&[u8], &[u8])> { None } } + +#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub struct FacetGroupKey { + pub field_id: u16, + pub level: u8, + pub left_bound: T, +} +impl<'a> FacetGroupKey<&'a [u8]> { + pub fn into_owned(self) -> FacetGroupKey> { + FacetGroupKey { + field_id: self.field_id, + level: self.level, + left_bound: self.left_bound.to_vec(), + } + } +} + +impl<'a> FacetGroupKey> { + pub fn as_ref(&self) -> FacetGroupKey<&[u8]> { + FacetGroupKey { + field_id: self.field_id, + level: self.level, + left_bound: self.left_bound.as_slice(), + } + } +} + +#[derive(Debug)] +pub struct FacetGroupValue { + pub size: u8, + pub bitmap: RoaringBitmap, +} + +pub struct FacetGroupKeyCodec { + _phantom: PhantomData, +} + +impl<'a, T> heed::BytesEncode<'a> for FacetGroupKeyCodec +where + T: BytesEncode<'a>, + T::EItem: Sized, +{ + type EItem = FacetGroupKey; + + fn bytes_encode(value: &'a Self::EItem) -> Option> { + let mut v = vec![]; + v.extend_from_slice(&value.field_id.to_be_bytes()); + v.extend_from_slice(&[value.level]); + + let bound = T::bytes_encode(&value.left_bound)?; + v.extend_from_slice(&bound); + + Some(Cow::Owned(v)) + } +} +impl<'a, T> heed::BytesDecode<'a> for FacetGroupKeyCodec +where + T: BytesDecode<'a>, +{ + type DItem = FacetGroupKey; + + fn bytes_decode(bytes: &'a [u8]) -> Option { + let fid = u16::from_be_bytes(<[u8; 2]>::try_from(&bytes[0..=1]).ok()?); + let level = bytes[2]; + let bound = T::bytes_decode(&bytes[3..])?; + Some(FacetGroupKey { field_id: fid, level, left_bound: bound }) + } +} + +pub struct FacetGroupValueCodec; +impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec { + type EItem = FacetGroupValue; + + fn bytes_encode(value: &'a Self::EItem) -> Option> { + let mut v = vec![]; + v.push(value.size); + CboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut v); + Some(Cow::Owned(v)) + } +} +impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec { + type DItem = FacetGroupValue; + fn bytes_decode(bytes: &'a [u8]) -> Option { + let size = bytes[0]; + let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..]).ok()?; + Some(FacetGroupValue { size, bitmap }) + } +} + +pub struct ByteSliceRef; + +impl<'a> BytesEncode<'a> for ByteSliceRef { + type EItem = &'a [u8]; + + fn bytes_encode(item: &'a Self::EItem) -> Option> { + Some(Cow::Borrowed(item)) + } +} + +impl<'a> BytesDecode<'a> for ByteSliceRef { + type DItem = &'a [u8]; + + fn bytes_decode(bytes: &'a [u8]) -> Option { + Some(bytes) + } +} diff --git a/milli/src/heed_codec/facet/new/mod.rs b/milli/src/heed_codec/facet/new/mod.rs deleted file mode 100644 index bcb2957fc..000000000 --- a/milli/src/heed_codec/facet/new/mod.rs +++ /dev/null @@ -1,120 +0,0 @@ -use std::borrow::Cow; -use std::convert::TryFrom; -use std::marker::PhantomData; - -use heed::{BytesDecode, BytesEncode}; -use roaring::RoaringBitmap; - -use crate::CboRoaringBitmapCodec; - -pub mod ordered_f64_codec; -pub mod str_ref; -// TODO: these codecs were quickly written and not fast/resilient enough - -#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] -pub struct FacetKey { - pub field_id: u16, - pub level: u8, - pub left_bound: T, -} -impl<'a> FacetKey<&'a [u8]> { - pub fn into_owned(self) -> FacetKey> { - FacetKey { - field_id: self.field_id, - level: self.level, - left_bound: self.left_bound.to_vec(), - } - } -} - -impl<'a> FacetKey> { - pub fn as_ref(&self) -> FacetKey<&[u8]> { - FacetKey { - field_id: self.field_id, - level: self.level, - left_bound: self.left_bound.as_slice(), - } - } -} - -#[derive(Debug)] -pub struct FacetGroupValue { - pub size: u8, - pub bitmap: RoaringBitmap, -} - -pub struct FacetKeyCodec { - _phantom: PhantomData, -} - -impl<'a, T> heed::BytesEncode<'a> for FacetKeyCodec -where - T: BytesEncode<'a>, - T::EItem: Sized, -{ - type EItem = FacetKey; - - fn bytes_encode(value: &'a Self::EItem) -> Option> { - let mut v = vec![]; - v.extend_from_slice(&value.field_id.to_be_bytes()); - v.extend_from_slice(&[value.level]); - - let bound = T::bytes_encode(&value.left_bound)?; - v.extend_from_slice(&bound); - - Some(Cow::Owned(v)) - } -} -impl<'a, T> heed::BytesDecode<'a> for FacetKeyCodec -where - T: BytesDecode<'a>, -{ - type DItem = FacetKey; - - fn bytes_decode(bytes: &'a [u8]) -> Option { - let fid = u16::from_be_bytes(<[u8; 2]>::try_from(&bytes[0..=1]).ok()?); - let level = bytes[2]; - let bound = T::bytes_decode(&bytes[3..])?; - Some(FacetKey { field_id: fid, level, left_bound: bound }) - } -} - -pub struct FacetGroupValueCodec; -impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec { - type EItem = FacetGroupValue; - - fn bytes_encode(value: &'a Self::EItem) -> Option> { - let mut v = vec![]; - v.push(value.size); - CboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut v); - Some(Cow::Owned(v)) - } -} -impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec { - type DItem = FacetGroupValue; - fn bytes_decode(bytes: &'a [u8]) -> Option { - let size = bytes[0]; - let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..]).ok()?; - Some(FacetGroupValue { size, bitmap }) - } -} - -// TODO: get rid of this codec as it is named confusingly + should really be part of heed -// or even replace the current ByteSlice codec -pub struct MyByteSlice; - -impl<'a> BytesEncode<'a> for MyByteSlice { - type EItem = &'a [u8]; - - fn bytes_encode(item: &'a Self::EItem) -> Option> { - Some(Cow::Borrowed(item)) - } -} - -impl<'a> BytesDecode<'a> for MyByteSlice { - type DItem = &'a [u8]; - - fn bytes_decode(bytes: &'a [u8]) -> Option { - Some(bytes) - } -} diff --git a/milli/src/heed_codec/facet/new/ordered_f64_codec.rs b/milli/src/heed_codec/facet/ordered_f64_codec.rs similarity index 100% rename from milli/src/heed_codec/facet/new/ordered_f64_codec.rs rename to milli/src/heed_codec/facet/ordered_f64_codec.rs diff --git a/milli/src/heed_codec/facet/new/str_ref.rs b/milli/src/heed_codec/facet/str_ref.rs similarity index 100% rename from milli/src/heed_codec/facet/new/str_ref.rs rename to milli/src/heed_codec/facet/str_ref.rs diff --git a/milli/src/index.rs b/milli/src/index.rs index 40e78bf10..66a53d98c 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -14,15 +14,10 @@ use time::OffsetDateTime; use crate::error::{InternalError, UserError}; use crate::facet::FacetType; use crate::fields_ids_map::FieldsIdsMap; -use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec; -use crate::heed_codec::facet::new::str_ref::StrRefCodec; -use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec}; -use crate::heed_codec::facet::{ - // FacetLevelValueF64Codec, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec, - FieldDocIdFacetF64Codec, - FieldDocIdFacetStringCodec, - FieldIdCodec, -}; +use crate::heed_codec::facet::OrderedF64Codec; +use crate::heed_codec::facet::StrRefCodec; +use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKeyCodec}; +use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, FieldIdCodec}; use crate::{ default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId, @@ -130,9 +125,9 @@ pub struct Index { pub facet_id_exists_docids: Database, /// Maps the facet field id and ranges of numbers with the docids that corresponds to them. - pub facet_id_f64_docids: Database, FacetGroupValueCodec>, + pub facet_id_f64_docids: Database, FacetGroupValueCodec>, /// Maps the facet field id and ranges of strings with the docids that corresponds to them. - pub facet_id_string_docids: Database, FacetGroupValueCodec>, + pub facet_id_string_docids: Database, FacetGroupValueCodec>, /// Maps the document id, the facet field id and the numbers. pub field_id_docid_facet_f64s: Database, diff --git a/milli/src/search/criteria/asc_desc.rs b/milli/src/search/criteria/asc_desc.rs index ccf66889e..2908f0e78 100644 --- a/milli/src/search/criteria/asc_desc.rs +++ b/milli/src/search/criteria/asc_desc.rs @@ -7,7 +7,7 @@ use roaring::RoaringBitmap; use super::{Criterion, CriterionParameters, CriterionResult}; use crate::facet::FacetType; -use crate::heed_codec::facet::new::{FacetKeyCodec, MyByteSlice}; +use crate::heed_codec::facet::{FacetGroupKeyCodec, ByteSliceRef}; use crate::search::criteria::{resolve_query_tree, CriteriaBuilder}; use crate::search::facet::facet_sort_ascending::ascending_facet_sort; use crate::search::facet::facet_sort_descending::descending_facet_sort; @@ -196,14 +196,14 @@ fn facet_ordered<'t>( let number_iter = make_iter( rtxn, - index.facet_id_f64_docids.remap_key_type::>(), + index.facet_id_f64_docids.remap_key_type::>(), field_id, candidates.clone(), )?; let string_iter = make_iter( rtxn, - index.facet_id_string_docids.remap_key_type::>(), + index.facet_id_string_docids.remap_key_type::>(), field_id, candidates, )?; diff --git a/milli/src/search/distinct/facet_distinct.rs b/milli/src/search/distinct/facet_distinct.rs index 4a4815775..b9d584eb6 100644 --- a/milli/src/search/distinct/facet_distinct.rs +++ b/milli/src/search/distinct/facet_distinct.rs @@ -6,7 +6,7 @@ use roaring::RoaringBitmap; use super::{Distinct, DocIter}; use crate::error::InternalError; -use crate::heed_codec::facet::new::FacetKey; +use crate::heed_codec::facet::FacetGroupKey; use crate::heed_codec::facet::*; use crate::index::db_name; use crate::{DocumentId, FieldId, Index, Result}; @@ -48,7 +48,7 @@ impl<'a> FacetDistinctIter<'a> { fn facet_string_docids(&self, key: &str) -> heed::Result> { self.index .facet_id_string_docids - .get(self.txn, &FacetKey { field_id: self.distinct, level: 0, left_bound: key }) + .get(self.txn, &FacetGroupKey { field_id: self.distinct, level: 0, left_bound: key }) .map(|opt| opt.map(|v| v.bitmap)) } @@ -56,7 +56,7 @@ impl<'a> FacetDistinctIter<'a> { // get facet docids on level 0 self.index .facet_id_f64_docids - .get(self.txn, &FacetKey { field_id: self.distinct, level: 0, left_bound: key }) + .get(self.txn, &FacetGroupKey { field_id: self.distinct, level: 0, left_bound: key }) .map(|opt| opt.map(|v| v.bitmap)) } diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index c7619c609..10b995d97 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -8,12 +8,11 @@ use roaring::RoaringBitmap; use crate::error::UserError; use crate::facet::FacetType; -use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec; -use crate::heed_codec::facet::new::str_ref::StrRefCodec; -use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice}; +use crate::heed_codec::facet::OrderedF64Codec; +use crate::heed_codec::facet::StrRefCodec; +use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec}; use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec}; use crate::search::facet::facet_distribution_iter; -// use crate::search::facet::FacetStringIter; use crate::{FieldId, Index, Result}; /// The default number of values by facets that will @@ -138,7 +137,7 @@ impl<'a> FacetDistribution<'a> { ) -> heed::Result<()> { facet_distribution_iter::iterate_over_facet_distribution( self.rtxn, - self.index.facet_id_f64_docids.remap_key_type::>(), + self.index.facet_id_f64_docids.remap_key_type::>(), field_id, candidates, |facet_key, nbr_docids| { @@ -161,7 +160,7 @@ impl<'a> FacetDistribution<'a> { ) -> heed::Result<()> { facet_distribution_iter::iterate_over_facet_distribution( self.rtxn, - self.index.facet_id_string_docids.remap_key_type::>(), + self.index.facet_id_string_docids.remap_key_type::>(), field_id, candidates, |facet_key, nbr_docids| { @@ -191,7 +190,7 @@ impl<'a> FacetDistribution<'a> { let iter = db .as_polymorph() .prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())? - .remap_types::, FacetGroupValueCodec>(); + .remap_types::, FacetGroupValueCodec>(); for result in iter { let (key, value) = result?; @@ -206,7 +205,7 @@ impl<'a> FacetDistribution<'a> { .facet_id_string_docids .as_polymorph() .prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())? - .remap_types::, FacetGroupValueCodec>(); + .remap_types::, FacetGroupValueCodec>(); // TODO: get the original value of the facet somewhere (in the documents DB?) for result in iter { diff --git a/milli/src/search/facet/facet_distribution_iter.rs b/milli/src/search/facet/facet_distribution_iter.rs index 13ba28019..151304029 100644 --- a/milli/src/search/facet/facet_distribution_iter.rs +++ b/milli/src/search/facet/facet_distribution_iter.rs @@ -4,11 +4,11 @@ use heed::Result; use roaring::RoaringBitmap; use super::{get_first_facet_value, get_highest_level}; -use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice}; +use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKey, FacetGroupValueCodec, FacetGroupKeyCodec}; pub fn iterate_over_facet_distribution<'t, CB>( rtxn: &'t heed::RoTxn<'t>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, candidates: &RoaringBitmap, callback: CB, @@ -18,9 +18,9 @@ where { let mut fd = FacetDistribution { rtxn, db, field_id, callback }; let highest_level = - get_highest_level(rtxn, db.remap_key_type::>(), field_id)?; + get_highest_level(rtxn, db.remap_key_type::>(), field_id)?; - if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { + if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { fd.iterate(candidates, highest_level, first_bound, usize::MAX)?; return Ok(()); } else { @@ -33,7 +33,7 @@ where CB: FnMut(&'t [u8], u64) -> ControlFlow<()>, { rtxn: &'t heed::RoTxn<'t>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, callback: CB, } @@ -49,7 +49,7 @@ where group_size: usize, ) -> Result> { let starting_key = - FacetKey { field_id: self.field_id, level: 0, left_bound: starting_bound }; + FacetGroupKey { field_id: self.field_id, level: 0, left_bound: starting_bound }; let iter = self.db.range(self.rtxn, &(starting_key..))?.take(group_size); for el in iter { let (key, value) = el?; @@ -78,7 +78,7 @@ where if level == 0 { return self.iterate_level_0(candidates, starting_bound, group_size); } - let starting_key = FacetKey { field_id: self.field_id, level, left_bound: starting_bound }; + let starting_key = FacetGroupKey { field_id: self.field_id, level, left_bound: starting_bound }; let iter = self.db.range(&self.rtxn, &(&starting_key..)).unwrap().take(group_size); for el in iter { @@ -116,7 +116,7 @@ mod tests { use roaring::RoaringBitmap; use super::iterate_over_facet_distribution; - use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec; + use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec; use crate::milli_snap; use crate::search::facet::test::FacetIndex; diff --git a/milli/src/search/facet/facet_range_search.rs b/milli/src/search/facet/facet_range_search.rs index 039cd5c8d..a0e6d8e03 100644 --- a/milli/src/search/facet/facet_range_search.rs +++ b/milli/src/search/facet/facet_range_search.rs @@ -4,12 +4,12 @@ use heed::BytesEncode; use roaring::RoaringBitmap; use super::{get_first_facet_value, get_highest_level, get_last_facet_value}; -use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice}; +use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef}; use crate::Result; pub fn find_docids_of_facet_within_bounds<'t, BoundCodec>( rtxn: &'t heed::RoTxn<'t>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, left: &'t Bound<>::EItem>, right: &'t Bound<>::EItem>, @@ -42,13 +42,13 @@ where } Bound::Unbounded => Bound::Unbounded, }; - let db = db.remap_key_type::>(); + let db = db.remap_key_type::>(); let mut docids = RoaringBitmap::new(); let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids: &mut docids }; let highest_level = get_highest_level(rtxn, db, field_id)?; - if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { - let last_bound = get_last_facet_value::(rtxn, db, field_id)?.unwrap(); + if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { + let last_bound = get_last_facet_value::(rtxn, db, field_id)?.unwrap(); f.run(highest_level, first_bound, Bound::Included(last_bound), usize::MAX)?; Ok(docids) } else { @@ -59,7 +59,7 @@ where /// Fetch the document ids that have a facet with a value between the two given bounds struct FacetRangeSearch<'t, 'b, 'bitmap> { rtxn: &'t heed::RoTxn<'t>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, left: Bound<&'b [u8]>, right: Bound<&'b [u8]>, @@ -68,7 +68,7 @@ struct FacetRangeSearch<'t, 'b, 'bitmap> { impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> { fn run_level_0(&mut self, starting_left_bound: &'t [u8], group_size: usize) -> Result<()> { let left_key = - FacetKey { field_id: self.field_id, level: 0, left_bound: starting_left_bound }; + FacetGroupKey { field_id: self.field_id, level: 0, left_bound: starting_left_bound }; let iter = self.db.range(&self.rtxn, &(left_key..))?.take(group_size); for el in iter { let (key, value) = el?; @@ -117,7 +117,7 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> { return self.run_level_0(starting_left_bound, group_size); } - let left_key = FacetKey { field_id: self.field_id, level, left_bound: starting_left_bound }; + let left_key = FacetGroupKey { field_id: self.field_id, level, left_bound: starting_left_bound }; let mut iter = self.db.range(&self.rtxn, &(left_key..))?.take(group_size); let (mut previous_key, mut previous_value) = iter.next().unwrap()?; @@ -258,8 +258,8 @@ mod tests { use roaring::RoaringBitmap; use super::find_docids_of_facet_within_bounds; - use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec; - use crate::heed_codec::facet::new::FacetKeyCodec; + use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec; + use crate::heed_codec::facet::FacetGroupKeyCodec; use crate::milli_snap; use crate::search::facet::test::FacetIndex; use crate::snapshot_tests::display_bitmap; @@ -310,7 +310,7 @@ mod tests { let end = Bound::Included(i); let docids = find_docids_of_facet_within_bounds::( &txn, - index.db.content.remap_key_type::>(), + index.db.content.remap_key_type::>(), 0, &start, &end, @@ -326,7 +326,7 @@ mod tests { let end = Bound::Excluded(i); let docids = find_docids_of_facet_within_bounds::( &txn, - index.db.content.remap_key_type::>(), + index.db.content.remap_key_type::>(), 0, &start, &end, @@ -352,7 +352,7 @@ mod tests { let end = Bound::Included(255.); let docids = find_docids_of_facet_within_bounds::( &txn, - index.db.content.remap_key_type::>(), + index.db.content.remap_key_type::>(), 0, &start, &end, @@ -371,7 +371,7 @@ mod tests { let end = Bound::Excluded(255.); let docids = find_docids_of_facet_within_bounds::( &txn, - index.db.content.remap_key_type::>(), + index.db.content.remap_key_type::>(), 0, &start, &end, @@ -399,7 +399,7 @@ mod tests { let end = Bound::Included(255. - i); let docids = find_docids_of_facet_within_bounds::( &txn, - index.db.content.remap_key_type::>(), + index.db.content.remap_key_type::>(), 0, &start, &end, @@ -418,7 +418,7 @@ mod tests { let end = Bound::Excluded(255. - i); let docids = find_docids_of_facet_within_bounds::( &txn, - index.db.content.remap_key_type::>(), + index.db.content.remap_key_type::>(), 0, &start, &end, diff --git a/milli/src/search/facet/facet_sort_ascending.rs b/milli/src/search/facet/facet_sort_ascending.rs index b3cae5d28..b601242e8 100644 --- a/milli/src/search/facet/facet_sort_ascending.rs +++ b/milli/src/search/facet/facet_sort_ascending.rs @@ -2,19 +2,19 @@ use heed::Result; use roaring::RoaringBitmap; use super::{get_first_facet_value, get_highest_level}; -use crate::heed_codec::facet::new::{ - FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice, +use crate::heed_codec::facet::{ + FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef, }; pub fn ascending_facet_sort<'t>( rtxn: &'t heed::RoTxn<'t>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, candidates: RoaringBitmap, ) -> Result> + 't>> { let highest_level = get_highest_level(rtxn, db, field_id)?; - if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { - let first_key = FacetKey { field_id, level: highest_level, left_bound: first_bound }; + if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { + let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX); Ok(Box::new(AscendingFacetSort { rtxn, db, field_id, stack: vec![(candidates, iter)] })) @@ -25,11 +25,11 @@ pub fn ascending_facet_sort<'t>( struct AscendingFacetSort<'t, 'e> { rtxn: &'t heed::RoTxn<'e>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, stack: Vec<( RoaringBitmap, - std::iter::Take, FacetGroupValueCodec>>, + std::iter::Take, FacetGroupValueCodec>>, )>, } @@ -41,7 +41,7 @@ impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> { let (documents_ids, deepest_iter) = self.stack.last_mut()?; for result in deepest_iter { let ( - FacetKey { level, left_bound, field_id }, + FacetGroupKey { level, left_bound, field_id }, FacetGroupValue { size: group_size, mut bitmap }, ) = result.unwrap(); // The range is unbounded on the right and the group size for the highest level is MAX, @@ -65,7 +65,7 @@ impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> { return Some(Ok(bitmap)); } let starting_key_below = - FacetKey { field_id: self.field_id, level: level - 1, left_bound }; + FacetGroupKey { field_id: self.field_id, level: level - 1, left_bound }; let iter = match self.db.range(&self.rtxn, &(starting_key_below..)) { Ok(iter) => iter, Err(e) => return Some(Err(e.into())), @@ -86,7 +86,7 @@ mod tests { use rand::{Rng, SeedableRng}; use roaring::RoaringBitmap; - use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec; + use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec; use crate::milli_snap; use crate::search::facet::facet_sort_ascending::ascending_facet_sort; use crate::search::facet::test::FacetIndex; diff --git a/milli/src/search/facet/facet_sort_descending.rs b/milli/src/search/facet/facet_sort_descending.rs index d68c9bdad..088f8d2fa 100644 --- a/milli/src/search/facet/facet_sort_descending.rs +++ b/milli/src/search/facet/facet_sort_descending.rs @@ -4,21 +4,21 @@ use heed::Result; use roaring::RoaringBitmap; use super::{get_first_facet_value, get_highest_level, get_last_facet_value}; -use crate::heed_codec::facet::new::{ - FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice, +use crate::heed_codec::facet::{ + FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef, }; pub fn descending_facet_sort<'t>( rtxn: &'t heed::RoTxn<'t>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, candidates: RoaringBitmap, ) -> Result> + 't>> { let highest_level = get_highest_level(rtxn, db, field_id)?; - if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { - let first_key = FacetKey { field_id, level: highest_level, left_bound: first_bound }; - let last_bound = get_last_facet_value::(rtxn, db, field_id)?.unwrap(); - let last_key = FacetKey { field_id, level: highest_level, left_bound: last_bound }; + if let Some(first_bound) = get_first_facet_value::(rtxn, db, field_id)? { + let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound }; + let last_bound = get_last_facet_value::(rtxn, db, field_id)?.unwrap(); + let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound }; let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX); Ok(Box::new(DescendingFacetSort { rtxn, @@ -33,11 +33,11 @@ pub fn descending_facet_sort<'t>( struct DescendingFacetSort<'t> { rtxn: &'t heed::RoTxn<'t>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, stack: Vec<( RoaringBitmap, - std::iter::Take, FacetGroupValueCodec>>, + std::iter::Take, FacetGroupValueCodec>>, Bound<&'t [u8]>, )>, } @@ -50,7 +50,7 @@ impl<'t> Iterator for DescendingFacetSort<'t> { let (documents_ids, deepest_iter, right_bound) = self.stack.last_mut()?; while let Some(result) = deepest_iter.next() { let ( - FacetKey { level, left_bound, field_id }, + FacetGroupKey { level, left_bound, field_id }, FacetGroupValue { size: group_size, mut bitmap }, ) = result.unwrap(); // The range is unbounded on the right and the group size for the highest level is MAX, @@ -72,15 +72,15 @@ impl<'t> Iterator for DescendingFacetSort<'t> { if level == 0 { return Some(Ok(bitmap)); } - let starting_key_below = FacetKey { field_id, level: level - 1, left_bound }; + let starting_key_below = FacetGroupKey { field_id, level: level - 1, left_bound }; let end_key_kelow = match *right_bound { - Bound::Included(right) => Bound::Included(FacetKey { + Bound::Included(right) => Bound::Included(FacetGroupKey { field_id, level: level - 1, left_bound: right, }), - Bound::Excluded(right) => Bound::Excluded(FacetKey { + Bound::Excluded(right) => Bound::Excluded(FacetGroupKey { field_id, level: level - 1, left_bound: right, @@ -90,7 +90,7 @@ impl<'t> Iterator for DescendingFacetSort<'t> { let prev_right_bound = *right_bound; *right_bound = Bound::Excluded(left_bound); let iter = - match self.db.remap_key_type::>().rev_range( + match self.db.remap_key_type::>().rev_range( &self.rtxn, &(Bound::Included(starting_key_below), end_key_kelow), ) { @@ -114,8 +114,8 @@ mod tests { use rand::{Rng, SeedableRng}; use roaring::RoaringBitmap; - use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec; - use crate::heed_codec::facet::new::{FacetKeyCodec, MyByteSlice}; + use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec; + use crate::heed_codec::facet::{FacetGroupKeyCodec, ByteSliceRef}; use crate::milli_snap; use crate::search::facet::facet_sort_descending::descending_facet_sort; use crate::search::facet::test::FacetIndex; @@ -162,7 +162,7 @@ mod tests { let txn = index.env.read_txn().unwrap(); let candidates = (200..=300).into_iter().collect::(); let mut results = String::new(); - let db = index.db.content.remap_key_type::>(); + let db = index.db.content.remap_key_type::>(); let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap(); for el in iter { let docids = el.unwrap(); diff --git a/milli/src/search/facet/filter.rs b/milli/src/search/facet/filter.rs index 6a10b7097..1b40f6db1 100644 --- a/milli/src/search/facet/filter.rs +++ b/milli/src/search/facet/filter.rs @@ -9,8 +9,8 @@ use roaring::RoaringBitmap; use super::facet_range_search; use crate::error::{Error, UserError}; -use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec; -use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec}; +use crate::heed_codec::facet::OrderedF64Codec; +use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec}; use crate::{distance_between_two_points, lat_lng_to_xyz, FieldId, Index, Result}; /// The maximum number of filters the filter AST can process. @@ -180,7 +180,11 @@ impl<'a> Filter<'a> { let string_docids = strings_db .get( rtxn, - &FacetKey { field_id, level: 0, left_bound: &val.value().to_lowercase() }, + &FacetGroupKey { + field_id, + level: 0, + left_bound: &val.value().to_lowercase(), + }, )? .map(|v| v.bitmap) .unwrap_or_default(); @@ -218,10 +222,10 @@ impl<'a> Filter<'a> { .remap_data_type::() .get_lower_than_or_equal_to( rtxn, - &FacetKey { field_id, level: u8::MAX, left_bound: f64::MAX }, + &FacetGroupKey { field_id, level: u8::MAX, left_bound: f64::MAX }, )? .and_then( - |(FacetKey { field_id: id, level, .. }, _)| { + |(FacetGroupKey { field_id: id, level, .. }, _)| { if id == field_id { Some(level) } else { @@ -252,7 +256,7 @@ impl<'a> Filter<'a> { /// going deeper through the levels. fn explore_facet_number_levels( rtxn: &heed::RoTxn, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: FieldId, level: u8, left: Bound, diff --git a/milli/src/search/facet/mod.rs b/milli/src/search/facet/mod.rs index 78cd8fd4b..ec5caa2a8 100644 --- a/milli/src/search/facet/mod.rs +++ b/milli/src/search/facet/mod.rs @@ -3,7 +3,7 @@ use heed::{BytesDecode, RoTxn}; pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET}; pub use self::filter::Filter; -use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice}; +use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKeyCodec, ByteSliceRef}; mod facet_distribution; mod facet_distribution_iter; @@ -14,7 +14,7 @@ mod filter; pub(crate) fn get_first_facet_value<'t, BoundCodec>( txn: &'t RoTxn, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, ) -> heed::Result> where @@ -28,7 +28,7 @@ where if let Some(first) = level0_iter_forward.next() { let (first_key, _) = first?; let first_key = - FacetKeyCodec::::bytes_decode(first_key).ok_or(heed::Error::Encoding)?; + FacetGroupKeyCodec::::bytes_decode(first_key).ok_or(heed::Error::Encoding)?; Ok(Some(first_key.left_bound)) } else { Ok(None) @@ -36,7 +36,7 @@ where } pub(crate) fn get_last_facet_value<'t, BoundCodec>( txn: &'t RoTxn, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, ) -> heed::Result> where @@ -51,7 +51,7 @@ where if let Some(last) = level0_iter_backward.next() { let (last_key, _) = last?; let last_key = - FacetKeyCodec::::bytes_decode(last_key).ok_or(heed::Error::Encoding)?; + FacetGroupKeyCodec::::bytes_decode(last_key).ok_or(heed::Error::Encoding)?; Ok(Some(last_key.left_bound)) } else { Ok(None) @@ -59,7 +59,7 @@ where } pub(crate) fn get_highest_level<'t>( txn: &'t RoTxn<'t>, - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, field_id: u16, ) -> heed::Result { let field_id_prefix = &field_id.to_be_bytes(); @@ -69,7 +69,7 @@ pub(crate) fn get_highest_level<'t>( .next() .map(|el| { let (key, _) = el.unwrap(); - let key = FacetKeyCodec::::bytes_decode(key).unwrap(); + let key = FacetGroupKeyCodec::::bytes_decode(key).unwrap(); key.level }) .unwrap_or(0)) @@ -84,8 +84,8 @@ pub mod test { use heed::{BytesDecode, BytesEncode, Env, RwTxn}; use roaring::RoaringBitmap; - use crate::heed_codec::facet::new::{ - FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice, + use crate::heed_codec::facet::{ + FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef, }; use crate::snapshot_tests::display_bitmap; use crate::update::FacetsUpdateIncremental; @@ -101,7 +101,7 @@ pub mod test { } pub struct Database { - pub content: heed::Database, FacetGroupValueCodec>, + pub content: heed::Database, FacetGroupValueCodec>, pub group_size: usize, pub max_group_size: usize, _tempdir: Rc, @@ -184,7 +184,7 @@ pub mod test { let mut iter = self.db.content.iter(&txn).unwrap(); while let Some(el) = iter.next() { let (key, value) = el.unwrap(); - let FacetKey { field_id, level, left_bound: bound } = key; + let FacetGroupKey { field_id, level, left_bound: bound } = key; let bound = BoundCodec::bytes_decode(bound).unwrap(); let FacetGroupValue { size, bitmap } = value; writeln!( diff --git a/milli/src/snapshot_tests.rs b/milli/src/snapshot_tests.rs index 57fd2e5fe..ab9dddaf2 100644 --- a/milli/src/snapshot_tests.rs +++ b/milli/src/snapshot_tests.rs @@ -5,7 +5,7 @@ use std::path::Path; use roaring::RoaringBitmap; use crate::facet::FacetType; -use crate::heed_codec::facet::new::{FacetGroupValue, FacetKey}; +use crate::heed_codec::facet::{FacetGroupValue, FacetGroupKey}; use crate::{make_db_snap_from_iter, ExternalDocumentsIds, Index}; #[track_caller] @@ -280,7 +280,7 @@ pub fn snap_word_prefix_position_docids(index: &Index) -> String { } pub fn snap_facet_id_f64_docids(index: &Index) -> String { let snap = make_db_snap_from_iter!(index, facet_id_f64_docids, |( - FacetKey { field_id, level, left_bound }, + FacetGroupKey { field_id, level, left_bound }, FacetGroupValue { size, bitmap }, )| { &format!("{field_id:<3} {level:<2} {left_bound:<6} {size:<2} {}", display_bitmap(&bitmap)) @@ -289,7 +289,7 @@ pub fn snap_facet_id_f64_docids(index: &Index) -> String { } pub fn snap_facet_id_string_docids(index: &Index) -> String { let snap = make_db_snap_from_iter!(index, facet_id_string_docids, |( - FacetKey { field_id, level, left_bound }, + FacetGroupKey { field_id, level, left_bound }, FacetGroupValue { size, bitmap }, )| { &format!("{field_id:<3} {level:<2} {left_bound:<12} {size:<2} {}", display_bitmap(&bitmap)) diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index ffa63f0a7..5b9e99d77 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -11,7 +11,7 @@ use time::OffsetDateTime; use super::{ClearDocuments, FacetsUpdateBulk}; use crate::error::{InternalError, UserError}; use crate::facet::FacetType; -use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice}; +use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKeyCodec, ByteSliceRef}; use crate::heed_codec::CboRoaringBitmapCodec; use crate::index::{db_name, main_key}; use crate::{ @@ -626,10 +626,10 @@ fn remove_docids_from_facet_id_docids<'a>( ) -> Result<()> { let db = match facet_type { FacetType::String => { - index.facet_id_string_docids.remap_key_type::>() + index.facet_id_string_docids.remap_key_type::>() } FacetType::Number => { - index.facet_id_f64_docids.remap_key_type::>() + index.facet_id_f64_docids.remap_key_type::>() } }; let mut modified = false; diff --git a/milli/src/update/facet/bulk.rs b/milli/src/update/facet/bulk.rs index 0a4b7db45..38017a83d 100644 --- a/milli/src/update/facet/bulk.rs +++ b/milli/src/update/facet/bulk.rs @@ -12,8 +12,8 @@ use time::OffsetDateTime; use crate::error::InternalError; use crate::facet::FacetType; -use crate::heed_codec::facet::new::{ - FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice, +use crate::heed_codec::facet::{ + FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef, }; use crate::update::index_documents::{ create_writer, valid_lmdb_key, write_into_lmdb_database, writer_into_reader, @@ -22,7 +22,7 @@ use crate::{CboRoaringBitmapCodec, FieldId, Index, Result}; pub struct FacetsUpdateBulk<'i> { index: &'i Index, - database: heed::Database, FacetGroupValueCodec>, + database: heed::Database, FacetGroupValueCodec>, level_group_size: usize, min_level_size: usize, facet_type: FacetType, @@ -40,10 +40,10 @@ impl<'i> FacetsUpdateBulk<'i> { index, database: match facet_type { FacetType::String => { - index.facet_id_string_docids.remap_key_type::>() + index.facet_id_string_docids.remap_key_type::>() } FacetType::Number => { - index.facet_id_f64_docids.remap_key_type::>() + index.facet_id_f64_docids.remap_key_type::>() } }, level_group_size: 4, @@ -61,10 +61,10 @@ impl<'i> FacetsUpdateBulk<'i> { index, database: match facet_type { FacetType::String => { - index.facet_id_string_docids.remap_key_type::>() + index.facet_id_string_docids.remap_key_type::>() } FacetType::Number => { - index.facet_id_f64_docids.remap_key_type::>() + index.facet_id_f64_docids.remap_key_type::>() } }, level_group_size: 4, @@ -89,8 +89,8 @@ impl<'i> FacetsUpdateBulk<'i> { } fn clear_levels(&self, wtxn: &mut heed::RwTxn, field_id: FieldId) -> Result<()> { - let left = FacetKey::<&[u8]> { field_id, level: 1, left_bound: &[] }; - let right = FacetKey::<&[u8]> { field_id, level: u8::MAX, left_bound: &[] }; + let left = FacetGroupKey::<&[u8]> { field_id, level: 1, left_bound: &[] }; + let right = FacetGroupKey::<&[u8]> { field_id, level: u8::MAX, left_bound: &[] }; let range = left..=right; self.database.delete_range(wtxn, &range).map(drop)?; Ok(()) @@ -119,7 +119,7 @@ impl<'i> FacetsUpdateBulk<'i> { for level_reader in level_readers { let mut cursor = level_reader.into_cursor()?; while let Some((k, v)) = cursor.move_on_next()? { - let key = FacetKeyCodec::::bytes_decode(k).unwrap(); + let key = FacetGroupKeyCodec::::bytes_decode(k).unwrap(); let value = FacetGroupValueCodec::bytes_decode(v).unwrap(); println!("inserting {key:?} {value:?}"); @@ -210,7 +210,7 @@ impl<'i> FacetsUpdateBulk<'i> { struct ComputeHigherLevels<'t> { rtxn: &'t heed::RoTxn<'t>, - db: &'t heed::Database, FacetGroupValueCodec>, + db: &'t heed::Database, FacetGroupValueCodec>, field_id: u16, level_group_size: usize, min_level_size: usize, @@ -233,7 +233,7 @@ impl<'t> ComputeHigherLevels<'t> { .db .as_polymorph() .prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, level_0_prefix.as_slice())? - .remap_types::, FacetGroupValueCodec>(); + .remap_types::, FacetGroupValueCodec>(); let mut left_bound: &[u8] = &[]; let mut first_iteration_for_new_group = true; @@ -311,9 +311,9 @@ impl<'t> ComputeHigherLevels<'t> { for ((bitmap, left_bound), group_size) in bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..)) { - let key = FacetKey { field_id: self.field_id, level, left_bound }; + let key = FacetGroupKey { field_id: self.field_id, level, left_bound }; let key = - FacetKeyCodec::::bytes_encode(&key).ok_or(Error::Encoding)?; + FacetGroupKeyCodec::::bytes_encode(&key).ok_or(Error::Encoding)?; let value = FacetGroupValue { size: group_size, bitmap }; let value = FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?; @@ -329,9 +329,9 @@ impl<'t> ComputeHigherLevels<'t> { for ((bitmap, left_bound), group_size) in bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..)) { - let key = FacetKey { field_id: self.field_id, level, left_bound }; + let key = FacetGroupKey { field_id: self.field_id, level, left_bound }; let key = - FacetKeyCodec::::bytes_encode(&key).ok_or(Error::Encoding)?; + FacetGroupKeyCodec::::bytes_encode(&key).ok_or(Error::Encoding)?; let value = FacetGroupValue { size: group_size, bitmap }; let value = FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?; cur_writer.insert(key, value)?; diff --git a/milli/src/update/facet/incremental.rs b/milli/src/update/facet/incremental.rs index e32a6baf1..e86aa4402 100644 --- a/milli/src/update/facet/incremental.rs +++ b/milli/src/update/facet/incremental.rs @@ -2,8 +2,8 @@ use heed::types::ByteSlice; use heed::{BytesDecode, Error, RoTxn, RwTxn}; use roaring::RoaringBitmap; -use crate::heed_codec::facet::new::{ - FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice, +use crate::heed_codec::facet::{ + FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef, }; use crate::search::facet::get_highest_level; use crate::Result; @@ -19,13 +19,13 @@ enum DeletionResult { } pub struct FacetsUpdateIncremental { - db: heed::Database, FacetGroupValueCodec>, + db: heed::Database, FacetGroupValueCodec>, group_size: usize, min_level_size: usize, max_group_size: usize, } impl FacetsUpdateIncremental { - pub fn new(db: heed::Database, FacetGroupValueCodec>) -> Self { + pub fn new(db: heed::Database, FacetGroupValueCodec>) -> Self { Self { db, group_size: 4, min_level_size: 5, max_group_size: 8 } } } @@ -36,7 +36,7 @@ impl FacetsUpdateIncremental { level: u8, search_key: &[u8], txn: &RoTxn, - ) -> Result<(FacetKey>, FacetGroupValue)> { + ) -> Result<(FacetGroupKey>, FacetGroupValue)> { let mut prefix = vec![]; prefix.extend_from_slice(&field_id.to_be_bytes()); prefix.push(level); @@ -45,17 +45,17 @@ impl FacetsUpdateIncremental { let mut prefix_iter = self .db .as_polymorph() - .prefix_iter::<_, MyByteSlice, FacetGroupValueCodec>(txn, &prefix.as_slice())?; + .prefix_iter::<_, ByteSliceRef, FacetGroupValueCodec>(txn, &prefix.as_slice())?; if let Some(e) = prefix_iter.next() { let (key_bytes, value) = e?; Ok(( - FacetKeyCodec::::bytes_decode(&key_bytes) + FacetGroupKeyCodec::::bytes_decode(&key_bytes) .ok_or(Error::Encoding)? .into_owned(), value, )) } else { - let key = FacetKey { field_id, level, left_bound: search_key }; + let key = FacetGroupKey { field_id, level, left_bound: search_key }; match self.db.get_lower_than(txn, &key)? { Some((key, value)) => { if key.level != level || key.field_id != field_id { @@ -66,13 +66,13 @@ impl FacetsUpdateIncremental { let mut iter = self .db .as_polymorph() - .prefix_iter::<_, MyByteSlice, FacetGroupValueCodec>( + .prefix_iter::<_, ByteSliceRef, FacetGroupValueCodec>( txn, &prefix.as_slice(), )?; let (key_bytes, value) = iter.next().unwrap()?; Ok(( - FacetKeyCodec::::bytes_decode(&key_bytes) + FacetGroupKeyCodec::::bytes_decode(&key_bytes) .ok_or(Error::Encoding)? .into_owned(), value, @@ -93,7 +93,7 @@ impl FacetsUpdateIncremental { new_key: &[u8], new_values: &RoaringBitmap, ) -> Result { - let key = FacetKey { field_id, level: 0, left_bound: new_key }; + let key = FacetGroupKey { field_id, level: 0, left_bound: new_key }; let value = FacetGroupValue { bitmap: new_values.clone(), size: 1 }; let mut level0_prefix = vec![]; @@ -193,7 +193,7 @@ impl FacetsUpdateIncremental { .db .get_greater_than_or_equal_to( &txn, - &FacetKey { + &FacetGroupKey { field_id, level: level_below, left_bound: insertion_key.left_bound.as_slice(), @@ -217,7 +217,7 @@ impl FacetsUpdateIncremental { } let key = - FacetKey { field_id, level, left_bound: insertion_key.left_bound.clone() }; + FacetGroupKey { field_id, level, left_bound: insertion_key.left_bound.clone() }; let value = FacetGroupValue { size: size_left as u8, bitmap: values_left }; (key, value) }; @@ -235,7 +235,7 @@ impl FacetsUpdateIncremental { } let key = - FacetKey { field_id, level, left_bound: right_start_key.unwrap().to_vec() }; + FacetGroupKey { field_id, level, left_bound: right_start_key.unwrap().to_vec() }; let value = FacetGroupValue { size: size_right as u8, bitmap: values_right }; (key, value) }; @@ -303,7 +303,7 @@ impl FacetsUpdateIncremental { let mut values = RoaringBitmap::new(); for _ in 0..group_size { let (key_bytes, value_i) = groups_iter.next().unwrap()?; - let key_i = FacetKeyCodec::::bytes_decode(&key_bytes) + let key_i = FacetGroupKeyCodec::::bytes_decode(&key_bytes) .ok_or(Error::Encoding)?; if first_key.is_none() { @@ -311,7 +311,7 @@ impl FacetsUpdateIncremental { } values |= value_i.bitmap; } - let key = FacetKey { + let key = FacetGroupKey { field_id, level: highest_level + 1, left_bound: first_key.unwrap().left_bound, @@ -384,7 +384,7 @@ impl FacetsUpdateIncremental { key: &[u8], value: u32, ) -> Result { - let key = FacetKey { field_id, level: 0, left_bound: key }; + let key = FacetGroupKey { field_id, level: 0, left_bound: key }; let mut bitmap = self.db.get(&txn, &key)?.unwrap().bitmap; bitmap.remove(value); @@ -415,7 +415,7 @@ impl FacetsUpdateIncremental { key: &[u8], value: u32, ) -> Result<()> { - if self.db.get(txn, &FacetKey { field_id, level: 0, left_bound: key })?.is_none() { + if self.db.get(txn, &FacetGroupKey { field_id, level: 0, left_bound: key })?.is_none() { return Ok(()); } let highest_level = get_highest_level(&txn, self.db, field_id)?; @@ -450,7 +450,7 @@ impl FacetsUpdateIncremental { while let Some(el) = iter.next() { let (k, _) = el?; to_delete.push( - FacetKeyCodec::::bytes_decode(k).ok_or(Error::Encoding)?.into_owned(), + FacetGroupKeyCodec::::bytes_decode(k).ok_or(Error::Encoding)?.into_owned(), ); } drop(iter); @@ -469,9 +469,9 @@ mod tests { use rand::{Rng, SeedableRng}; use roaring::RoaringBitmap; - use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec; - use crate::heed_codec::facet::new::str_ref::StrRefCodec; - use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice}; + use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec; + use crate::heed_codec::facet::str_ref::StrRefCodec; + use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKeyCodec, ByteSliceRef}; use crate::milli_snap; use crate::search::facet::get_highest_level; use crate::search::facet::test::FacetIndex; @@ -502,7 +502,7 @@ mod tests { .unwrap(); while let Some(el) = iter.next() { let (key, value) = el.unwrap(); - let key = FacetKeyCodec::::bytes_decode(&key).unwrap(); + let key = FacetGroupKeyCodec::::bytes_decode(&key).unwrap(); let mut prefix_start_below = vec![]; prefix_start_below.extend_from_slice(&field_id.to_be_bytes()); @@ -519,7 +519,7 @@ mod tests { ) .unwrap(); let (key_bytes, _) = start_below_iter.next().unwrap().unwrap(); - FacetKeyCodec::::bytes_decode(&key_bytes).unwrap() + FacetGroupKeyCodec::::bytes_decode(&key_bytes).unwrap() }; assert!(value.size > 0 && (value.size as usize) < db.max_group_size); @@ -996,7 +996,7 @@ mod tests { // for ((key, values), group) in values_field_id.iter().zip(level0iter) { // let (group_key, group_values) = group.unwrap(); -// let group_key = FacetKeyCodec::::bytes_decode(group_key).unwrap(); +// let group_key = FacetGroupKeyCodec::::bytes_decode(group_key).unwrap(); // assert_eq!(key, &group_key.left_bound); // assert_eq!(values, &group_values.bitmap); // } @@ -1014,7 +1014,7 @@ mod tests { // for ((key, values), group) in values_field_id.iter().zip(level0iter) { // let (group_key, group_values) = group.unwrap(); -// let group_key = FacetKeyCodec::::bytes_decode(group_key).unwrap(); +// let group_key = FacetGroupKeyCodec::::bytes_decode(group_key).unwrap(); // assert_eq!(key, &group_key.left_bound); // assert_eq!(values, &group_values.bitmap); // } diff --git a/milli/src/update/facet/mod.rs b/milli/src/update/facet/mod.rs index 00964a406..77b42f355 100644 --- a/milli/src/update/facet/mod.rs +++ b/milli/src/update/facet/mod.rs @@ -1,23 +1,20 @@ -use std::{collections::HashMap, fs::File}; - +use super::{FacetsUpdateBulk, FacetsUpdateIncremental}; +use crate::{ + facet::FacetType, + heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec}, + CboRoaringBitmapCodec, FieldId, Index, Result, +}; use grenad::CompressionType; use heed::BytesDecode; use roaring::RoaringBitmap; - -use crate::{ - facet::FacetType, - heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice}, - CboRoaringBitmapCodec, FieldId, Index, Result, -}; - -use super::{FacetsUpdateBulk, FacetsUpdateIncremental}; +use std::{collections::HashMap, fs::File}; pub mod bulk; pub mod incremental; pub struct FacetsUpdate<'i> { index: &'i Index, - database: heed::Database, FacetGroupValueCodec>, + database: heed::Database, FacetGroupValueCodec>, level_group_size: u8, max_level_group_size: u8, min_level_size: u8, @@ -28,10 +25,10 @@ impl<'i> FacetsUpdate<'i> { pub fn new(index: &'i Index, facet_type: FacetType, new_data: grenad::Reader) -> Self { let database = match facet_type { FacetType::String => { - index.facet_id_string_docids.remap_key_type::>() + index.facet_id_string_docids.remap_key_type::>() } FacetType::Number => { - index.facet_id_f64_docids.remap_key_type::>() + index.facet_id_f64_docids.remap_key_type::>() } }; Self { @@ -70,8 +67,8 @@ impl<'i> FacetsUpdate<'i> { let mut cursor = self.new_data.into_cursor()?; while let Some((key, value)) = cursor.move_on_next()? { - let key = - FacetKeyCodec::::bytes_decode(key).ok_or(heed::Error::Encoding)?; + let key = FacetGroupKeyCodec::::bytes_decode(key) + .ok_or(heed::Error::Encoding)?; let docids = CboRoaringBitmapCodec::bytes_decode(value).ok_or(heed::Error::Encoding)?; indexer.insert(wtxn, key.field_id, key.left_bound, &docids)?; diff --git a/milli/src/update/index_documents/extract/extract_facet_number_docids.rs b/milli/src/update/index_documents/extract/extract_facet_number_docids.rs index eece08ee3..9a89691b1 100644 --- a/milli/src/update/index_documents/extract/extract_facet_number_docids.rs +++ b/milli/src/update/index_documents/extract/extract_facet_number_docids.rs @@ -6,9 +6,9 @@ use heed::{BytesDecode, BytesEncode}; use super::helpers::{ create_sorter, merge_cbo_roaring_bitmaps, sorter_into_reader, GrenadParameters, }; -use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec; -use crate::heed_codec::facet::new::{FacetKey, FacetKeyCodec}; use crate::heed_codec::facet::FieldDocIdFacetF64Codec; +use crate::heed_codec::facet::OrderedF64Codec; +use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec}; use crate::Result; /// Extracts the facet number and the documents ids where this facet number appear. @@ -36,8 +36,8 @@ pub fn extract_facet_number_docids( let (field_id, document_id, number) = FieldDocIdFacetF64Codec::bytes_decode(key_bytes).unwrap(); - let key = FacetKey { field_id, level: 0, left_bound: number }; - let key_bytes = FacetKeyCodec::::bytes_encode(&key).unwrap(); + let key = FacetGroupKey { field_id, level: 0, left_bound: number }; + let key_bytes = FacetGroupKeyCodec::::bytes_encode(&key).unwrap(); facet_number_docids_sorter.insert(key_bytes, document_id.to_ne_bytes())?; } diff --git a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs index 591f44c74..078a82335 100644 --- a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs +++ b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs @@ -4,8 +4,8 @@ use std::io; use heed::BytesEncode; use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters}; -use crate::heed_codec::facet::new::str_ref::StrRefCodec; -use crate::heed_codec::facet::new::{FacetKey, FacetKeyCodec}; +use crate::heed_codec::facet::StrRefCodec; +use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec}; use crate::update::index_documents::merge_cbo_roaring_bitmaps; use crate::{FieldId, Result}; @@ -43,8 +43,8 @@ pub fn extract_facet_string_docids( let document_id = u32::from_be_bytes(document_id_bytes); let normalised_value = std::str::from_utf8(normalized_value_bytes)?; - let key = FacetKey { field_id, level: 0, left_bound: normalised_value }; - let key_bytes = FacetKeyCodec::::bytes_encode(&key).unwrap(); + let key = FacetGroupKey { field_id, level: 0, left_bound: normalised_value }; + let key_bytes = FacetGroupKeyCodec::::bytes_encode(&key).unwrap(); facet_string_docids_sorter.insert(&key_bytes, &document_id.to_ne_bytes())?; } diff --git a/milli/src/update/snapshots/word_prefix_pair_proximity_docids.rs/test_update/initial/word_pair_proximity_docids.hash.snap b/milli/src/update/snapshots/word_prefix_pair_proximity_docids.rs/test_update/initial/word_pair_proximity_docids.hash.snap new file mode 100644 index 000000000..e50e50347 --- /dev/null +++ b/milli/src/update/snapshots/word_prefix_pair_proximity_docids.rs/test_update/initial/word_pair_proximity_docids.hash.snap @@ -0,0 +1,4 @@ +--- +source: milli/src/update/word_prefix_pair_proximity_docids.rs +--- +6873ff1f78d08f2b1a13bb9e37349c01