Refactor facet-related codecs

This commit is contained in:
Loïc Lecrenier 2022-09-05 13:01:36 +02:00 committed by Loïc Lecrenier
parent 9b55e582cd
commit 485a72306d
22 changed files with 280 additions and 301 deletions

View File

@ -1,25 +1,19 @@
// mod facet_level_value_f64_codec;
// mod facet_level_value_u32_codec;
// mod facet_string_level_zero_codec;
// mod facet_string_level_zero_value_codec;
// mod facet_string_zero_bounds_value_codec;
mod field_doc_id_facet_f64_codec; mod field_doc_id_facet_f64_codec;
mod field_doc_id_facet_string_codec; mod field_doc_id_facet_string_codec;
mod ordered_f64_codec;
mod str_ref;
pub mod new;
use heed::types::OwnedType;
// pub use self::facet_level_value_f64_codec::FacetLevelValueF64Codec;
// pub use self::facet_level_value_u32_codec::FacetLevelValueU32Codec;
// pub use self::facet_string_level_zero_codec::FacetStringLevelZeroCodec;
// pub use self::facet_string_level_zero_value_codec::{
// decode_prefix_string, encode_prefix_string, FacetStringLevelZeroValueCodec,
// };
// pub use self::facet_string_zero_bounds_value_codec::FacetStringZeroBoundsValueCodec;
pub use self::field_doc_id_facet_f64_codec::FieldDocIdFacetF64Codec; pub use self::field_doc_id_facet_f64_codec::FieldDocIdFacetF64Codec;
pub use self::field_doc_id_facet_string_codec::FieldDocIdFacetStringCodec; pub use self::field_doc_id_facet_string_codec::FieldDocIdFacetStringCodec;
use crate::BEU16; pub use self::ordered_f64_codec::OrderedF64Codec;
pub use self::str_ref::StrRefCodec;
use crate::{CboRoaringBitmapCodec, BEU16};
use heed::types::OwnedType;
use heed::{BytesDecode, BytesEncode};
use roaring::RoaringBitmap;
use std::borrow::Cow;
use std::convert::TryFrom;
use std::marker::PhantomData;
pub type FieldIdCodec = OwnedType<BEU16>; pub type FieldIdCodec = OwnedType<BEU16>;
@ -32,3 +26,109 @@ pub fn try_split_at(slice: &[u8], mid: usize) -> Option<(&[u8], &[u8])> {
None None
} }
} }
/// Key of an entry in a facet database: the faceted field, the level of the
/// group, and the left bound of the group.
///
/// The derived ordering compares `field_id`, then `level`, then `left_bound`,
/// following the declaration order of the fields.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct FacetGroupKey<T> {
    /// Identifier of the faceted field.
    pub field_id: u16,
    /// Level of the group. Callers in this crate look up individual facet
    /// values at level 0.
    pub level: u8,
    /// Left bound of the group, in whatever representation `T` provides.
    pub left_bound: T,
}

impl<'a> FacetGroupKey<&'a [u8]> {
    /// Converts a key borrowing its bound into one that owns it.
    ///
    /// The bound bytes are copied into a freshly allocated `Vec<u8>`;
    /// `field_id` and `level` are carried over unchanged.
    pub fn into_owned(self) -> FacetGroupKey<Vec<u8>> {
        let FacetGroupKey { field_id, level, left_bound } = self;
        FacetGroupKey { field_id, level, left_bound: left_bound.to_vec() }
    }
}

// No lifetime parameter is needed here: the borrow in the returned key is
// tied to `&self` through lifetime elision.
impl FacetGroupKey<Vec<u8>> {
    /// Returns a key with the same `field_id` and `level` whose bound borrows
    /// from this key instead of owning its bytes.
    pub fn as_ref(&self) -> FacetGroupKey<&[u8]> {
        FacetGroupKey {
            field_id: self.field_id,
            level: self.level,
            left_bound: self.left_bound.as_slice(),
        }
    }
}
/// Value stored next to a facet group key: a size byte and a bitmap.
#[derive(Debug)]
pub struct FacetGroupValue {
    /// Size of the group; written as the first byte by `FacetGroupValueCodec`.
    // NOTE(review): presumably the number of entries of the level below that
    // this group covers — confirm against the code that builds the levels.
    pub size: u8,
    /// Bitmap attached to the group, serialized with `CboRoaringBitmapCodec`.
    pub bitmap: RoaringBitmap,
}
/// Codec for [`FacetGroupKey`], generic over the codec `T` used for the bound.
///
/// Byte layout: `field_id` (2 bytes, big-endian), then `level` (1 byte), then
/// the bound as encoded by `T`.
pub struct FacetGroupKeyCodec<T> {
    _phantom: PhantomData<T>,
}

impl<'a, T> heed::BytesEncode<'a> for FacetGroupKeyCodec<T>
where
    T: BytesEncode<'a>,
    T::EItem: Sized,
{
    type EItem = FacetGroupKey<T::EItem>;

    /// Serializes the key as `field_id | level | bound`.
    ///
    /// Returns `None` when `T` fails to encode the bound.
    fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
        let bound = T::bytes_encode(&value.left_bound)?;
        // 2 bytes of field id + 1 byte of level + the encoded bound.
        let mut buffer = Vec::with_capacity(2 + 1 + bound.len());
        buffer.extend_from_slice(&value.field_id.to_be_bytes());
        buffer.push(value.level);
        buffer.extend_from_slice(&bound);
        Some(Cow::Owned(buffer))
    }
}

impl<'a, T> heed::BytesDecode<'a> for FacetGroupKeyCodec<T>
where
    T: BytesDecode<'a>,
{
    type DItem = FacetGroupKey<T::DItem>;

    /// Parses a key laid out as `field_id | level | bound`.
    ///
    /// Returns `None` when `bytes` is shorter than the 3-byte header or when
    /// `T` fails to decode the remaining bytes. Checked slicing is used so
    /// that truncated input is reported as `None` instead of panicking.
    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        let field_id = u16::from_be_bytes(<[u8; 2]>::try_from(bytes.get(..2)?).ok()?);
        let level = *bytes.get(2)?;
        let left_bound = T::bytes_decode(bytes.get(3..)?)?;
        Some(FacetGroupKey { field_id, level, left_bound })
    }
}
/// Codec for [`FacetGroupValue`].
///
/// Byte layout: the `size` byte, followed by the bitmap as serialized by
/// `CboRoaringBitmapCodec`.
pub struct FacetGroupValueCodec;

impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
    type EItem = FacetGroupValue;

    /// Serializes the value as `size | bitmap`. Always returns `Some`.
    fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
        let mut buffer = vec![value.size];
        CboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut buffer);
        Some(Cow::Owned(buffer))
    }
}

impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
    type DItem = FacetGroupValue;

    /// Parses a value laid out as `size | bitmap`.
    ///
    /// Returns `None` when `bytes` is empty (instead of panicking on the
    /// missing size byte) or when the bitmap cannot be deserialized.
    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        let (&size, bitmap_bytes) = bytes.split_first()?;
        let bitmap = CboRoaringBitmapCodec::deserialize_from(bitmap_bytes).ok()?;
        Some(FacetGroupValue { size, bitmap })
    }
}
/// Pass-through codec for borrowed byte slices: encoding and decoding both
/// hand back the very same bytes without copying them.
pub struct ByteSliceRef;

impl<'a> BytesEncode<'a> for ByteSliceRef {
    type EItem = &'a [u8];

    /// Borrows the slice as-is; never fails.
    fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
        let slice: &'a [u8] = *item;
        Some(Cow::Borrowed(slice))
    }
}

impl<'a> BytesDecode<'a> for ByteSliceRef {
    type DItem = &'a [u8];

    /// Returns the input bytes untouched; never fails.
    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        Some(bytes)
    }
}

View File

@ -1,120 +0,0 @@
use std::borrow::Cow;
use std::convert::TryFrom;
use std::marker::PhantomData;
use heed::{BytesDecode, BytesEncode};
use roaring::RoaringBitmap;
use crate::CboRoaringBitmapCodec;
pub mod ordered_f64_codec;
pub mod str_ref;
// TODO: these codecs were quickly written and not fast/resilient enough
/// Key of an entry in a facet database: the faceted field, the level of the
/// group, and the left bound of the group.
///
/// The derived ordering compares `field_id`, then `level`, then `left_bound`,
/// following the declaration order of the fields.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct FacetKey<T> {
    /// Identifier of the faceted field.
    pub field_id: u16,
    /// Level of the group. Callers in this crate look up individual facet
    /// values at level 0.
    pub level: u8,
    /// Left bound of the group, in whatever representation `T` provides.
    pub left_bound: T,
}

impl<'a> FacetKey<&'a [u8]> {
    /// Converts a key borrowing its bound into one that owns it.
    ///
    /// The bound bytes are copied into a freshly allocated `Vec<u8>`;
    /// `field_id` and `level` are carried over unchanged.
    pub fn into_owned(self) -> FacetKey<Vec<u8>> {
        let FacetKey { field_id, level, left_bound } = self;
        FacetKey { field_id, level, left_bound: left_bound.to_vec() }
    }
}

// No lifetime parameter is needed here: the borrow in the returned key is
// tied to `&self` through lifetime elision.
impl FacetKey<Vec<u8>> {
    /// Returns a key with the same `field_id` and `level` whose bound borrows
    /// from this key instead of owning its bytes.
    pub fn as_ref(&self) -> FacetKey<&[u8]> {
        FacetKey {
            field_id: self.field_id,
            level: self.level,
            left_bound: self.left_bound.as_slice(),
        }
    }
}
/// Value stored next to a facet key: a size byte and a bitmap.
#[derive(Debug)]
pub struct FacetGroupValue {
    /// Size of the group; written as the first byte by `FacetGroupValueCodec`.
    // NOTE(review): presumably the number of entries of the level below that
    // this group covers — confirm against the code that builds the levels.
    pub size: u8,
    /// Bitmap attached to the group, serialized with `CboRoaringBitmapCodec`.
    pub bitmap: RoaringBitmap,
}
/// Codec for [`FacetKey`], generic over the codec `T` used for the bound.
///
/// Byte layout: `field_id` (2 bytes, big-endian), then `level` (1 byte), then
/// the bound as encoded by `T`.
pub struct FacetKeyCodec<T> {
    _phantom: PhantomData<T>,
}

impl<'a, T> heed::BytesEncode<'a> for FacetKeyCodec<T>
where
    T: BytesEncode<'a>,
    T::EItem: Sized,
{
    type EItem = FacetKey<T::EItem>;

    /// Serializes the key as `field_id | level | bound`.
    ///
    /// Returns `None` when `T` fails to encode the bound.
    fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
        let bound = T::bytes_encode(&value.left_bound)?;
        // 2 bytes of field id + 1 byte of level + the encoded bound.
        let mut buffer = Vec::with_capacity(2 + 1 + bound.len());
        buffer.extend_from_slice(&value.field_id.to_be_bytes());
        buffer.push(value.level);
        buffer.extend_from_slice(&bound);
        Some(Cow::Owned(buffer))
    }
}

impl<'a, T> heed::BytesDecode<'a> for FacetKeyCodec<T>
where
    T: BytesDecode<'a>,
{
    type DItem = FacetKey<T::DItem>;

    /// Parses a key laid out as `field_id | level | bound`.
    ///
    /// Returns `None` when `bytes` is shorter than the 3-byte header or when
    /// `T` fails to decode the remaining bytes. Checked slicing is used so
    /// that truncated input is reported as `None` instead of panicking.
    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        let field_id = u16::from_be_bytes(<[u8; 2]>::try_from(bytes.get(..2)?).ok()?);
        let level = *bytes.get(2)?;
        let left_bound = T::bytes_decode(bytes.get(3..)?)?;
        Some(FacetKey { field_id, level, left_bound })
    }
}
/// Codec for [`FacetGroupValue`].
///
/// Byte layout: the `size` byte, followed by the bitmap as serialized by
/// `CboRoaringBitmapCodec`.
pub struct FacetGroupValueCodec;

impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
    type EItem = FacetGroupValue;

    /// Serializes the value as `size | bitmap`. Always returns `Some`.
    fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
        let mut buffer = vec![value.size];
        CboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut buffer);
        Some(Cow::Owned(buffer))
    }
}

impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
    type DItem = FacetGroupValue;

    /// Parses a value laid out as `size | bitmap`.
    ///
    /// Returns `None` when `bytes` is empty (instead of panicking on the
    /// missing size byte) or when the bitmap cannot be deserialized.
    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        let (&size, bitmap_bytes) = bytes.split_first()?;
        let bitmap = CboRoaringBitmapCodec::deserialize_from(bitmap_bytes).ok()?;
        Some(FacetGroupValue { size, bitmap })
    }
}
// TODO: get rid of this codec as it is named confusingly + should really be part of heed
// or even replace the current ByteSlice codec
/// Pass-through codec for borrowed byte slices: encoding and decoding both
/// hand back the very same bytes without copying them.
pub struct MyByteSlice;

impl<'a> BytesEncode<'a> for MyByteSlice {
    type EItem = &'a [u8];

    /// Borrows the slice as-is; never fails.
    fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
        let slice: &'a [u8] = *item;
        Some(Cow::Borrowed(slice))
    }
}

impl<'a> BytesDecode<'a> for MyByteSlice {
    type DItem = &'a [u8];

    /// Returns the input bytes untouched; never fails.
    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        Some(bytes)
    }
}

View File

@ -14,15 +14,10 @@ use time::OffsetDateTime;
use crate::error::{InternalError, UserError}; use crate::error::{InternalError, UserError};
use crate::facet::FacetType; use crate::facet::FacetType;
use crate::fields_ids_map::FieldsIdsMap; use crate::fields_ids_map::FieldsIdsMap;
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec; use crate::heed_codec::facet::OrderedF64Codec;
use crate::heed_codec::facet::new::str_ref::StrRefCodec; use crate::heed_codec::facet::StrRefCodec;
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec}; use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKeyCodec};
use crate::heed_codec::facet::{ use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, FieldIdCodec};
// FacetLevelValueF64Codec, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec,
FieldDocIdFacetF64Codec,
FieldDocIdFacetStringCodec,
FieldIdCodec,
};
use crate::{ use crate::{
default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion, default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId, DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId,
@ -130,9 +125,9 @@ pub struct Index {
pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>, pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
/// Maps the facet field id and ranges of numbers with the docids that corresponds to them. /// Maps the facet field id and ranges of numbers with the docids that corresponds to them.
pub facet_id_f64_docids: Database<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>, pub facet_id_f64_docids: Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
/// Maps the facet field id and ranges of strings with the docids that corresponds to them. /// Maps the facet field id and ranges of strings with the docids that corresponds to them.
pub facet_id_string_docids: Database<FacetKeyCodec<StrRefCodec>, FacetGroupValueCodec>, pub facet_id_string_docids: Database<FacetGroupKeyCodec<StrRefCodec>, FacetGroupValueCodec>,
/// Maps the document id, the facet field id and the numbers. /// Maps the document id, the facet field id and the numbers.
pub field_id_docid_facet_f64s: Database<FieldDocIdFacetF64Codec, Unit>, pub field_id_docid_facet_f64s: Database<FieldDocIdFacetF64Codec, Unit>,

View File

@ -7,7 +7,7 @@ use roaring::RoaringBitmap;
use super::{Criterion, CriterionParameters, CriterionResult}; use super::{Criterion, CriterionParameters, CriterionResult};
use crate::facet::FacetType; use crate::facet::FacetType;
use crate::heed_codec::facet::new::{FacetKeyCodec, MyByteSlice}; use crate::heed_codec::facet::{FacetGroupKeyCodec, ByteSliceRef};
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder}; use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
use crate::search::facet::facet_sort_ascending::ascending_facet_sort; use crate::search::facet::facet_sort_ascending::ascending_facet_sort;
use crate::search::facet::facet_sort_descending::descending_facet_sort; use crate::search::facet::facet_sort_descending::descending_facet_sort;
@ -196,14 +196,14 @@ fn facet_ordered<'t>(
let number_iter = make_iter( let number_iter = make_iter(
rtxn, rtxn,
index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>(), index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
field_id, field_id,
candidates.clone(), candidates.clone(),
)?; )?;
let string_iter = make_iter( let string_iter = make_iter(
rtxn, rtxn,
index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>(), index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
field_id, field_id,
candidates, candidates,
)?; )?;

View File

@ -6,7 +6,7 @@ use roaring::RoaringBitmap;
use super::{Distinct, DocIter}; use super::{Distinct, DocIter};
use crate::error::InternalError; use crate::error::InternalError;
use crate::heed_codec::facet::new::FacetKey; use crate::heed_codec::facet::FacetGroupKey;
use crate::heed_codec::facet::*; use crate::heed_codec::facet::*;
use crate::index::db_name; use crate::index::db_name;
use crate::{DocumentId, FieldId, Index, Result}; use crate::{DocumentId, FieldId, Index, Result};
@ -48,7 +48,7 @@ impl<'a> FacetDistinctIter<'a> {
fn facet_string_docids(&self, key: &str) -> heed::Result<Option<RoaringBitmap>> { fn facet_string_docids(&self, key: &str) -> heed::Result<Option<RoaringBitmap>> {
self.index self.index
.facet_id_string_docids .facet_id_string_docids
.get(self.txn, &FacetKey { field_id: self.distinct, level: 0, left_bound: key }) .get(self.txn, &FacetGroupKey { field_id: self.distinct, level: 0, left_bound: key })
.map(|opt| opt.map(|v| v.bitmap)) .map(|opt| opt.map(|v| v.bitmap))
} }
@ -56,7 +56,7 @@ impl<'a> FacetDistinctIter<'a> {
// get facet docids on level 0 // get facet docids on level 0
self.index self.index
.facet_id_f64_docids .facet_id_f64_docids
.get(self.txn, &FacetKey { field_id: self.distinct, level: 0, left_bound: key }) .get(self.txn, &FacetGroupKey { field_id: self.distinct, level: 0, left_bound: key })
.map(|opt| opt.map(|v| v.bitmap)) .map(|opt| opt.map(|v| v.bitmap))
} }

View File

@ -8,12 +8,11 @@ use roaring::RoaringBitmap;
use crate::error::UserError; use crate::error::UserError;
use crate::facet::FacetType; use crate::facet::FacetType;
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec; use crate::heed_codec::facet::OrderedF64Codec;
use crate::heed_codec::facet::new::str_ref::StrRefCodec; use crate::heed_codec::facet::StrRefCodec;
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice}; use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec}; use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec};
use crate::search::facet::facet_distribution_iter; use crate::search::facet::facet_distribution_iter;
// use crate::search::facet::FacetStringIter;
use crate::{FieldId, Index, Result}; use crate::{FieldId, Index, Result};
/// The default number of values by facets that will /// The default number of values by facets that will
@ -138,7 +137,7 @@ impl<'a> FacetDistribution<'a> {
) -> heed::Result<()> { ) -> heed::Result<()> {
facet_distribution_iter::iterate_over_facet_distribution( facet_distribution_iter::iterate_over_facet_distribution(
self.rtxn, self.rtxn,
self.index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>(), self.index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
field_id, field_id,
candidates, candidates,
|facet_key, nbr_docids| { |facet_key, nbr_docids| {
@ -161,7 +160,7 @@ impl<'a> FacetDistribution<'a> {
) -> heed::Result<()> { ) -> heed::Result<()> {
facet_distribution_iter::iterate_over_facet_distribution( facet_distribution_iter::iterate_over_facet_distribution(
self.rtxn, self.rtxn,
self.index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>(), self.index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
field_id, field_id,
candidates, candidates,
|facet_key, nbr_docids| { |facet_key, nbr_docids| {
@ -191,7 +190,7 @@ impl<'a> FacetDistribution<'a> {
let iter = db let iter = db
.as_polymorph() .as_polymorph()
.prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())? .prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())?
.remap_types::<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>(); .remap_types::<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>();
for result in iter { for result in iter {
let (key, value) = result?; let (key, value) = result?;
@ -206,7 +205,7 @@ impl<'a> FacetDistribution<'a> {
.facet_id_string_docids .facet_id_string_docids
.as_polymorph() .as_polymorph()
.prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())? .prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())?
.remap_types::<FacetKeyCodec<StrRefCodec>, FacetGroupValueCodec>(); .remap_types::<FacetGroupKeyCodec<StrRefCodec>, FacetGroupValueCodec>();
// TODO: get the original value of the facet somewhere (in the documents DB?) // TODO: get the original value of the facet somewhere (in the documents DB?)
for result in iter { for result in iter {

View File

@ -4,11 +4,11 @@ use heed::Result;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::{get_first_facet_value, get_highest_level}; use super::{get_first_facet_value, get_highest_level};
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice}; use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKey, FacetGroupValueCodec, FacetGroupKeyCodec};
pub fn iterate_over_facet_distribution<'t, CB>( pub fn iterate_over_facet_distribution<'t, CB>(
rtxn: &'t heed::RoTxn<'t>, rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
field_id: u16, field_id: u16,
candidates: &RoaringBitmap, candidates: &RoaringBitmap,
callback: CB, callback: CB,
@ -18,9 +18,9 @@ where
{ {
let mut fd = FacetDistribution { rtxn, db, field_id, callback }; let mut fd = FacetDistribution { rtxn, db, field_id, callback };
let highest_level = let highest_level =
get_highest_level(rtxn, db.remap_key_type::<FacetKeyCodec<MyByteSlice>>(), field_id)?; get_highest_level(rtxn, db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(), field_id)?;
if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, db, field_id)? { if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? {
fd.iterate(candidates, highest_level, first_bound, usize::MAX)?; fd.iterate(candidates, highest_level, first_bound, usize::MAX)?;
return Ok(()); return Ok(());
} else { } else {
@ -33,7 +33,7 @@ where
CB: FnMut(&'t [u8], u64) -> ControlFlow<()>, CB: FnMut(&'t [u8], u64) -> ControlFlow<()>,
{ {
rtxn: &'t heed::RoTxn<'t>, rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
field_id: u16, field_id: u16,
callback: CB, callback: CB,
} }
@ -49,7 +49,7 @@ where
group_size: usize, group_size: usize,
) -> Result<ControlFlow<()>> { ) -> Result<ControlFlow<()>> {
let starting_key = let starting_key =
FacetKey { field_id: self.field_id, level: 0, left_bound: starting_bound }; FacetGroupKey { field_id: self.field_id, level: 0, left_bound: starting_bound };
let iter = self.db.range(self.rtxn, &(starting_key..))?.take(group_size); let iter = self.db.range(self.rtxn, &(starting_key..))?.take(group_size);
for el in iter { for el in iter {
let (key, value) = el?; let (key, value) = el?;
@ -78,7 +78,7 @@ where
if level == 0 { if level == 0 {
return self.iterate_level_0(candidates, starting_bound, group_size); return self.iterate_level_0(candidates, starting_bound, group_size);
} }
let starting_key = FacetKey { field_id: self.field_id, level, left_bound: starting_bound }; let starting_key = FacetGroupKey { field_id: self.field_id, level, left_bound: starting_bound };
let iter = self.db.range(&self.rtxn, &(&starting_key..)).unwrap().take(group_size); let iter = self.db.range(&self.rtxn, &(&starting_key..)).unwrap().take(group_size);
for el in iter { for el in iter {
@ -116,7 +116,7 @@ mod tests {
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::iterate_over_facet_distribution; use super::iterate_over_facet_distribution;
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec; use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec;
use crate::milli_snap; use crate::milli_snap;
use crate::search::facet::test::FacetIndex; use crate::search::facet::test::FacetIndex;

View File

@ -4,12 +4,12 @@ use heed::BytesEncode;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::{get_first_facet_value, get_highest_level, get_last_facet_value}; use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice}; use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef};
use crate::Result; use crate::Result;
pub fn find_docids_of_facet_within_bounds<'t, BoundCodec>( pub fn find_docids_of_facet_within_bounds<'t, BoundCodec>(
rtxn: &'t heed::RoTxn<'t>, rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetKeyCodec<BoundCodec>, FacetGroupValueCodec>, db: heed::Database<FacetGroupKeyCodec<BoundCodec>, FacetGroupValueCodec>,
field_id: u16, field_id: u16,
left: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>, left: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
right: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>, right: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
@ -42,13 +42,13 @@ where
} }
Bound::Unbounded => Bound::Unbounded, Bound::Unbounded => Bound::Unbounded,
}; };
let db = db.remap_key_type::<FacetKeyCodec<MyByteSlice>>(); let db = db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>();
let mut docids = RoaringBitmap::new(); let mut docids = RoaringBitmap::new();
let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids: &mut docids }; let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids: &mut docids };
let highest_level = get_highest_level(rtxn, db, field_id)?; let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, db, field_id)? { if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? {
let last_bound = get_last_facet_value::<MyByteSlice>(rtxn, db, field_id)?.unwrap(); let last_bound = get_last_facet_value::<ByteSliceRef>(rtxn, db, field_id)?.unwrap();
f.run(highest_level, first_bound, Bound::Included(last_bound), usize::MAX)?; f.run(highest_level, first_bound, Bound::Included(last_bound), usize::MAX)?;
Ok(docids) Ok(docids)
} else { } else {
@ -59,7 +59,7 @@ where
/// Fetch the document ids that have a facet with a value between the two given bounds /// Fetch the document ids that have a facet with a value between the two given bounds
struct FacetRangeSearch<'t, 'b, 'bitmap> { struct FacetRangeSearch<'t, 'b, 'bitmap> {
rtxn: &'t heed::RoTxn<'t>, rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
field_id: u16, field_id: u16,
left: Bound<&'b [u8]>, left: Bound<&'b [u8]>,
right: Bound<&'b [u8]>, right: Bound<&'b [u8]>,
@ -68,7 +68,7 @@ struct FacetRangeSearch<'t, 'b, 'bitmap> {
impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> { impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
fn run_level_0(&mut self, starting_left_bound: &'t [u8], group_size: usize) -> Result<()> { fn run_level_0(&mut self, starting_left_bound: &'t [u8], group_size: usize) -> Result<()> {
let left_key = let left_key =
FacetKey { field_id: self.field_id, level: 0, left_bound: starting_left_bound }; FacetGroupKey { field_id: self.field_id, level: 0, left_bound: starting_left_bound };
let iter = self.db.range(&self.rtxn, &(left_key..))?.take(group_size); let iter = self.db.range(&self.rtxn, &(left_key..))?.take(group_size);
for el in iter { for el in iter {
let (key, value) = el?; let (key, value) = el?;
@ -117,7 +117,7 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
return self.run_level_0(starting_left_bound, group_size); return self.run_level_0(starting_left_bound, group_size);
} }
let left_key = FacetKey { field_id: self.field_id, level, left_bound: starting_left_bound }; let left_key = FacetGroupKey { field_id: self.field_id, level, left_bound: starting_left_bound };
let mut iter = self.db.range(&self.rtxn, &(left_key..))?.take(group_size); let mut iter = self.db.range(&self.rtxn, &(left_key..))?.take(group_size);
let (mut previous_key, mut previous_value) = iter.next().unwrap()?; let (mut previous_key, mut previous_value) = iter.next().unwrap()?;
@ -258,8 +258,8 @@ mod tests {
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::find_docids_of_facet_within_bounds; use super::find_docids_of_facet_within_bounds;
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec; use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec;
use crate::heed_codec::facet::new::FacetKeyCodec; use crate::heed_codec::facet::FacetGroupKeyCodec;
use crate::milli_snap; use crate::milli_snap;
use crate::search::facet::test::FacetIndex; use crate::search::facet::test::FacetIndex;
use crate::snapshot_tests::display_bitmap; use crate::snapshot_tests::display_bitmap;
@ -310,7 +310,7 @@ mod tests {
let end = Bound::Included(i); let end = Bound::Included(i);
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>( let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn, &txn,
index.db.content.remap_key_type::<FacetKeyCodec<OrderedF64Codec>>(), index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0, 0,
&start, &start,
&end, &end,
@ -326,7 +326,7 @@ mod tests {
let end = Bound::Excluded(i); let end = Bound::Excluded(i);
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>( let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn, &txn,
index.db.content.remap_key_type::<FacetKeyCodec<OrderedF64Codec>>(), index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0, 0,
&start, &start,
&end, &end,
@ -352,7 +352,7 @@ mod tests {
let end = Bound::Included(255.); let end = Bound::Included(255.);
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>( let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn, &txn,
index.db.content.remap_key_type::<FacetKeyCodec<OrderedF64Codec>>(), index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0, 0,
&start, &start,
&end, &end,
@ -371,7 +371,7 @@ mod tests {
let end = Bound::Excluded(255.); let end = Bound::Excluded(255.);
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>( let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn, &txn,
index.db.content.remap_key_type::<FacetKeyCodec<OrderedF64Codec>>(), index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0, 0,
&start, &start,
&end, &end,
@ -399,7 +399,7 @@ mod tests {
let end = Bound::Included(255. - i); let end = Bound::Included(255. - i);
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>( let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn, &txn,
index.db.content.remap_key_type::<FacetKeyCodec<OrderedF64Codec>>(), index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0, 0,
&start, &start,
&end, &end,
@ -418,7 +418,7 @@ mod tests {
let end = Bound::Excluded(255. - i); let end = Bound::Excluded(255. - i);
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>( let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn, &txn,
index.db.content.remap_key_type::<FacetKeyCodec<OrderedF64Codec>>(), index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0, 0,
&start, &start,
&end, &end,

View File

@ -2,19 +2,19 @@ use heed::Result;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::{get_first_facet_value, get_highest_level}; use super::{get_first_facet_value, get_highest_level};
use crate::heed_codec::facet::new::{ use crate::heed_codec::facet::{
FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice, FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef,
}; };
pub fn ascending_facet_sort<'t>( pub fn ascending_facet_sort<'t>(
rtxn: &'t heed::RoTxn<'t>, rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
field_id: u16, field_id: u16,
candidates: RoaringBitmap, candidates: RoaringBitmap,
) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> { ) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> {
let highest_level = get_highest_level(rtxn, db, field_id)?; let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, db, field_id)? { if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? {
let first_key = FacetKey { field_id, level: highest_level, left_bound: first_bound }; let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX); let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX);
Ok(Box::new(AscendingFacetSort { rtxn, db, field_id, stack: vec![(candidates, iter)] })) Ok(Box::new(AscendingFacetSort { rtxn, db, field_id, stack: vec![(candidates, iter)] }))
@ -25,11 +25,11 @@ pub fn ascending_facet_sort<'t>(
struct AscendingFacetSort<'t, 'e> { struct AscendingFacetSort<'t, 'e> {
rtxn: &'t heed::RoTxn<'e>, rtxn: &'t heed::RoTxn<'e>,
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
field_id: u16, field_id: u16,
stack: Vec<( stack: Vec<(
RoaringBitmap, RoaringBitmap,
std::iter::Take<heed::RoRange<'t, FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>>, std::iter::Take<heed::RoRange<'t, FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>>,
)>, )>,
} }
@ -41,7 +41,7 @@ impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> {
let (documents_ids, deepest_iter) = self.stack.last_mut()?; let (documents_ids, deepest_iter) = self.stack.last_mut()?;
for result in deepest_iter { for result in deepest_iter {
let ( let (
FacetKey { level, left_bound, field_id }, FacetGroupKey { level, left_bound, field_id },
FacetGroupValue { size: group_size, mut bitmap }, FacetGroupValue { size: group_size, mut bitmap },
) = result.unwrap(); ) = result.unwrap();
// The range is unbounded on the right and the group size for the highest level is MAX, // The range is unbounded on the right and the group size for the highest level is MAX,
@ -65,7 +65,7 @@ impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> {
return Some(Ok(bitmap)); return Some(Ok(bitmap));
} }
let starting_key_below = let starting_key_below =
FacetKey { field_id: self.field_id, level: level - 1, left_bound }; FacetGroupKey { field_id: self.field_id, level: level - 1, left_bound };
let iter = match self.db.range(&self.rtxn, &(starting_key_below..)) { let iter = match self.db.range(&self.rtxn, &(starting_key_below..)) {
Ok(iter) => iter, Ok(iter) => iter,
Err(e) => return Some(Err(e.into())), Err(e) => return Some(Err(e.into())),
@ -86,7 +86,7 @@ mod tests {
use rand::{Rng, SeedableRng}; use rand::{Rng, SeedableRng};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec; use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec;
use crate::milli_snap; use crate::milli_snap;
use crate::search::facet::facet_sort_ascending::ascending_facet_sort; use crate::search::facet::facet_sort_ascending::ascending_facet_sort;
use crate::search::facet::test::FacetIndex; use crate::search::facet::test::FacetIndex;

View File

@ -4,21 +4,21 @@ use heed::Result;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::{get_first_facet_value, get_highest_level, get_last_facet_value}; use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
use crate::heed_codec::facet::new::{ use crate::heed_codec::facet::{
FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice, FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef,
}; };
pub fn descending_facet_sort<'t>( pub fn descending_facet_sort<'t>(
rtxn: &'t heed::RoTxn<'t>, rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
field_id: u16, field_id: u16,
candidates: RoaringBitmap, candidates: RoaringBitmap,
) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> { ) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> {
let highest_level = get_highest_level(rtxn, db, field_id)?; let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, db, field_id)? { if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? {
let first_key = FacetKey { field_id, level: highest_level, left_bound: first_bound }; let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
let last_bound = get_last_facet_value::<MyByteSlice>(rtxn, db, field_id)?.unwrap(); let last_bound = get_last_facet_value::<ByteSliceRef>(rtxn, db, field_id)?.unwrap();
let last_key = FacetKey { field_id, level: highest_level, left_bound: last_bound }; let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound };
let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX); let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX);
Ok(Box::new(DescendingFacetSort { Ok(Box::new(DescendingFacetSort {
rtxn, rtxn,
@ -33,11 +33,11 @@ pub fn descending_facet_sort<'t>(
struct DescendingFacetSort<'t> { struct DescendingFacetSort<'t> {
rtxn: &'t heed::RoTxn<'t>, rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
field_id: u16, field_id: u16,
stack: Vec<( stack: Vec<(
RoaringBitmap, RoaringBitmap,
std::iter::Take<heed::RoRevRange<'t, FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>>, std::iter::Take<heed::RoRevRange<'t, FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>>,
Bound<&'t [u8]>, Bound<&'t [u8]>,
)>, )>,
} }
@ -50,7 +50,7 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
let (documents_ids, deepest_iter, right_bound) = self.stack.last_mut()?; let (documents_ids, deepest_iter, right_bound) = self.stack.last_mut()?;
while let Some(result) = deepest_iter.next() { while let Some(result) = deepest_iter.next() {
let ( let (
FacetKey { level, left_bound, field_id }, FacetGroupKey { level, left_bound, field_id },
FacetGroupValue { size: group_size, mut bitmap }, FacetGroupValue { size: group_size, mut bitmap },
) = result.unwrap(); ) = result.unwrap();
// The range is unbounded on the right and the group size for the highest level is MAX, // The range is unbounded on the right and the group size for the highest level is MAX,
@ -72,15 +72,15 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
if level == 0 { if level == 0 {
return Some(Ok(bitmap)); return Some(Ok(bitmap));
} }
let starting_key_below = FacetKey { field_id, level: level - 1, left_bound }; let starting_key_below = FacetGroupKey { field_id, level: level - 1, left_bound };
let end_key_kelow = match *right_bound { let end_key_kelow = match *right_bound {
Bound::Included(right) => Bound::Included(FacetKey { Bound::Included(right) => Bound::Included(FacetGroupKey {
field_id, field_id,
level: level - 1, level: level - 1,
left_bound: right, left_bound: right,
}), }),
Bound::Excluded(right) => Bound::Excluded(FacetKey { Bound::Excluded(right) => Bound::Excluded(FacetGroupKey {
field_id, field_id,
level: level - 1, level: level - 1,
left_bound: right, left_bound: right,
@ -90,7 +90,7 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
let prev_right_bound = *right_bound; let prev_right_bound = *right_bound;
*right_bound = Bound::Excluded(left_bound); *right_bound = Bound::Excluded(left_bound);
let iter = let iter =
match self.db.remap_key_type::<FacetKeyCodec<MyByteSlice>>().rev_range( match self.db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>().rev_range(
&self.rtxn, &self.rtxn,
&(Bound::Included(starting_key_below), end_key_kelow), &(Bound::Included(starting_key_below), end_key_kelow),
) { ) {
@ -114,8 +114,8 @@ mod tests {
use rand::{Rng, SeedableRng}; use rand::{Rng, SeedableRng};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec; use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec;
use crate::heed_codec::facet::new::{FacetKeyCodec, MyByteSlice}; use crate::heed_codec::facet::{FacetGroupKeyCodec, ByteSliceRef};
use crate::milli_snap; use crate::milli_snap;
use crate::search::facet::facet_sort_descending::descending_facet_sort; use crate::search::facet::facet_sort_descending::descending_facet_sort;
use crate::search::facet::test::FacetIndex; use crate::search::facet::test::FacetIndex;
@ -162,7 +162,7 @@ mod tests {
let txn = index.env.read_txn().unwrap(); let txn = index.env.read_txn().unwrap();
let candidates = (200..=300).into_iter().collect::<RoaringBitmap>(); let candidates = (200..=300).into_iter().collect::<RoaringBitmap>();
let mut results = String::new(); let mut results = String::new();
let db = index.db.content.remap_key_type::<FacetKeyCodec<MyByteSlice>>(); let db = index.db.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>();
let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap(); let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap();
for el in iter { for el in iter {
let docids = el.unwrap(); let docids = el.unwrap();

View File

@ -9,8 +9,8 @@ use roaring::RoaringBitmap;
use super::facet_range_search; use super::facet_range_search;
use crate::error::{Error, UserError}; use crate::error::{Error, UserError};
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec; use crate::heed_codec::facet::OrderedF64Codec;
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec}; use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::{distance_between_two_points, lat_lng_to_xyz, FieldId, Index, Result}; use crate::{distance_between_two_points, lat_lng_to_xyz, FieldId, Index, Result};
/// The maximum number of filters the filter AST can process. /// The maximum number of filters the filter AST can process.
@ -180,7 +180,11 @@ impl<'a> Filter<'a> {
let string_docids = strings_db let string_docids = strings_db
.get( .get(
rtxn, rtxn,
&FacetKey { field_id, level: 0, left_bound: &val.value().to_lowercase() }, &FacetGroupKey {
field_id,
level: 0,
left_bound: &val.value().to_lowercase(),
},
)? )?
.map(|v| v.bitmap) .map(|v| v.bitmap)
.unwrap_or_default(); .unwrap_or_default();
@ -218,10 +222,10 @@ impl<'a> Filter<'a> {
.remap_data_type::<DecodeIgnore>() .remap_data_type::<DecodeIgnore>()
.get_lower_than_or_equal_to( .get_lower_than_or_equal_to(
rtxn, rtxn,
&FacetKey { field_id, level: u8::MAX, left_bound: f64::MAX }, &FacetGroupKey { field_id, level: u8::MAX, left_bound: f64::MAX },
)? )?
.and_then( .and_then(
|(FacetKey { field_id: id, level, .. }, _)| { |(FacetGroupKey { field_id: id, level, .. }, _)| {
if id == field_id { if id == field_id {
Some(level) Some(level)
} else { } else {
@ -252,7 +256,7 @@ impl<'a> Filter<'a> {
/// going deeper through the levels. /// going deeper through the levels.
fn explore_facet_number_levels( fn explore_facet_number_levels(
rtxn: &heed::RoTxn, rtxn: &heed::RoTxn,
db: heed::Database<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>, db: heed::Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
field_id: FieldId, field_id: FieldId,
level: u8, level: u8,
left: Bound<f64>, left: Bound<f64>,

View File

@ -3,7 +3,7 @@ use heed::{BytesDecode, RoTxn};
pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET}; pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET};
pub use self::filter::Filter; pub use self::filter::Filter;
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice}; use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKeyCodec, ByteSliceRef};
mod facet_distribution; mod facet_distribution;
mod facet_distribution_iter; mod facet_distribution_iter;
@ -14,7 +14,7 @@ mod filter;
pub(crate) fn get_first_facet_value<'t, BoundCodec>( pub(crate) fn get_first_facet_value<'t, BoundCodec>(
txn: &'t RoTxn, txn: &'t RoTxn,
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
field_id: u16, field_id: u16,
) -> heed::Result<Option<BoundCodec::DItem>> ) -> heed::Result<Option<BoundCodec::DItem>>
where where
@ -28,7 +28,7 @@ where
if let Some(first) = level0_iter_forward.next() { if let Some(first) = level0_iter_forward.next() {
let (first_key, _) = first?; let (first_key, _) = first?;
let first_key = let first_key =
FacetKeyCodec::<BoundCodec>::bytes_decode(first_key).ok_or(heed::Error::Encoding)?; FacetGroupKeyCodec::<BoundCodec>::bytes_decode(first_key).ok_or(heed::Error::Encoding)?;
Ok(Some(first_key.left_bound)) Ok(Some(first_key.left_bound))
} else { } else {
Ok(None) Ok(None)
@ -36,7 +36,7 @@ where
} }
pub(crate) fn get_last_facet_value<'t, BoundCodec>( pub(crate) fn get_last_facet_value<'t, BoundCodec>(
txn: &'t RoTxn, txn: &'t RoTxn,
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
field_id: u16, field_id: u16,
) -> heed::Result<Option<BoundCodec::DItem>> ) -> heed::Result<Option<BoundCodec::DItem>>
where where
@ -51,7 +51,7 @@ where
if let Some(last) = level0_iter_backward.next() { if let Some(last) = level0_iter_backward.next() {
let (last_key, _) = last?; let (last_key, _) = last?;
let last_key = let last_key =
FacetKeyCodec::<BoundCodec>::bytes_decode(last_key).ok_or(heed::Error::Encoding)?; FacetGroupKeyCodec::<BoundCodec>::bytes_decode(last_key).ok_or(heed::Error::Encoding)?;
Ok(Some(last_key.left_bound)) Ok(Some(last_key.left_bound))
} else { } else {
Ok(None) Ok(None)
@ -59,7 +59,7 @@ where
} }
pub(crate) fn get_highest_level<'t>( pub(crate) fn get_highest_level<'t>(
txn: &'t RoTxn<'t>, txn: &'t RoTxn<'t>,
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
field_id: u16, field_id: u16,
) -> heed::Result<u8> { ) -> heed::Result<u8> {
let field_id_prefix = &field_id.to_be_bytes(); let field_id_prefix = &field_id.to_be_bytes();
@ -69,7 +69,7 @@ pub(crate) fn get_highest_level<'t>(
.next() .next()
.map(|el| { .map(|el| {
let (key, _) = el.unwrap(); let (key, _) = el.unwrap();
let key = FacetKeyCodec::<MyByteSlice>::bytes_decode(key).unwrap(); let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(key).unwrap();
key.level key.level
}) })
.unwrap_or(0)) .unwrap_or(0))
@ -84,8 +84,8 @@ pub mod test {
use heed::{BytesDecode, BytesEncode, Env, RwTxn}; use heed::{BytesDecode, BytesEncode, Env, RwTxn};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::heed_codec::facet::new::{ use crate::heed_codec::facet::{
FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice, FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef,
}; };
use crate::snapshot_tests::display_bitmap; use crate::snapshot_tests::display_bitmap;
use crate::update::FacetsUpdateIncremental; use crate::update::FacetsUpdateIncremental;
@ -101,7 +101,7 @@ pub mod test {
} }
pub struct Database { pub struct Database {
pub content: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, pub content: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
pub group_size: usize, pub group_size: usize,
pub max_group_size: usize, pub max_group_size: usize,
_tempdir: Rc<tempfile::TempDir>, _tempdir: Rc<tempfile::TempDir>,
@ -184,7 +184,7 @@ pub mod test {
let mut iter = self.db.content.iter(&txn).unwrap(); let mut iter = self.db.content.iter(&txn).unwrap();
while let Some(el) = iter.next() { while let Some(el) = iter.next() {
let (key, value) = el.unwrap(); let (key, value) = el.unwrap();
let FacetKey { field_id, level, left_bound: bound } = key; let FacetGroupKey { field_id, level, left_bound: bound } = key;
let bound = BoundCodec::bytes_decode(bound).unwrap(); let bound = BoundCodec::bytes_decode(bound).unwrap();
let FacetGroupValue { size, bitmap } = value; let FacetGroupValue { size, bitmap } = value;
writeln!( writeln!(

View File

@ -5,7 +5,7 @@ use std::path::Path;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::facet::FacetType; use crate::facet::FacetType;
use crate::heed_codec::facet::new::{FacetGroupValue, FacetKey}; use crate::heed_codec::facet::{FacetGroupValue, FacetGroupKey};
use crate::{make_db_snap_from_iter, ExternalDocumentsIds, Index}; use crate::{make_db_snap_from_iter, ExternalDocumentsIds, Index};
#[track_caller] #[track_caller]
@ -280,7 +280,7 @@ pub fn snap_word_prefix_position_docids(index: &Index) -> String {
} }
pub fn snap_facet_id_f64_docids(index: &Index) -> String { pub fn snap_facet_id_f64_docids(index: &Index) -> String {
let snap = make_db_snap_from_iter!(index, facet_id_f64_docids, |( let snap = make_db_snap_from_iter!(index, facet_id_f64_docids, |(
FacetKey { field_id, level, left_bound }, FacetGroupKey { field_id, level, left_bound },
FacetGroupValue { size, bitmap }, FacetGroupValue { size, bitmap },
)| { )| {
&format!("{field_id:<3} {level:<2} {left_bound:<6} {size:<2} {}", display_bitmap(&bitmap)) &format!("{field_id:<3} {level:<2} {left_bound:<6} {size:<2} {}", display_bitmap(&bitmap))
@ -289,7 +289,7 @@ pub fn snap_facet_id_f64_docids(index: &Index) -> String {
} }
pub fn snap_facet_id_string_docids(index: &Index) -> String { pub fn snap_facet_id_string_docids(index: &Index) -> String {
let snap = make_db_snap_from_iter!(index, facet_id_string_docids, |( let snap = make_db_snap_from_iter!(index, facet_id_string_docids, |(
FacetKey { field_id, level, left_bound }, FacetGroupKey { field_id, level, left_bound },
FacetGroupValue { size, bitmap }, FacetGroupValue { size, bitmap },
)| { )| {
&format!("{field_id:<3} {level:<2} {left_bound:<12} {size:<2} {}", display_bitmap(&bitmap)) &format!("{field_id:<3} {level:<2} {left_bound:<12} {size:<2} {}", display_bitmap(&bitmap))

View File

@ -11,7 +11,7 @@ use time::OffsetDateTime;
use super::{ClearDocuments, FacetsUpdateBulk}; use super::{ClearDocuments, FacetsUpdateBulk};
use crate::error::{InternalError, UserError}; use crate::error::{InternalError, UserError};
use crate::facet::FacetType; use crate::facet::FacetType;
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice}; use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKeyCodec, ByteSliceRef};
use crate::heed_codec::CboRoaringBitmapCodec; use crate::heed_codec::CboRoaringBitmapCodec;
use crate::index::{db_name, main_key}; use crate::index::{db_name, main_key};
use crate::{ use crate::{
@ -626,10 +626,10 @@ fn remove_docids_from_facet_id_docids<'a>(
) -> Result<()> { ) -> Result<()> {
let db = match facet_type { let db = match facet_type {
FacetType::String => { FacetType::String => {
index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>() index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
} }
FacetType::Number => { FacetType::Number => {
index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>() index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
} }
}; };
let mut modified = false; let mut modified = false;

View File

@ -12,8 +12,8 @@ use time::OffsetDateTime;
use crate::error::InternalError; use crate::error::InternalError;
use crate::facet::FacetType; use crate::facet::FacetType;
use crate::heed_codec::facet::new::{ use crate::heed_codec::facet::{
FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice, FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef,
}; };
use crate::update::index_documents::{ use crate::update::index_documents::{
create_writer, valid_lmdb_key, write_into_lmdb_database, writer_into_reader, create_writer, valid_lmdb_key, write_into_lmdb_database, writer_into_reader,
@ -22,7 +22,7 @@ use crate::{CboRoaringBitmapCodec, FieldId, Index, Result};
pub struct FacetsUpdateBulk<'i> { pub struct FacetsUpdateBulk<'i> {
index: &'i Index, index: &'i Index,
database: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, database: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
level_group_size: usize, level_group_size: usize,
min_level_size: usize, min_level_size: usize,
facet_type: FacetType, facet_type: FacetType,
@ -40,10 +40,10 @@ impl<'i> FacetsUpdateBulk<'i> {
index, index,
database: match facet_type { database: match facet_type {
FacetType::String => { FacetType::String => {
index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>() index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
} }
FacetType::Number => { FacetType::Number => {
index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>() index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
} }
}, },
level_group_size: 4, level_group_size: 4,
@ -61,10 +61,10 @@ impl<'i> FacetsUpdateBulk<'i> {
index, index,
database: match facet_type { database: match facet_type {
FacetType::String => { FacetType::String => {
index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>() index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
} }
FacetType::Number => { FacetType::Number => {
index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>() index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
} }
}, },
level_group_size: 4, level_group_size: 4,
@ -89,8 +89,8 @@ impl<'i> FacetsUpdateBulk<'i> {
} }
fn clear_levels(&self, wtxn: &mut heed::RwTxn, field_id: FieldId) -> Result<()> { fn clear_levels(&self, wtxn: &mut heed::RwTxn, field_id: FieldId) -> Result<()> {
let left = FacetKey::<&[u8]> { field_id, level: 1, left_bound: &[] }; let left = FacetGroupKey::<&[u8]> { field_id, level: 1, left_bound: &[] };
let right = FacetKey::<&[u8]> { field_id, level: u8::MAX, left_bound: &[] }; let right = FacetGroupKey::<&[u8]> { field_id, level: u8::MAX, left_bound: &[] };
let range = left..=right; let range = left..=right;
self.database.delete_range(wtxn, &range).map(drop)?; self.database.delete_range(wtxn, &range).map(drop)?;
Ok(()) Ok(())
@ -119,7 +119,7 @@ impl<'i> FacetsUpdateBulk<'i> {
for level_reader in level_readers { for level_reader in level_readers {
let mut cursor = level_reader.into_cursor()?; let mut cursor = level_reader.into_cursor()?;
while let Some((k, v)) = cursor.move_on_next()? { while let Some((k, v)) = cursor.move_on_next()? {
let key = FacetKeyCodec::<DecodeIgnore>::bytes_decode(k).unwrap(); let key = FacetGroupKeyCodec::<DecodeIgnore>::bytes_decode(k).unwrap();
let value = FacetGroupValueCodec::bytes_decode(v).unwrap(); let value = FacetGroupValueCodec::bytes_decode(v).unwrap();
println!("inserting {key:?} {value:?}"); println!("inserting {key:?} {value:?}");
@ -210,7 +210,7 @@ impl<'i> FacetsUpdateBulk<'i> {
struct ComputeHigherLevels<'t> { struct ComputeHigherLevels<'t> {
rtxn: &'t heed::RoTxn<'t>, rtxn: &'t heed::RoTxn<'t>,
db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, db: &'t heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
field_id: u16, field_id: u16,
level_group_size: usize, level_group_size: usize,
min_level_size: usize, min_level_size: usize,
@ -233,7 +233,7 @@ impl<'t> ComputeHigherLevels<'t> {
.db .db
.as_polymorph() .as_polymorph()
.prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, level_0_prefix.as_slice())? .prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, level_0_prefix.as_slice())?
.remap_types::<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>(); .remap_types::<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>();
let mut left_bound: &[u8] = &[]; let mut left_bound: &[u8] = &[];
let mut first_iteration_for_new_group = true; let mut first_iteration_for_new_group = true;
@ -311,9 +311,9 @@ impl<'t> ComputeHigherLevels<'t> {
for ((bitmap, left_bound), group_size) in for ((bitmap, left_bound), group_size) in
bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..)) bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..))
{ {
let key = FacetKey { field_id: self.field_id, level, left_bound }; let key = FacetGroupKey { field_id: self.field_id, level, left_bound };
let key = let key =
FacetKeyCodec::<MyByteSlice>::bytes_encode(&key).ok_or(Error::Encoding)?; FacetGroupKeyCodec::<ByteSliceRef>::bytes_encode(&key).ok_or(Error::Encoding)?;
let value = FacetGroupValue { size: group_size, bitmap }; let value = FacetGroupValue { size: group_size, bitmap };
let value = let value =
FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?; FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?;
@ -329,9 +329,9 @@ impl<'t> ComputeHigherLevels<'t> {
for ((bitmap, left_bound), group_size) in for ((bitmap, left_bound), group_size) in
bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..)) bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..))
{ {
let key = FacetKey { field_id: self.field_id, level, left_bound }; let key = FacetGroupKey { field_id: self.field_id, level, left_bound };
let key = let key =
FacetKeyCodec::<MyByteSlice>::bytes_encode(&key).ok_or(Error::Encoding)?; FacetGroupKeyCodec::<ByteSliceRef>::bytes_encode(&key).ok_or(Error::Encoding)?;
let value = FacetGroupValue { size: group_size, bitmap }; let value = FacetGroupValue { size: group_size, bitmap };
let value = FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?; let value = FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?;
cur_writer.insert(key, value)?; cur_writer.insert(key, value)?;

View File

@ -2,8 +2,8 @@ use heed::types::ByteSlice;
use heed::{BytesDecode, Error, RoTxn, RwTxn}; use heed::{BytesDecode, Error, RoTxn, RwTxn};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::heed_codec::facet::new::{ use crate::heed_codec::facet::{
FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice, FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef,
}; };
use crate::search::facet::get_highest_level; use crate::search::facet::get_highest_level;
use crate::Result; use crate::Result;
@ -19,13 +19,13 @@ enum DeletionResult {
} }
pub struct FacetsUpdateIncremental { pub struct FacetsUpdateIncremental {
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
group_size: usize, group_size: usize,
min_level_size: usize, min_level_size: usize,
max_group_size: usize, max_group_size: usize,
} }
impl FacetsUpdateIncremental { impl FacetsUpdateIncremental {
pub fn new(db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>) -> Self { pub fn new(db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>) -> Self {
Self { db, group_size: 4, min_level_size: 5, max_group_size: 8 } Self { db, group_size: 4, min_level_size: 5, max_group_size: 8 }
} }
} }
@ -36,7 +36,7 @@ impl FacetsUpdateIncremental {
level: u8, level: u8,
search_key: &[u8], search_key: &[u8],
txn: &RoTxn, txn: &RoTxn,
) -> Result<(FacetKey<Vec<u8>>, FacetGroupValue)> { ) -> Result<(FacetGroupKey<Vec<u8>>, FacetGroupValue)> {
let mut prefix = vec![]; let mut prefix = vec![];
prefix.extend_from_slice(&field_id.to_be_bytes()); prefix.extend_from_slice(&field_id.to_be_bytes());
prefix.push(level); prefix.push(level);
@ -45,17 +45,17 @@ impl FacetsUpdateIncremental {
let mut prefix_iter = self let mut prefix_iter = self
.db .db
.as_polymorph() .as_polymorph()
.prefix_iter::<_, MyByteSlice, FacetGroupValueCodec>(txn, &prefix.as_slice())?; .prefix_iter::<_, ByteSliceRef, FacetGroupValueCodec>(txn, &prefix.as_slice())?;
if let Some(e) = prefix_iter.next() { if let Some(e) = prefix_iter.next() {
let (key_bytes, value) = e?; let (key_bytes, value) = e?;
Ok(( Ok((
FacetKeyCodec::<MyByteSlice>::bytes_decode(&key_bytes) FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes)
.ok_or(Error::Encoding)? .ok_or(Error::Encoding)?
.into_owned(), .into_owned(),
value, value,
)) ))
} else { } else {
let key = FacetKey { field_id, level, left_bound: search_key }; let key = FacetGroupKey { field_id, level, left_bound: search_key };
match self.db.get_lower_than(txn, &key)? { match self.db.get_lower_than(txn, &key)? {
Some((key, value)) => { Some((key, value)) => {
if key.level != level || key.field_id != field_id { if key.level != level || key.field_id != field_id {
@ -66,13 +66,13 @@ impl FacetsUpdateIncremental {
let mut iter = self let mut iter = self
.db .db
.as_polymorph() .as_polymorph()
.prefix_iter::<_, MyByteSlice, FacetGroupValueCodec>( .prefix_iter::<_, ByteSliceRef, FacetGroupValueCodec>(
txn, txn,
&prefix.as_slice(), &prefix.as_slice(),
)?; )?;
let (key_bytes, value) = iter.next().unwrap()?; let (key_bytes, value) = iter.next().unwrap()?;
Ok(( Ok((
FacetKeyCodec::<MyByteSlice>::bytes_decode(&key_bytes) FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes)
.ok_or(Error::Encoding)? .ok_or(Error::Encoding)?
.into_owned(), .into_owned(),
value, value,
@ -93,7 +93,7 @@ impl FacetsUpdateIncremental {
new_key: &[u8], new_key: &[u8],
new_values: &RoaringBitmap, new_values: &RoaringBitmap,
) -> Result<InsertionResult> { ) -> Result<InsertionResult> {
let key = FacetKey { field_id, level: 0, left_bound: new_key }; let key = FacetGroupKey { field_id, level: 0, left_bound: new_key };
let value = FacetGroupValue { bitmap: new_values.clone(), size: 1 }; let value = FacetGroupValue { bitmap: new_values.clone(), size: 1 };
let mut level0_prefix = vec![]; let mut level0_prefix = vec![];
@ -193,7 +193,7 @@ impl FacetsUpdateIncremental {
.db .db
.get_greater_than_or_equal_to( .get_greater_than_or_equal_to(
&txn, &txn,
&FacetKey { &FacetGroupKey {
field_id, field_id,
level: level_below, level: level_below,
left_bound: insertion_key.left_bound.as_slice(), left_bound: insertion_key.left_bound.as_slice(),
@ -217,7 +217,7 @@ impl FacetsUpdateIncremental {
} }
let key = let key =
FacetKey { field_id, level, left_bound: insertion_key.left_bound.clone() }; FacetGroupKey { field_id, level, left_bound: insertion_key.left_bound.clone() };
let value = FacetGroupValue { size: size_left as u8, bitmap: values_left }; let value = FacetGroupValue { size: size_left as u8, bitmap: values_left };
(key, value) (key, value)
}; };
@ -235,7 +235,7 @@ impl FacetsUpdateIncremental {
} }
let key = let key =
FacetKey { field_id, level, left_bound: right_start_key.unwrap().to_vec() }; FacetGroupKey { field_id, level, left_bound: right_start_key.unwrap().to_vec() };
let value = FacetGroupValue { size: size_right as u8, bitmap: values_right }; let value = FacetGroupValue { size: size_right as u8, bitmap: values_right };
(key, value) (key, value)
}; };
@ -303,7 +303,7 @@ impl FacetsUpdateIncremental {
let mut values = RoaringBitmap::new(); let mut values = RoaringBitmap::new();
for _ in 0..group_size { for _ in 0..group_size {
let (key_bytes, value_i) = groups_iter.next().unwrap()?; let (key_bytes, value_i) = groups_iter.next().unwrap()?;
let key_i = FacetKeyCodec::<MyByteSlice>::bytes_decode(&key_bytes) let key_i = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes)
.ok_or(Error::Encoding)?; .ok_or(Error::Encoding)?;
if first_key.is_none() { if first_key.is_none() {
@ -311,7 +311,7 @@ impl FacetsUpdateIncremental {
} }
values |= value_i.bitmap; values |= value_i.bitmap;
} }
let key = FacetKey { let key = FacetGroupKey {
field_id, field_id,
level: highest_level + 1, level: highest_level + 1,
left_bound: first_key.unwrap().left_bound, left_bound: first_key.unwrap().left_bound,
@ -384,7 +384,7 @@ impl FacetsUpdateIncremental {
key: &[u8], key: &[u8],
value: u32, value: u32,
) -> Result<DeletionResult> { ) -> Result<DeletionResult> {
let key = FacetKey { field_id, level: 0, left_bound: key }; let key = FacetGroupKey { field_id, level: 0, left_bound: key };
let mut bitmap = self.db.get(&txn, &key)?.unwrap().bitmap; let mut bitmap = self.db.get(&txn, &key)?.unwrap().bitmap;
bitmap.remove(value); bitmap.remove(value);
@ -415,7 +415,7 @@ impl FacetsUpdateIncremental {
key: &[u8], key: &[u8],
value: u32, value: u32,
) -> Result<()> { ) -> Result<()> {
if self.db.get(txn, &FacetKey { field_id, level: 0, left_bound: key })?.is_none() { if self.db.get(txn, &FacetGroupKey { field_id, level: 0, left_bound: key })?.is_none() {
return Ok(()); return Ok(());
} }
let highest_level = get_highest_level(&txn, self.db, field_id)?; let highest_level = get_highest_level(&txn, self.db, field_id)?;
@ -450,7 +450,7 @@ impl FacetsUpdateIncremental {
while let Some(el) = iter.next() { while let Some(el) = iter.next() {
let (k, _) = el?; let (k, _) = el?;
to_delete.push( to_delete.push(
FacetKeyCodec::<MyByteSlice>::bytes_decode(k).ok_or(Error::Encoding)?.into_owned(), FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(k).ok_or(Error::Encoding)?.into_owned(),
); );
} }
drop(iter); drop(iter);
@ -469,9 +469,9 @@ mod tests {
use rand::{Rng, SeedableRng}; use rand::{Rng, SeedableRng};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec; use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec;
use crate::heed_codec::facet::new::str_ref::StrRefCodec; use crate::heed_codec::facet::str_ref::StrRefCodec;
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice}; use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKeyCodec, ByteSliceRef};
use crate::milli_snap; use crate::milli_snap;
use crate::search::facet::get_highest_level; use crate::search::facet::get_highest_level;
use crate::search::facet::test::FacetIndex; use crate::search::facet::test::FacetIndex;
@ -502,7 +502,7 @@ mod tests {
.unwrap(); .unwrap();
while let Some(el) = iter.next() { while let Some(el) = iter.next() {
let (key, value) = el.unwrap(); let (key, value) = el.unwrap();
let key = FacetKeyCodec::<MyByteSlice>::bytes_decode(&key).unwrap(); let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key).unwrap();
let mut prefix_start_below = vec![]; let mut prefix_start_below = vec![];
prefix_start_below.extend_from_slice(&field_id.to_be_bytes()); prefix_start_below.extend_from_slice(&field_id.to_be_bytes());
@ -519,7 +519,7 @@ mod tests {
) )
.unwrap(); .unwrap();
let (key_bytes, _) = start_below_iter.next().unwrap().unwrap(); let (key_bytes, _) = start_below_iter.next().unwrap().unwrap();
FacetKeyCodec::<MyByteSlice>::bytes_decode(&key_bytes).unwrap() FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes).unwrap()
}; };
assert!(value.size > 0 && (value.size as usize) < db.max_group_size); assert!(value.size > 0 && (value.size as usize) < db.max_group_size);
@ -996,7 +996,7 @@ mod tests {
// for ((key, values), group) in values_field_id.iter().zip(level0iter) { // for ((key, values), group) in values_field_id.iter().zip(level0iter) {
// let (group_key, group_values) = group.unwrap(); // let (group_key, group_values) = group.unwrap();
// let group_key = FacetKeyCodec::<U16Codec>::bytes_decode(group_key).unwrap(); // let group_key = FacetGroupKeyCodec::<U16Codec>::bytes_decode(group_key).unwrap();
// assert_eq!(key, &group_key.left_bound); // assert_eq!(key, &group_key.left_bound);
// assert_eq!(values, &group_values.bitmap); // assert_eq!(values, &group_values.bitmap);
// } // }
@ -1014,7 +1014,7 @@ mod tests {
// for ((key, values), group) in values_field_id.iter().zip(level0iter) { // for ((key, values), group) in values_field_id.iter().zip(level0iter) {
// let (group_key, group_values) = group.unwrap(); // let (group_key, group_values) = group.unwrap();
// let group_key = FacetKeyCodec::<U16Codec>::bytes_decode(group_key).unwrap(); // let group_key = FacetGroupKeyCodec::<U16Codec>::bytes_decode(group_key).unwrap();
// assert_eq!(key, &group_key.left_bound); // assert_eq!(key, &group_key.left_bound);
// assert_eq!(values, &group_values.bitmap); // assert_eq!(values, &group_values.bitmap);
// } // }

View File

@ -1,23 +1,20 @@
use std::{collections::HashMap, fs::File}; use super::{FacetsUpdateBulk, FacetsUpdateIncremental};
use crate::{
facet::FacetType,
heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec},
CboRoaringBitmapCodec, FieldId, Index, Result,
};
use grenad::CompressionType; use grenad::CompressionType;
use heed::BytesDecode; use heed::BytesDecode;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use std::{collections::HashMap, fs::File};
use crate::{
facet::FacetType,
heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice},
CboRoaringBitmapCodec, FieldId, Index, Result,
};
use super::{FacetsUpdateBulk, FacetsUpdateIncremental};
pub mod bulk; pub mod bulk;
pub mod incremental; pub mod incremental;
pub struct FacetsUpdate<'i> { pub struct FacetsUpdate<'i> {
index: &'i Index, index: &'i Index,
database: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>, database: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
level_group_size: u8, level_group_size: u8,
max_level_group_size: u8, max_level_group_size: u8,
min_level_size: u8, min_level_size: u8,
@ -28,10 +25,10 @@ impl<'i> FacetsUpdate<'i> {
pub fn new(index: &'i Index, facet_type: FacetType, new_data: grenad::Reader<File>) -> Self { pub fn new(index: &'i Index, facet_type: FacetType, new_data: grenad::Reader<File>) -> Self {
let database = match facet_type { let database = match facet_type {
FacetType::String => { FacetType::String => {
index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>() index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
} }
FacetType::Number => { FacetType::Number => {
index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>() index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
} }
}; };
Self { Self {
@ -70,8 +67,8 @@ impl<'i> FacetsUpdate<'i> {
let mut cursor = self.new_data.into_cursor()?; let mut cursor = self.new_data.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? { while let Some((key, value)) = cursor.move_on_next()? {
let key = let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(key)
FacetKeyCodec::<MyByteSlice>::bytes_decode(key).ok_or(heed::Error::Encoding)?; .ok_or(heed::Error::Encoding)?;
let docids = let docids =
CboRoaringBitmapCodec::bytes_decode(value).ok_or(heed::Error::Encoding)?; CboRoaringBitmapCodec::bytes_decode(value).ok_or(heed::Error::Encoding)?;
indexer.insert(wtxn, key.field_id, key.left_bound, &docids)?; indexer.insert(wtxn, key.field_id, key.left_bound, &docids)?;

View File

@ -6,9 +6,9 @@ use heed::{BytesDecode, BytesEncode};
use super::helpers::{ use super::helpers::{
create_sorter, merge_cbo_roaring_bitmaps, sorter_into_reader, GrenadParameters, create_sorter, merge_cbo_roaring_bitmaps, sorter_into_reader, GrenadParameters,
}; };
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
use crate::heed_codec::facet::new::{FacetKey, FacetKeyCodec};
use crate::heed_codec::facet::FieldDocIdFacetF64Codec; use crate::heed_codec::facet::FieldDocIdFacetF64Codec;
use crate::heed_codec::facet::OrderedF64Codec;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec};
use crate::Result; use crate::Result;
/// Extracts the facet number and the document ids where this facet number appears. /// Extracts the facet number and the document ids where this facet number appears.
@ -36,8 +36,8 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
let (field_id, document_id, number) = let (field_id, document_id, number) =
FieldDocIdFacetF64Codec::bytes_decode(key_bytes).unwrap(); FieldDocIdFacetF64Codec::bytes_decode(key_bytes).unwrap();
let key = FacetKey { field_id, level: 0, left_bound: number }; let key = FacetGroupKey { field_id, level: 0, left_bound: number };
let key_bytes = FacetKeyCodec::<OrderedF64Codec>::bytes_encode(&key).unwrap(); let key_bytes = FacetGroupKeyCodec::<OrderedF64Codec>::bytes_encode(&key).unwrap();
facet_number_docids_sorter.insert(key_bytes, document_id.to_ne_bytes())?; facet_number_docids_sorter.insert(key_bytes, document_id.to_ne_bytes())?;
} }

View File

@ -4,8 +4,8 @@ use std::io;
use heed::BytesEncode; use heed::BytesEncode;
use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters}; use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters};
use crate::heed_codec::facet::new::str_ref::StrRefCodec; use crate::heed_codec::facet::StrRefCodec;
use crate::heed_codec::facet::new::{FacetKey, FacetKeyCodec}; use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec};
use crate::update::index_documents::merge_cbo_roaring_bitmaps; use crate::update::index_documents::merge_cbo_roaring_bitmaps;
use crate::{FieldId, Result}; use crate::{FieldId, Result};
@ -43,8 +43,8 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
let document_id = u32::from_be_bytes(document_id_bytes); let document_id = u32::from_be_bytes(document_id_bytes);
let normalised_value = std::str::from_utf8(normalized_value_bytes)?; let normalised_value = std::str::from_utf8(normalized_value_bytes)?;
let key = FacetKey { field_id, level: 0, left_bound: normalised_value }; let key = FacetGroupKey { field_id, level: 0, left_bound: normalised_value };
let key_bytes = FacetKeyCodec::<StrRefCodec>::bytes_encode(&key).unwrap(); let key_bytes = FacetGroupKeyCodec::<StrRefCodec>::bytes_encode(&key).unwrap();
facet_string_docids_sorter.insert(&key_bytes, &document_id.to_ne_bytes())?; facet_string_docids_sorter.insert(&key_bytes, &document_id.to_ne_bytes())?;
} }

View File

@ -0,0 +1,4 @@
---
source: milli/src/update/word_prefix_pair_proximity_docids.rs
---
6873ff1f78d08f2b1a13bb9e37349c01