Refactor facet-related codecs

This commit is contained in:
Loïc Lecrenier 2022-09-05 13:01:36 +02:00 committed by Loïc Lecrenier
parent 9b55e582cd
commit 485a72306d
22 changed files with 280 additions and 301 deletions

View File

@ -1,25 +1,19 @@
// mod facet_level_value_f64_codec;
// mod facet_level_value_u32_codec;
// mod facet_string_level_zero_codec;
// mod facet_string_level_zero_value_codec;
// mod facet_string_zero_bounds_value_codec;
mod field_doc_id_facet_f64_codec;
mod field_doc_id_facet_string_codec;
mod ordered_f64_codec;
mod str_ref;
pub mod new;
use heed::types::OwnedType;
// pub use self::facet_level_value_f64_codec::FacetLevelValueF64Codec;
// pub use self::facet_level_value_u32_codec::FacetLevelValueU32Codec;
// pub use self::facet_string_level_zero_codec::FacetStringLevelZeroCodec;
// pub use self::facet_string_level_zero_value_codec::{
// decode_prefix_string, encode_prefix_string, FacetStringLevelZeroValueCodec,
// };
// pub use self::facet_string_zero_bounds_value_codec::FacetStringZeroBoundsValueCodec;
pub use self::field_doc_id_facet_f64_codec::FieldDocIdFacetF64Codec;
pub use self::field_doc_id_facet_string_codec::FieldDocIdFacetStringCodec;
use crate::BEU16;
pub use self::ordered_f64_codec::OrderedF64Codec;
pub use self::str_ref::StrRefCodec;
use crate::{CboRoaringBitmapCodec, BEU16};
use heed::types::OwnedType;
use heed::{BytesDecode, BytesEncode};
use roaring::RoaringBitmap;
use std::borrow::Cow;
use std::convert::TryFrom;
use std::marker::PhantomData;
pub type FieldIdCodec = OwnedType<BEU16>;
@ -32,3 +26,109 @@ pub fn try_split_at(slice: &[u8], mid: usize) -> Option<(&[u8], &[u8])> {
None
}
}
/// Key of an entry in a facet database: the faceted field, the level the
/// entry lives on, and the left bound of the group stored under that key.
///
/// The derived ordering compares `field_id` first, then `level`, then
/// `left_bound`, following the declaration order of the fields.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct FacetGroupKey<T> {
    /// Identifier of the faceted field.
    pub field_id: u16,
    /// Level of the entry.
    pub level: u8,
    /// Left bound of the group, in whatever representation `T` provides.
    pub left_bound: T,
}

impl<'a> FacetGroupKey<&'a [u8]> {
    /// Converts a key borrowing its left bound into one that owns it.
    ///
    /// The bound bytes are copied into a freshly allocated `Vec<u8>`;
    /// `field_id` and `level` are carried over unchanged.
    pub fn into_owned(self) -> FacetGroupKey<Vec<u8>> {
        let FacetGroupKey { field_id, level, left_bound } = self;
        FacetGroupKey { field_id, level, left_bound: left_bound.to_vec() }
    }
}

// No lifetime parameter is needed here: the returned key borrows from `&self`
// through ordinary lifetime elision.
impl FacetGroupKey<Vec<u8>> {
    /// Returns a key with the same `field_id` and `level` whose left bound
    /// borrows the bytes owned by `self`. Nothing is copied.
    pub fn as_ref(&self) -> FacetGroupKey<&[u8]> {
        FacetGroupKey {
            field_id: self.field_id,
            level: self.level,
            left_bound: self.left_bound.as_slice(),
        }
    }
}
/// Value stored next to a facet group key: a one-byte size and a bitmap.
#[derive(Debug)]
pub struct FacetGroupValue {
// Size of the group. NOTE(review): the sort iterators destructure this as
// `group_size`; presumably the number of entries covered on the level
// below — confirm against the code that writes these values.
pub size: u8,
// Bitmap attached to the group; the distinct lookups return it as the
// document ids of the facet value.
pub bitmap: RoaringBitmap,
}
/// Codec for [`FacetGroupKey`] values whose left bound is encoded and decoded
/// by the codec `T`.
///
/// Byte layout: the field id as 2 big-endian bytes, then 1 byte for the level,
/// then the bytes produced by `T` for the left bound.
pub struct FacetGroupKeyCodec<T> {
    _phantom: PhantomData<T>,
}

impl<'a, T> heed::BytesEncode<'a> for FacetGroupKeyCodec<T>
where
    T: BytesEncode<'a>,
    T::EItem: Sized,
{
    type EItem = FacetGroupKey<T::EItem>;

    /// Serializes `value` as `field_id (2 bytes BE) | level (1 byte) | bound`.
    ///
    /// Returns `None` when `T` fails to encode the left bound.
    fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
        // Encode the bound first so the output buffer can be sized exactly.
        let bound = T::bytes_encode(&value.left_bound)?;
        let mut v = Vec::with_capacity(2 + 1 + bound.len());
        v.extend_from_slice(&value.field_id.to_be_bytes());
        v.push(value.level);
        v.extend_from_slice(&bound);
        Some(Cow::Owned(v))
    }
}

impl<'a, T> heed::BytesDecode<'a> for FacetGroupKeyCodec<T>
where
    T: BytesDecode<'a>,
{
    type DItem = FacetGroupKey<T::DItem>;

    /// Parses `field_id (2 bytes BE) | level (1 byte) | bound` from `bytes`.
    ///
    /// Returns `None` when `bytes` holds fewer than 3 bytes or when `T` fails
    /// to decode the remaining bytes as a left bound.
    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        // Checked accesses: a truncated key is reported as a decoding failure
        // instead of panicking on an out-of-range index.
        let fid = u16::from_be_bytes(<[u8; 2]>::try_from(bytes.get(..2)?).ok()?);
        let level = *bytes.get(2)?;
        let bound = T::bytes_decode(bytes.get(3..)?)?;
        Some(FacetGroupKey { field_id: fid, level, left_bound: bound })
    }
}
/// Codec for [`FacetGroupValue`].
///
/// Byte layout: 1 byte for `size`, followed by the bitmap as written by
/// `CboRoaringBitmapCodec::serialize_into`.
pub struct FacetGroupValueCodec;

impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
    type EItem = FacetGroupValue;

    /// Serializes `value` as its size byte followed by the encoded bitmap.
    /// Always returns `Some`.
    fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
        let mut v = vec![value.size];
        CboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut v);
        Some(Cow::Owned(v))
    }
}

impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
    type DItem = FacetGroupValue;

    /// Parses a size byte followed by an encoded bitmap.
    ///
    /// Returns `None` when `bytes` is empty or when the bitmap cannot be
    /// deserialized from the bytes after the first one.
    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        // `split_first` turns an empty value into `None` instead of a panic.
        let (&size, rest) = bytes.split_first()?;
        let bitmap = CboRoaringBitmapCodec::deserialize_from(rest).ok()?;
        Some(FacetGroupValue { size, bitmap })
    }
}
/// Pass-through codec for byte slices: encoding borrows the given slice and
/// decoding hands the stored bytes back unchanged.
pub struct ByteSliceRef;

impl<'a> BytesEncode<'a> for ByteSliceRef {
    type EItem = &'a [u8];

    /// Borrows the slice as-is; no bytes are copied. Always returns `Some`.
    fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
        let slice: &'a [u8] = *item;
        Some(Cow::Borrowed(slice))
    }
}

impl<'a> BytesDecode<'a> for ByteSliceRef {
    type DItem = &'a [u8];

    /// Returns the input bytes untouched. Always returns `Some`.
    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        let decoded: Self::DItem = bytes;
        Some(decoded)
    }
}

View File

@ -1,120 +0,0 @@
use std::borrow::Cow;
use std::convert::TryFrom;
use std::marker::PhantomData;
use heed::{BytesDecode, BytesEncode};
use roaring::RoaringBitmap;
use crate::CboRoaringBitmapCodec;
pub mod ordered_f64_codec;
pub mod str_ref;
// TODO: these codecs were quickly written and not fast/resilient enough
/// Key of an entry in a facet database: the faceted field, the level the
/// entry lives on, and the left bound of the group stored under that key.
///
/// The derived ordering compares `field_id` first, then `level`, then
/// `left_bound`, following the declaration order of the fields.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct FacetKey<T> {
    /// Identifier of the faceted field.
    pub field_id: u16,
    /// Level of the entry.
    pub level: u8,
    /// Left bound of the group, in whatever representation `T` provides.
    pub left_bound: T,
}

impl<'a> FacetKey<&'a [u8]> {
    /// Converts a key borrowing its left bound into one that owns it.
    ///
    /// The bound bytes are copied into a freshly allocated `Vec<u8>`;
    /// `field_id` and `level` are carried over unchanged.
    pub fn into_owned(self) -> FacetKey<Vec<u8>> {
        let FacetKey { field_id, level, left_bound } = self;
        FacetKey { field_id, level, left_bound: left_bound.to_vec() }
    }
}

// No lifetime parameter is needed here: the returned key borrows from `&self`
// through ordinary lifetime elision.
impl FacetKey<Vec<u8>> {
    /// Returns a key with the same `field_id` and `level` whose left bound
    /// borrows the bytes owned by `self`. Nothing is copied.
    pub fn as_ref(&self) -> FacetKey<&[u8]> {
        FacetKey {
            field_id: self.field_id,
            level: self.level,
            left_bound: self.left_bound.as_slice(),
        }
    }
}
/// Value stored next to a facet key: a one-byte size and a bitmap.
#[derive(Debug)]
pub struct FacetGroupValue {
// Size of the group. NOTE(review): the sort iterators destructure this as
// `group_size`; presumably the number of entries covered on the level
// below — confirm against the code that writes these values.
pub size: u8,
// Bitmap attached to the group; the distinct lookups return it as the
// document ids of the facet value.
pub bitmap: RoaringBitmap,
}
/// Codec for [`FacetKey`] values whose left bound is encoded and decoded by
/// the codec `T`.
///
/// Byte layout: the field id as 2 big-endian bytes, then 1 byte for the level,
/// then the bytes produced by `T` for the left bound.
pub struct FacetKeyCodec<T> {
    _phantom: PhantomData<T>,
}

impl<'a, T> heed::BytesEncode<'a> for FacetKeyCodec<T>
where
    T: BytesEncode<'a>,
    T::EItem: Sized,
{
    type EItem = FacetKey<T::EItem>;

    /// Serializes `value` as `field_id (2 bytes BE) | level (1 byte) | bound`.
    ///
    /// Returns `None` when `T` fails to encode the left bound.
    fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
        // Encode the bound first so the output buffer can be sized exactly.
        let bound = T::bytes_encode(&value.left_bound)?;
        let mut v = Vec::with_capacity(2 + 1 + bound.len());
        v.extend_from_slice(&value.field_id.to_be_bytes());
        v.push(value.level);
        v.extend_from_slice(&bound);
        Some(Cow::Owned(v))
    }
}

impl<'a, T> heed::BytesDecode<'a> for FacetKeyCodec<T>
where
    T: BytesDecode<'a>,
{
    type DItem = FacetKey<T::DItem>;

    /// Parses `field_id (2 bytes BE) | level (1 byte) | bound` from `bytes`.
    ///
    /// Returns `None` when `bytes` holds fewer than 3 bytes or when `T` fails
    /// to decode the remaining bytes as a left bound.
    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        // Checked accesses: a truncated key is reported as a decoding failure
        // instead of panicking on an out-of-range index.
        let fid = u16::from_be_bytes(<[u8; 2]>::try_from(bytes.get(..2)?).ok()?);
        let level = *bytes.get(2)?;
        let bound = T::bytes_decode(bytes.get(3..)?)?;
        Some(FacetKey { field_id: fid, level, left_bound: bound })
    }
}
/// Codec for [`FacetGroupValue`].
///
/// Byte layout: 1 byte for `size`, followed by the bitmap as written by
/// `CboRoaringBitmapCodec::serialize_into`.
pub struct FacetGroupValueCodec;

impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
    type EItem = FacetGroupValue;

    /// Serializes `value` as its size byte followed by the encoded bitmap.
    /// Always returns `Some`.
    fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
        let mut v = vec![value.size];
        CboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut v);
        Some(Cow::Owned(v))
    }
}

impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
    type DItem = FacetGroupValue;

    /// Parses a size byte followed by an encoded bitmap.
    ///
    /// Returns `None` when `bytes` is empty or when the bitmap cannot be
    /// deserialized from the bytes after the first one.
    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        // `split_first` turns an empty value into `None` instead of a panic.
        let (&size, rest) = bytes.split_first()?;
        let bitmap = CboRoaringBitmapCodec::deserialize_from(rest).ok()?;
        Some(FacetGroupValue { size, bitmap })
    }
}
// TODO: get rid of this codec as it is named confusingly + should really be part of heed
// or even replace the current ByteSlice codec
/// Pass-through codec for byte slices: encoding borrows the given slice and
/// decoding hands the stored bytes back unchanged.
pub struct MyByteSlice;

impl<'a> BytesEncode<'a> for MyByteSlice {
    type EItem = &'a [u8];

    /// Borrows the slice as-is; no bytes are copied. Always returns `Some`.
    fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
        let slice: &'a [u8] = *item;
        Some(Cow::Borrowed(slice))
    }
}

impl<'a> BytesDecode<'a> for MyByteSlice {
    type DItem = &'a [u8];

    /// Returns the input bytes untouched. Always returns `Some`.
    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        let decoded: Self::DItem = bytes;
        Some(decoded)
    }
}

View File

@ -14,15 +14,10 @@ use time::OffsetDateTime;
use crate::error::{InternalError, UserError};
use crate::facet::FacetType;
use crate::fields_ids_map::FieldsIdsMap;
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
use crate::heed_codec::facet::new::str_ref::StrRefCodec;
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec};
use crate::heed_codec::facet::{
// FacetLevelValueF64Codec, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec,
FieldDocIdFacetF64Codec,
FieldDocIdFacetStringCodec,
FieldIdCodec,
};
use crate::heed_codec::facet::OrderedF64Codec;
use crate::heed_codec::facet::StrRefCodec;
use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKeyCodec};
use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, FieldIdCodec};
use crate::{
default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId,
@ -130,9 +125,9 @@ pub struct Index {
pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
/// Maps the facet field id and ranges of numbers with the docids that correspond to them.
pub facet_id_f64_docids: Database<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
pub facet_id_f64_docids: Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
/// Maps the facet field id and ranges of strings with the docids that correspond to them.
pub facet_id_string_docids: Database<FacetKeyCodec<StrRefCodec>, FacetGroupValueCodec>,
pub facet_id_string_docids: Database<FacetGroupKeyCodec<StrRefCodec>, FacetGroupValueCodec>,
/// Maps the document id, the facet field id and the numbers.
pub field_id_docid_facet_f64s: Database<FieldDocIdFacetF64Codec, Unit>,

View File

@ -7,7 +7,7 @@ use roaring::RoaringBitmap;
use super::{Criterion, CriterionParameters, CriterionResult};
use crate::facet::FacetType;
use crate::heed_codec::facet::new::{FacetKeyCodec, MyByteSlice};
use crate::heed_codec::facet::{FacetGroupKeyCodec, ByteSliceRef};
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
use crate::search::facet::facet_sort_ascending::ascending_facet_sort;
use crate::search::facet::facet_sort_descending::descending_facet_sort;
@ -196,14 +196,14 @@ fn facet_ordered<'t>(
let number_iter = make_iter(
rtxn,
index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>(),
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
field_id,
candidates.clone(),
)?;
let string_iter = make_iter(
rtxn,
index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>(),
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
field_id,
candidates,
)?;

View File

@ -6,7 +6,7 @@ use roaring::RoaringBitmap;
use super::{Distinct, DocIter};
use crate::error::InternalError;
use crate::heed_codec::facet::new::FacetKey;
use crate::heed_codec::facet::FacetGroupKey;
use crate::heed_codec::facet::*;
use crate::index::db_name;
use crate::{DocumentId, FieldId, Index, Result};
@ -48,7 +48,7 @@ impl<'a> FacetDistinctIter<'a> {
fn facet_string_docids(&self, key: &str) -> heed::Result<Option<RoaringBitmap>> {
self.index
.facet_id_string_docids
.get(self.txn, &FacetKey { field_id: self.distinct, level: 0, left_bound: key })
.get(self.txn, &FacetGroupKey { field_id: self.distinct, level: 0, left_bound: key })
.map(|opt| opt.map(|v| v.bitmap))
}
@ -56,7 +56,7 @@ impl<'a> FacetDistinctIter<'a> {
// get facet docids on level 0
self.index
.facet_id_f64_docids
.get(self.txn, &FacetKey { field_id: self.distinct, level: 0, left_bound: key })
.get(self.txn, &FacetGroupKey { field_id: self.distinct, level: 0, left_bound: key })
.map(|opt| opt.map(|v| v.bitmap))
}

View File

@ -8,12 +8,11 @@ use roaring::RoaringBitmap;
use crate::error::UserError;
use crate::facet::FacetType;
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
use crate::heed_codec::facet::new::str_ref::StrRefCodec;
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice};
use crate::heed_codec::facet::OrderedF64Codec;
use crate::heed_codec::facet::StrRefCodec;
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec};
use crate::search::facet::facet_distribution_iter;
// use crate::search::facet::FacetStringIter;
use crate::{FieldId, Index, Result};
/// The default number of values by facets that will
@ -138,7 +137,7 @@ impl<'a> FacetDistribution<'a> {
) -> heed::Result<()> {
facet_distribution_iter::iterate_over_facet_distribution(
self.rtxn,
self.index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>(),
self.index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
field_id,
candidates,
|facet_key, nbr_docids| {
@ -161,7 +160,7 @@ impl<'a> FacetDistribution<'a> {
) -> heed::Result<()> {
facet_distribution_iter::iterate_over_facet_distribution(
self.rtxn,
self.index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>(),
self.index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
field_id,
candidates,
|facet_key, nbr_docids| {
@ -191,7 +190,7 @@ impl<'a> FacetDistribution<'a> {
let iter = db
.as_polymorph()
.prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())?
.remap_types::<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>();
.remap_types::<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>();
for result in iter {
let (key, value) = result?;
@ -206,7 +205,7 @@ impl<'a> FacetDistribution<'a> {
.facet_id_string_docids
.as_polymorph()
.prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, prefix.as_slice())?
.remap_types::<FacetKeyCodec<StrRefCodec>, FacetGroupValueCodec>();
.remap_types::<FacetGroupKeyCodec<StrRefCodec>, FacetGroupValueCodec>();
// TODO: get the original value of the facet somewhere (in the documents DB?)
for result in iter {

View File

@ -4,11 +4,11 @@ use heed::Result;
use roaring::RoaringBitmap;
use super::{get_first_facet_value, get_highest_level};
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice};
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKey, FacetGroupValueCodec, FacetGroupKeyCodec};
pub fn iterate_over_facet_distribution<'t, CB>(
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
field_id: u16,
candidates: &RoaringBitmap,
callback: CB,
@ -18,9 +18,9 @@ where
{
let mut fd = FacetDistribution { rtxn, db, field_id, callback };
let highest_level =
get_highest_level(rtxn, db.remap_key_type::<FacetKeyCodec<MyByteSlice>>(), field_id)?;
get_highest_level(rtxn, db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(), field_id)?;
if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, db, field_id)? {
if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? {
fd.iterate(candidates, highest_level, first_bound, usize::MAX)?;
return Ok(());
} else {
@ -33,7 +33,7 @@ where
CB: FnMut(&'t [u8], u64) -> ControlFlow<()>,
{
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
field_id: u16,
callback: CB,
}
@ -49,7 +49,7 @@ where
group_size: usize,
) -> Result<ControlFlow<()>> {
let starting_key =
FacetKey { field_id: self.field_id, level: 0, left_bound: starting_bound };
FacetGroupKey { field_id: self.field_id, level: 0, left_bound: starting_bound };
let iter = self.db.range(self.rtxn, &(starting_key..))?.take(group_size);
for el in iter {
let (key, value) = el?;
@ -78,7 +78,7 @@ where
if level == 0 {
return self.iterate_level_0(candidates, starting_bound, group_size);
}
let starting_key = FacetKey { field_id: self.field_id, level, left_bound: starting_bound };
let starting_key = FacetGroupKey { field_id: self.field_id, level, left_bound: starting_bound };
let iter = self.db.range(&self.rtxn, &(&starting_key..)).unwrap().take(group_size);
for el in iter {
@ -116,7 +116,7 @@ mod tests {
use roaring::RoaringBitmap;
use super::iterate_over_facet_distribution;
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec;
use crate::milli_snap;
use crate::search::facet::test::FacetIndex;

View File

@ -4,12 +4,12 @@ use heed::BytesEncode;
use roaring::RoaringBitmap;
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice};
use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef};
use crate::Result;
pub fn find_docids_of_facet_within_bounds<'t, BoundCodec>(
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetKeyCodec<BoundCodec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<BoundCodec>, FacetGroupValueCodec>,
field_id: u16,
left: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
right: &'t Bound<<BoundCodec as BytesEncode<'t>>::EItem>,
@ -42,13 +42,13 @@ where
}
Bound::Unbounded => Bound::Unbounded,
};
let db = db.remap_key_type::<FacetKeyCodec<MyByteSlice>>();
let db = db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>();
let mut docids = RoaringBitmap::new();
let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids: &mut docids };
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, db, field_id)? {
let last_bound = get_last_facet_value::<MyByteSlice>(rtxn, db, field_id)?.unwrap();
if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? {
let last_bound = get_last_facet_value::<ByteSliceRef>(rtxn, db, field_id)?.unwrap();
f.run(highest_level, first_bound, Bound::Included(last_bound), usize::MAX)?;
Ok(docids)
} else {
@ -59,7 +59,7 @@ where
/// Fetch the document ids that have a facet with a value between the two given bounds
struct FacetRangeSearch<'t, 'b, 'bitmap> {
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
field_id: u16,
left: Bound<&'b [u8]>,
right: Bound<&'b [u8]>,
@ -68,7 +68,7 @@ struct FacetRangeSearch<'t, 'b, 'bitmap> {
impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
fn run_level_0(&mut self, starting_left_bound: &'t [u8], group_size: usize) -> Result<()> {
let left_key =
FacetKey { field_id: self.field_id, level: 0, left_bound: starting_left_bound };
FacetGroupKey { field_id: self.field_id, level: 0, left_bound: starting_left_bound };
let iter = self.db.range(&self.rtxn, &(left_key..))?.take(group_size);
for el in iter {
let (key, value) = el?;
@ -117,7 +117,7 @@ impl<'t, 'b, 'bitmap> FacetRangeSearch<'t, 'b, 'bitmap> {
return self.run_level_0(starting_left_bound, group_size);
}
let left_key = FacetKey { field_id: self.field_id, level, left_bound: starting_left_bound };
let left_key = FacetGroupKey { field_id: self.field_id, level, left_bound: starting_left_bound };
let mut iter = self.db.range(&self.rtxn, &(left_key..))?.take(group_size);
let (mut previous_key, mut previous_value) = iter.next().unwrap()?;
@ -258,8 +258,8 @@ mod tests {
use roaring::RoaringBitmap;
use super::find_docids_of_facet_within_bounds;
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
use crate::heed_codec::facet::new::FacetKeyCodec;
use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec;
use crate::heed_codec::facet::FacetGroupKeyCodec;
use crate::milli_snap;
use crate::search::facet::test::FacetIndex;
use crate::snapshot_tests::display_bitmap;
@ -310,7 +310,7 @@ mod tests {
let end = Bound::Included(i);
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.db.content.remap_key_type::<FacetKeyCodec<OrderedF64Codec>>(),
index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0,
&start,
&end,
@ -326,7 +326,7 @@ mod tests {
let end = Bound::Excluded(i);
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.db.content.remap_key_type::<FacetKeyCodec<OrderedF64Codec>>(),
index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0,
&start,
&end,
@ -352,7 +352,7 @@ mod tests {
let end = Bound::Included(255.);
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.db.content.remap_key_type::<FacetKeyCodec<OrderedF64Codec>>(),
index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0,
&start,
&end,
@ -371,7 +371,7 @@ mod tests {
let end = Bound::Excluded(255.);
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.db.content.remap_key_type::<FacetKeyCodec<OrderedF64Codec>>(),
index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0,
&start,
&end,
@ -399,7 +399,7 @@ mod tests {
let end = Bound::Included(255. - i);
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.db.content.remap_key_type::<FacetKeyCodec<OrderedF64Codec>>(),
index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0,
&start,
&end,
@ -418,7 +418,7 @@ mod tests {
let end = Bound::Excluded(255. - i);
let docids = find_docids_of_facet_within_bounds::<OrderedF64Codec>(
&txn,
index.db.content.remap_key_type::<FacetKeyCodec<OrderedF64Codec>>(),
index.db.content.remap_key_type::<FacetGroupKeyCodec<OrderedF64Codec>>(),
0,
&start,
&end,

View File

@ -2,19 +2,19 @@ use heed::Result;
use roaring::RoaringBitmap;
use super::{get_first_facet_value, get_highest_level};
use crate::heed_codec::facet::new::{
FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice,
use crate::heed_codec::facet::{
FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef,
};
pub fn ascending_facet_sort<'t>(
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
field_id: u16,
candidates: RoaringBitmap,
) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> {
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, db, field_id)? {
let first_key = FacetKey { field_id, level: highest_level, left_bound: first_bound };
if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? {
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX);
Ok(Box::new(AscendingFacetSort { rtxn, db, field_id, stack: vec![(candidates, iter)] }))
@ -25,11 +25,11 @@ pub fn ascending_facet_sort<'t>(
struct AscendingFacetSort<'t, 'e> {
rtxn: &'t heed::RoTxn<'e>,
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
field_id: u16,
stack: Vec<(
RoaringBitmap,
std::iter::Take<heed::RoRange<'t, FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>>,
std::iter::Take<heed::RoRange<'t, FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>>,
)>,
}
@ -41,7 +41,7 @@ impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> {
let (documents_ids, deepest_iter) = self.stack.last_mut()?;
for result in deepest_iter {
let (
FacetKey { level, left_bound, field_id },
FacetGroupKey { level, left_bound, field_id },
FacetGroupValue { size: group_size, mut bitmap },
) = result.unwrap();
// The range is unbounded on the right and the group size for the highest level is MAX,
@ -65,7 +65,7 @@ impl<'t, 'e> Iterator for AscendingFacetSort<'t, 'e> {
return Some(Ok(bitmap));
}
let starting_key_below =
FacetKey { field_id: self.field_id, level: level - 1, left_bound };
FacetGroupKey { field_id: self.field_id, level: level - 1, left_bound };
let iter = match self.db.range(&self.rtxn, &(starting_key_below..)) {
Ok(iter) => iter,
Err(e) => return Some(Err(e.into())),
@ -86,7 +86,7 @@ mod tests {
use rand::{Rng, SeedableRng};
use roaring::RoaringBitmap;
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec;
use crate::milli_snap;
use crate::search::facet::facet_sort_ascending::ascending_facet_sort;
use crate::search::facet::test::FacetIndex;

View File

@ -4,21 +4,21 @@ use heed::Result;
use roaring::RoaringBitmap;
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
use crate::heed_codec::facet::new::{
FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice,
use crate::heed_codec::facet::{
FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef,
};
pub fn descending_facet_sort<'t>(
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
field_id: u16,
candidates: RoaringBitmap,
) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> {
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<MyByteSlice>(rtxn, db, field_id)? {
let first_key = FacetKey { field_id, level: highest_level, left_bound: first_bound };
let last_bound = get_last_facet_value::<MyByteSlice>(rtxn, db, field_id)?.unwrap();
let last_key = FacetKey { field_id, level: highest_level, left_bound: last_bound };
if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? {
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
let last_bound = get_last_facet_value::<ByteSliceRef>(rtxn, db, field_id)?.unwrap();
let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound };
let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX);
Ok(Box::new(DescendingFacetSort {
rtxn,
@ -33,11 +33,11 @@ pub fn descending_facet_sort<'t>(
struct DescendingFacetSort<'t> {
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
field_id: u16,
stack: Vec<(
RoaringBitmap,
std::iter::Take<heed::RoRevRange<'t, FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>>,
std::iter::Take<heed::RoRevRange<'t, FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>>,
Bound<&'t [u8]>,
)>,
}
@ -50,7 +50,7 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
let (documents_ids, deepest_iter, right_bound) = self.stack.last_mut()?;
while let Some(result) = deepest_iter.next() {
let (
FacetKey { level, left_bound, field_id },
FacetGroupKey { level, left_bound, field_id },
FacetGroupValue { size: group_size, mut bitmap },
) = result.unwrap();
// The range is unbounded on the right and the group size for the highest level is MAX,
@ -72,15 +72,15 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
if level == 0 {
return Some(Ok(bitmap));
}
let starting_key_below = FacetKey { field_id, level: level - 1, left_bound };
let starting_key_below = FacetGroupKey { field_id, level: level - 1, left_bound };
let end_key_kelow = match *right_bound {
Bound::Included(right) => Bound::Included(FacetKey {
Bound::Included(right) => Bound::Included(FacetGroupKey {
field_id,
level: level - 1,
left_bound: right,
}),
Bound::Excluded(right) => Bound::Excluded(FacetKey {
Bound::Excluded(right) => Bound::Excluded(FacetGroupKey {
field_id,
level: level - 1,
left_bound: right,
@ -90,7 +90,7 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
let prev_right_bound = *right_bound;
*right_bound = Bound::Excluded(left_bound);
let iter =
match self.db.remap_key_type::<FacetKeyCodec<MyByteSlice>>().rev_range(
match self.db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>().rev_range(
&self.rtxn,
&(Bound::Included(starting_key_below), end_key_kelow),
) {
@ -114,8 +114,8 @@ mod tests {
use rand::{Rng, SeedableRng};
use roaring::RoaringBitmap;
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
use crate::heed_codec::facet::new::{FacetKeyCodec, MyByteSlice};
use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec;
use crate::heed_codec::facet::{FacetGroupKeyCodec, ByteSliceRef};
use crate::milli_snap;
use crate::search::facet::facet_sort_descending::descending_facet_sort;
use crate::search::facet::test::FacetIndex;
@ -162,7 +162,7 @@ mod tests {
let txn = index.env.read_txn().unwrap();
let candidates = (200..=300).into_iter().collect::<RoaringBitmap>();
let mut results = String::new();
let db = index.db.content.remap_key_type::<FacetKeyCodec<MyByteSlice>>();
let db = index.db.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>();
let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap();
for el in iter {
let docids = el.unwrap();

View File

@ -9,8 +9,8 @@ use roaring::RoaringBitmap;
use super::facet_range_search;
use crate::error::{Error, UserError};
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec};
use crate::heed_codec::facet::OrderedF64Codec;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::{distance_between_two_points, lat_lng_to_xyz, FieldId, Index, Result};
/// The maximum number of filters the filter AST can process.
@ -180,7 +180,11 @@ impl<'a> Filter<'a> {
let string_docids = strings_db
.get(
rtxn,
&FacetKey { field_id, level: 0, left_bound: &val.value().to_lowercase() },
&FacetGroupKey {
field_id,
level: 0,
left_bound: &val.value().to_lowercase(),
},
)?
.map(|v| v.bitmap)
.unwrap_or_default();
@ -218,10 +222,10 @@ impl<'a> Filter<'a> {
.remap_data_type::<DecodeIgnore>()
.get_lower_than_or_equal_to(
rtxn,
&FacetKey { field_id, level: u8::MAX, left_bound: f64::MAX },
&FacetGroupKey { field_id, level: u8::MAX, left_bound: f64::MAX },
)?
.and_then(
|(FacetKey { field_id: id, level, .. }, _)| {
|(FacetGroupKey { field_id: id, level, .. }, _)| {
if id == field_id {
Some(level)
} else {
@ -252,7 +256,7 @@ impl<'a> Filter<'a> {
/// going deeper through the levels.
fn explore_facet_number_levels(
rtxn: &heed::RoTxn,
db: heed::Database<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
field_id: FieldId,
level: u8,
left: Bound<f64>,

View File

@ -3,7 +3,7 @@ use heed::{BytesDecode, RoTxn};
pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET};
pub use self::filter::Filter;
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice};
use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKeyCodec, ByteSliceRef};
mod facet_distribution;
mod facet_distribution_iter;
@ -14,7 +14,7 @@ mod filter;
pub(crate) fn get_first_facet_value<'t, BoundCodec>(
txn: &'t RoTxn,
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
field_id: u16,
) -> heed::Result<Option<BoundCodec::DItem>>
where
@ -28,7 +28,7 @@ where
if let Some(first) = level0_iter_forward.next() {
let (first_key, _) = first?;
let first_key =
FacetKeyCodec::<BoundCodec>::bytes_decode(first_key).ok_or(heed::Error::Encoding)?;
FacetGroupKeyCodec::<BoundCodec>::bytes_decode(first_key).ok_or(heed::Error::Encoding)?;
Ok(Some(first_key.left_bound))
} else {
Ok(None)
@ -36,7 +36,7 @@ where
}
pub(crate) fn get_last_facet_value<'t, BoundCodec>(
txn: &'t RoTxn,
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
field_id: u16,
) -> heed::Result<Option<BoundCodec::DItem>>
where
@ -51,7 +51,7 @@ where
if let Some(last) = level0_iter_backward.next() {
let (last_key, _) = last?;
let last_key =
FacetKeyCodec::<BoundCodec>::bytes_decode(last_key).ok_or(heed::Error::Encoding)?;
FacetGroupKeyCodec::<BoundCodec>::bytes_decode(last_key).ok_or(heed::Error::Encoding)?;
Ok(Some(last_key.left_bound))
} else {
Ok(None)
@ -59,7 +59,7 @@ where
}
pub(crate) fn get_highest_level<'t>(
txn: &'t RoTxn<'t>,
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
field_id: u16,
) -> heed::Result<u8> {
let field_id_prefix = &field_id.to_be_bytes();
@ -69,7 +69,7 @@ pub(crate) fn get_highest_level<'t>(
.next()
.map(|el| {
let (key, _) = el.unwrap();
let key = FacetKeyCodec::<MyByteSlice>::bytes_decode(key).unwrap();
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(key).unwrap();
key.level
})
.unwrap_or(0))
@ -84,8 +84,8 @@ pub mod test {
use heed::{BytesDecode, BytesEncode, Env, RwTxn};
use roaring::RoaringBitmap;
use crate::heed_codec::facet::new::{
FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice,
use crate::heed_codec::facet::{
FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef,
};
use crate::snapshot_tests::display_bitmap;
use crate::update::FacetsUpdateIncremental;
@ -101,7 +101,7 @@ pub mod test {
}
pub struct Database {
pub content: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
pub content: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
pub group_size: usize,
pub max_group_size: usize,
_tempdir: Rc<tempfile::TempDir>,
@ -184,7 +184,7 @@ pub mod test {
let mut iter = self.db.content.iter(&txn).unwrap();
while let Some(el) = iter.next() {
let (key, value) = el.unwrap();
let FacetKey { field_id, level, left_bound: bound } = key;
let FacetGroupKey { field_id, level, left_bound: bound } = key;
let bound = BoundCodec::bytes_decode(bound).unwrap();
let FacetGroupValue { size, bitmap } = value;
writeln!(

View File

@ -5,7 +5,7 @@ use std::path::Path;
use roaring::RoaringBitmap;
use crate::facet::FacetType;
use crate::heed_codec::facet::new::{FacetGroupValue, FacetKey};
use crate::heed_codec::facet::{FacetGroupValue, FacetGroupKey};
use crate::{make_db_snap_from_iter, ExternalDocumentsIds, Index};
#[track_caller]
@ -280,7 +280,7 @@ pub fn snap_word_prefix_position_docids(index: &Index) -> String {
}
pub fn snap_facet_id_f64_docids(index: &Index) -> String {
let snap = make_db_snap_from_iter!(index, facet_id_f64_docids, |(
FacetKey { field_id, level, left_bound },
FacetGroupKey { field_id, level, left_bound },
FacetGroupValue { size, bitmap },
)| {
&format!("{field_id:<3} {level:<2} {left_bound:<6} {size:<2} {}", display_bitmap(&bitmap))
@ -289,7 +289,7 @@ pub fn snap_facet_id_f64_docids(index: &Index) -> String {
}
pub fn snap_facet_id_string_docids(index: &Index) -> String {
let snap = make_db_snap_from_iter!(index, facet_id_string_docids, |(
FacetKey { field_id, level, left_bound },
FacetGroupKey { field_id, level, left_bound },
FacetGroupValue { size, bitmap },
)| {
&format!("{field_id:<3} {level:<2} {left_bound:<12} {size:<2} {}", display_bitmap(&bitmap))

View File

@ -11,7 +11,7 @@ use time::OffsetDateTime;
use super::{ClearDocuments, FacetsUpdateBulk};
use crate::error::{InternalError, UserError};
use crate::facet::FacetType;
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice};
use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKeyCodec, ByteSliceRef};
use crate::heed_codec::CboRoaringBitmapCodec;
use crate::index::{db_name, main_key};
use crate::{
@ -626,10 +626,10 @@ fn remove_docids_from_facet_id_docids<'a>(
) -> Result<()> {
let db = match facet_type {
FacetType::String => {
index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>()
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
}
FacetType::Number => {
index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>()
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
}
};
let mut modified = false;

View File

@ -12,8 +12,8 @@ use time::OffsetDateTime;
use crate::error::InternalError;
use crate::facet::FacetType;
use crate::heed_codec::facet::new::{
FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice,
use crate::heed_codec::facet::{
FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef,
};
use crate::update::index_documents::{
create_writer, valid_lmdb_key, write_into_lmdb_database, writer_into_reader,
@ -22,7 +22,7 @@ use crate::{CboRoaringBitmapCodec, FieldId, Index, Result};
pub struct FacetsUpdateBulk<'i> {
index: &'i Index,
database: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
database: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
level_group_size: usize,
min_level_size: usize,
facet_type: FacetType,
@ -40,10 +40,10 @@ impl<'i> FacetsUpdateBulk<'i> {
index,
database: match facet_type {
FacetType::String => {
index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>()
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
}
FacetType::Number => {
index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>()
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
}
},
level_group_size: 4,
@ -61,10 +61,10 @@ impl<'i> FacetsUpdateBulk<'i> {
index,
database: match facet_type {
FacetType::String => {
index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>()
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
}
FacetType::Number => {
index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>()
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
}
},
level_group_size: 4,
@ -89,8 +89,8 @@ impl<'i> FacetsUpdateBulk<'i> {
}
fn clear_levels(&self, wtxn: &mut heed::RwTxn, field_id: FieldId) -> Result<()> {
let left = FacetKey::<&[u8]> { field_id, level: 1, left_bound: &[] };
let right = FacetKey::<&[u8]> { field_id, level: u8::MAX, left_bound: &[] };
let left = FacetGroupKey::<&[u8]> { field_id, level: 1, left_bound: &[] };
let right = FacetGroupKey::<&[u8]> { field_id, level: u8::MAX, left_bound: &[] };
let range = left..=right;
self.database.delete_range(wtxn, &range).map(drop)?;
Ok(())
@ -119,7 +119,7 @@ impl<'i> FacetsUpdateBulk<'i> {
for level_reader in level_readers {
let mut cursor = level_reader.into_cursor()?;
while let Some((k, v)) = cursor.move_on_next()? {
let key = FacetKeyCodec::<DecodeIgnore>::bytes_decode(k).unwrap();
let key = FacetGroupKeyCodec::<DecodeIgnore>::bytes_decode(k).unwrap();
let value = FacetGroupValueCodec::bytes_decode(v).unwrap();
println!("inserting {key:?} {value:?}");
@ -210,7 +210,7 @@ impl<'i> FacetsUpdateBulk<'i> {
struct ComputeHigherLevels<'t> {
rtxn: &'t heed::RoTxn<'t>,
db: &'t heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
db: &'t heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
field_id: u16,
level_group_size: usize,
min_level_size: usize,
@ -233,7 +233,7 @@ impl<'t> ComputeHigherLevels<'t> {
.db
.as_polymorph()
.prefix_iter::<_, ByteSlice, ByteSlice>(self.rtxn, level_0_prefix.as_slice())?
.remap_types::<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>();
.remap_types::<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>();
let mut left_bound: &[u8] = &[];
let mut first_iteration_for_new_group = true;
@ -311,9 +311,9 @@ impl<'t> ComputeHigherLevels<'t> {
for ((bitmap, left_bound), group_size) in
bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..))
{
let key = FacetKey { field_id: self.field_id, level, left_bound };
let key = FacetGroupKey { field_id: self.field_id, level, left_bound };
let key =
FacetKeyCodec::<MyByteSlice>::bytes_encode(&key).ok_or(Error::Encoding)?;
FacetGroupKeyCodec::<ByteSliceRef>::bytes_encode(&key).ok_or(Error::Encoding)?;
let value = FacetGroupValue { size: group_size, bitmap };
let value =
FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?;
@ -329,9 +329,9 @@ impl<'t> ComputeHigherLevels<'t> {
for ((bitmap, left_bound), group_size) in
bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..))
{
let key = FacetKey { field_id: self.field_id, level, left_bound };
let key = FacetGroupKey { field_id: self.field_id, level, left_bound };
let key =
FacetKeyCodec::<MyByteSlice>::bytes_encode(&key).ok_or(Error::Encoding)?;
FacetGroupKeyCodec::<ByteSliceRef>::bytes_encode(&key).ok_or(Error::Encoding)?;
let value = FacetGroupValue { size: group_size, bitmap };
let value = FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?;
cur_writer.insert(key, value)?;

View File

@ -2,8 +2,8 @@ use heed::types::ByteSlice;
use heed::{BytesDecode, Error, RoTxn, RwTxn};
use roaring::RoaringBitmap;
use crate::heed_codec::facet::new::{
FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec, MyByteSlice,
use crate::heed_codec::facet::{
FacetGroupValue, FacetGroupValueCodec, FacetGroupKey, FacetGroupKeyCodec, ByteSliceRef,
};
use crate::search::facet::get_highest_level;
use crate::Result;
@ -19,13 +19,13 @@ enum DeletionResult {
}
pub struct FacetsUpdateIncremental {
db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
group_size: usize,
min_level_size: usize,
max_group_size: usize,
}
impl FacetsUpdateIncremental {
pub fn new(db: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>) -> Self {
pub fn new(db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>) -> Self {
Self { db, group_size: 4, min_level_size: 5, max_group_size: 8 }
}
}
@ -36,7 +36,7 @@ impl FacetsUpdateIncremental {
level: u8,
search_key: &[u8],
txn: &RoTxn,
) -> Result<(FacetKey<Vec<u8>>, FacetGroupValue)> {
) -> Result<(FacetGroupKey<Vec<u8>>, FacetGroupValue)> {
let mut prefix = vec![];
prefix.extend_from_slice(&field_id.to_be_bytes());
prefix.push(level);
@ -45,17 +45,17 @@ impl FacetsUpdateIncremental {
let mut prefix_iter = self
.db
.as_polymorph()
.prefix_iter::<_, MyByteSlice, FacetGroupValueCodec>(txn, &prefix.as_slice())?;
.prefix_iter::<_, ByteSliceRef, FacetGroupValueCodec>(txn, &prefix.as_slice())?;
if let Some(e) = prefix_iter.next() {
let (key_bytes, value) = e?;
Ok((
FacetKeyCodec::<MyByteSlice>::bytes_decode(&key_bytes)
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes)
.ok_or(Error::Encoding)?
.into_owned(),
value,
))
} else {
let key = FacetKey { field_id, level, left_bound: search_key };
let key = FacetGroupKey { field_id, level, left_bound: search_key };
match self.db.get_lower_than(txn, &key)? {
Some((key, value)) => {
if key.level != level || key.field_id != field_id {
@ -66,13 +66,13 @@ impl FacetsUpdateIncremental {
let mut iter = self
.db
.as_polymorph()
.prefix_iter::<_, MyByteSlice, FacetGroupValueCodec>(
.prefix_iter::<_, ByteSliceRef, FacetGroupValueCodec>(
txn,
&prefix.as_slice(),
)?;
let (key_bytes, value) = iter.next().unwrap()?;
Ok((
FacetKeyCodec::<MyByteSlice>::bytes_decode(&key_bytes)
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes)
.ok_or(Error::Encoding)?
.into_owned(),
value,
@ -93,7 +93,7 @@ impl FacetsUpdateIncremental {
new_key: &[u8],
new_values: &RoaringBitmap,
) -> Result<InsertionResult> {
let key = FacetKey { field_id, level: 0, left_bound: new_key };
let key = FacetGroupKey { field_id, level: 0, left_bound: new_key };
let value = FacetGroupValue { bitmap: new_values.clone(), size: 1 };
let mut level0_prefix = vec![];
@ -193,7 +193,7 @@ impl FacetsUpdateIncremental {
.db
.get_greater_than_or_equal_to(
&txn,
&FacetKey {
&FacetGroupKey {
field_id,
level: level_below,
left_bound: insertion_key.left_bound.as_slice(),
@ -217,7 +217,7 @@ impl FacetsUpdateIncremental {
}
let key =
FacetKey { field_id, level, left_bound: insertion_key.left_bound.clone() };
FacetGroupKey { field_id, level, left_bound: insertion_key.left_bound.clone() };
let value = FacetGroupValue { size: size_left as u8, bitmap: values_left };
(key, value)
};
@ -235,7 +235,7 @@ impl FacetsUpdateIncremental {
}
let key =
FacetKey { field_id, level, left_bound: right_start_key.unwrap().to_vec() };
FacetGroupKey { field_id, level, left_bound: right_start_key.unwrap().to_vec() };
let value = FacetGroupValue { size: size_right as u8, bitmap: values_right };
(key, value)
};
@ -303,7 +303,7 @@ impl FacetsUpdateIncremental {
let mut values = RoaringBitmap::new();
for _ in 0..group_size {
let (key_bytes, value_i) = groups_iter.next().unwrap()?;
let key_i = FacetKeyCodec::<MyByteSlice>::bytes_decode(&key_bytes)
let key_i = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes)
.ok_or(Error::Encoding)?;
if first_key.is_none() {
@ -311,7 +311,7 @@ impl FacetsUpdateIncremental {
}
values |= value_i.bitmap;
}
let key = FacetKey {
let key = FacetGroupKey {
field_id,
level: highest_level + 1,
left_bound: first_key.unwrap().left_bound,
@ -384,7 +384,7 @@ impl FacetsUpdateIncremental {
key: &[u8],
value: u32,
) -> Result<DeletionResult> {
let key = FacetKey { field_id, level: 0, left_bound: key };
let key = FacetGroupKey { field_id, level: 0, left_bound: key };
let mut bitmap = self.db.get(&txn, &key)?.unwrap().bitmap;
bitmap.remove(value);
@ -415,7 +415,7 @@ impl FacetsUpdateIncremental {
key: &[u8],
value: u32,
) -> Result<()> {
if self.db.get(txn, &FacetKey { field_id, level: 0, left_bound: key })?.is_none() {
if self.db.get(txn, &FacetGroupKey { field_id, level: 0, left_bound: key })?.is_none() {
return Ok(());
}
let highest_level = get_highest_level(&txn, self.db, field_id)?;
@ -450,7 +450,7 @@ impl FacetsUpdateIncremental {
while let Some(el) = iter.next() {
let (k, _) = el?;
to_delete.push(
FacetKeyCodec::<MyByteSlice>::bytes_decode(k).ok_or(Error::Encoding)?.into_owned(),
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(k).ok_or(Error::Encoding)?.into_owned(),
);
}
drop(iter);
@ -469,9 +469,9 @@ mod tests {
use rand::{Rng, SeedableRng};
use roaring::RoaringBitmap;
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
use crate::heed_codec::facet::new::str_ref::StrRefCodec;
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice};
use crate::heed_codec::facet::ordered_f64_codec::OrderedF64Codec;
use crate::heed_codec::facet::str_ref::StrRefCodec;
use crate::heed_codec::facet::{FacetGroupValueCodec, FacetGroupKeyCodec, ByteSliceRef};
use crate::milli_snap;
use crate::search::facet::get_highest_level;
use crate::search::facet::test::FacetIndex;
@ -502,7 +502,7 @@ mod tests {
.unwrap();
while let Some(el) = iter.next() {
let (key, value) = el.unwrap();
let key = FacetKeyCodec::<MyByteSlice>::bytes_decode(&key).unwrap();
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key).unwrap();
let mut prefix_start_below = vec![];
prefix_start_below.extend_from_slice(&field_id.to_be_bytes());
@ -519,7 +519,7 @@ mod tests {
)
.unwrap();
let (key_bytes, _) = start_below_iter.next().unwrap().unwrap();
FacetKeyCodec::<MyByteSlice>::bytes_decode(&key_bytes).unwrap()
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes).unwrap()
};
assert!(value.size > 0 && (value.size as usize) < db.max_group_size);
@ -996,7 +996,7 @@ mod tests {
// for ((key, values), group) in values_field_id.iter().zip(level0iter) {
// let (group_key, group_values) = group.unwrap();
// let group_key = FacetKeyCodec::<U16Codec>::bytes_decode(group_key).unwrap();
// let group_key = FacetGroupKeyCodec::<U16Codec>::bytes_decode(group_key).unwrap();
// assert_eq!(key, &group_key.left_bound);
// assert_eq!(values, &group_values.bitmap);
// }
@ -1014,7 +1014,7 @@ mod tests {
// for ((key, values), group) in values_field_id.iter().zip(level0iter) {
// let (group_key, group_values) = group.unwrap();
// let group_key = FacetKeyCodec::<U16Codec>::bytes_decode(group_key).unwrap();
// let group_key = FacetGroupKeyCodec::<U16Codec>::bytes_decode(group_key).unwrap();
// assert_eq!(key, &group_key.left_bound);
// assert_eq!(values, &group_values.bitmap);
// }

View File

@ -1,23 +1,20 @@
use std::{collections::HashMap, fs::File};
use super::{FacetsUpdateBulk, FacetsUpdateIncremental};
use crate::{
facet::FacetType,
heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec},
CboRoaringBitmapCodec, FieldId, Index, Result,
};
use grenad::CompressionType;
use heed::BytesDecode;
use roaring::RoaringBitmap;
use crate::{
facet::FacetType,
heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec, MyByteSlice},
CboRoaringBitmapCodec, FieldId, Index, Result,
};
use super::{FacetsUpdateBulk, FacetsUpdateIncremental};
use std::{collections::HashMap, fs::File};
pub mod bulk;
pub mod incremental;
pub struct FacetsUpdate<'i> {
index: &'i Index,
database: heed::Database<FacetKeyCodec<MyByteSlice>, FacetGroupValueCodec>,
database: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
level_group_size: u8,
max_level_group_size: u8,
min_level_size: u8,
@ -28,10 +25,10 @@ impl<'i> FacetsUpdate<'i> {
pub fn new(index: &'i Index, facet_type: FacetType, new_data: grenad::Reader<File>) -> Self {
let database = match facet_type {
FacetType::String => {
index.facet_id_string_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>()
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
}
FacetType::Number => {
index.facet_id_f64_docids.remap_key_type::<FacetKeyCodec<MyByteSlice>>()
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
}
};
Self {
@ -70,8 +67,8 @@ impl<'i> FacetsUpdate<'i> {
let mut cursor = self.new_data.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
let key =
FacetKeyCodec::<MyByteSlice>::bytes_decode(key).ok_or(heed::Error::Encoding)?;
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(key)
.ok_or(heed::Error::Encoding)?;
let docids =
CboRoaringBitmapCodec::bytes_decode(value).ok_or(heed::Error::Encoding)?;
indexer.insert(wtxn, key.field_id, key.left_bound, &docids)?;

View File

@ -6,9 +6,9 @@ use heed::{BytesDecode, BytesEncode};
use super::helpers::{
create_sorter, merge_cbo_roaring_bitmaps, sorter_into_reader, GrenadParameters,
};
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
use crate::heed_codec::facet::new::{FacetKey, FacetKeyCodec};
use crate::heed_codec::facet::FieldDocIdFacetF64Codec;
use crate::heed_codec::facet::OrderedF64Codec;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec};
use crate::Result;
/// Extracts the facet number and the document ids where this facet number appears.
@ -36,8 +36,8 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
let (field_id, document_id, number) =
FieldDocIdFacetF64Codec::bytes_decode(key_bytes).unwrap();
let key = FacetKey { field_id, level: 0, left_bound: number };
let key_bytes = FacetKeyCodec::<OrderedF64Codec>::bytes_encode(&key).unwrap();
let key = FacetGroupKey { field_id, level: 0, left_bound: number };
let key_bytes = FacetGroupKeyCodec::<OrderedF64Codec>::bytes_encode(&key).unwrap();
facet_number_docids_sorter.insert(key_bytes, document_id.to_ne_bytes())?;
}

View File

@ -4,8 +4,8 @@ use std::io;
use heed::BytesEncode;
use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters};
use crate::heed_codec::facet::new::str_ref::StrRefCodec;
use crate::heed_codec::facet::new::{FacetKey, FacetKeyCodec};
use crate::heed_codec::facet::StrRefCodec;
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec};
use crate::update::index_documents::merge_cbo_roaring_bitmaps;
use crate::{FieldId, Result};
@ -43,8 +43,8 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
let document_id = u32::from_be_bytes(document_id_bytes);
let normalised_value = std::str::from_utf8(normalized_value_bytes)?;
let key = FacetKey { field_id, level: 0, left_bound: normalised_value };
let key_bytes = FacetKeyCodec::<StrRefCodec>::bytes_encode(&key).unwrap();
let key = FacetGroupKey { field_id, level: 0, left_bound: normalised_value };
let key_bytes = FacetGroupKeyCodec::<StrRefCodec>::bytes_encode(&key).unwrap();
facet_string_docids_sorter.insert(&key_bytes, &document_id.to_ne_bytes())?;
}

View File

@ -0,0 +1,4 @@
---
source: milli/src/update/word_prefix_pair_proximity_docids.rs
---
6873ff1f78d08f2b1a13bb9e37349c01