mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 20:37:15 +02:00
Refactor facet-related codecs
This commit is contained in:
parent
9b55e582cd
commit
485a72306d
22 changed files with 280 additions and 301 deletions
|
@ -1,25 +1,19 @@
|
|||
// mod facet_level_value_f64_codec;
|
||||
// mod facet_level_value_u32_codec;
|
||||
// mod facet_string_level_zero_codec;
|
||||
// mod facet_string_level_zero_value_codec;
|
||||
// mod facet_string_zero_bounds_value_codec;
|
||||
mod field_doc_id_facet_f64_codec;
|
||||
mod field_doc_id_facet_string_codec;
|
||||
mod ordered_f64_codec;
|
||||
mod str_ref;
|
||||
|
||||
pub mod new;
|
||||
|
||||
use heed::types::OwnedType;
|
||||
|
||||
// pub use self::facet_level_value_f64_codec::FacetLevelValueF64Codec;
|
||||
// pub use self::facet_level_value_u32_codec::FacetLevelValueU32Codec;
|
||||
// pub use self::facet_string_level_zero_codec::FacetStringLevelZeroCodec;
|
||||
// pub use self::facet_string_level_zero_value_codec::{
|
||||
// decode_prefix_string, encode_prefix_string, FacetStringLevelZeroValueCodec,
|
||||
// };
|
||||
// pub use self::facet_string_zero_bounds_value_codec::FacetStringZeroBoundsValueCodec;
|
||||
pub use self::field_doc_id_facet_f64_codec::FieldDocIdFacetF64Codec;
|
||||
pub use self::field_doc_id_facet_string_codec::FieldDocIdFacetStringCodec;
|
||||
use crate::BEU16;
|
||||
pub use self::ordered_f64_codec::OrderedF64Codec;
|
||||
pub use self::str_ref::StrRefCodec;
|
||||
use crate::{CboRoaringBitmapCodec, BEU16};
|
||||
use heed::types::OwnedType;
|
||||
use heed::{BytesDecode, BytesEncode};
|
||||
use roaring::RoaringBitmap;
|
||||
use std::borrow::Cow;
|
||||
use std::convert::TryFrom;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
pub type FieldIdCodec = OwnedType<BEU16>;
|
||||
|
||||
|
@ -32,3 +26,109 @@ pub fn try_split_at(slice: &[u8], mid: usize) -> Option<(&[u8], &[u8])> {
|
|||
None
|
||||
}
|
||||
}
|
||||
|
||||
/// Decoded form of the keys handled by `FacetGroupKeyCodec`.
///
/// The derived ordering compares `field_id` first, then `level`, then
/// `left_bound`, which is the same order in which the codec writes the
/// fields.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct FacetGroupKey<T> {
    /// Identifier of the field; written as two big-endian bytes by the codec.
    pub field_id: u16,
    /// Level of the entry; written as a single byte by the codec.
    pub level: u8,
    /// Left bound carried by the key, in whatever representation `T` uses.
    pub left_bound: T,
}
|
||||
impl<'a> FacetGroupKey<&'a [u8]> {
|
||||
pub fn into_owned(self) -> FacetGroupKey<Vec<u8>> {
|
||||
FacetGroupKey {
|
||||
field_id: self.field_id,
|
||||
level: self.level,
|
||||
left_bound: self.left_bound.to_vec(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> FacetGroupKey<Vec<u8>> {
|
||||
pub fn as_ref(&self) -> FacetGroupKey<&[u8]> {
|
||||
FacetGroupKey {
|
||||
field_id: self.field_id,
|
||||
level: self.level,
|
||||
left_bound: self.left_bound.as_slice(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct FacetGroupValue {
|
||||
pub size: u8,
|
||||
pub bitmap: RoaringBitmap,
|
||||
}
|
||||
|
||||
/// Codec for `FacetGroupKey` values, generic over the codec `T` used for the
/// key's left bound.
///
/// The type stores no data; `T` is only recorded through `PhantomData`.
pub struct FacetGroupKeyCodec<T> {
    _phantom: PhantomData<T>,
}
|
||||
|
||||
impl<'a, T> heed::BytesEncode<'a> for FacetGroupKeyCodec<T>
|
||||
where
|
||||
T: BytesEncode<'a>,
|
||||
T::EItem: Sized,
|
||||
{
|
||||
type EItem = FacetGroupKey<T::EItem>;
|
||||
|
||||
fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
let mut v = vec![];
|
||||
v.extend_from_slice(&value.field_id.to_be_bytes());
|
||||
v.extend_from_slice(&[value.level]);
|
||||
|
||||
let bound = T::bytes_encode(&value.left_bound)?;
|
||||
v.extend_from_slice(&bound);
|
||||
|
||||
Some(Cow::Owned(v))
|
||||
}
|
||||
}
|
||||
impl<'a, T> heed::BytesDecode<'a> for FacetGroupKeyCodec<T>
|
||||
where
|
||||
T: BytesDecode<'a>,
|
||||
{
|
||||
type DItem = FacetGroupKey<T::DItem>;
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let fid = u16::from_be_bytes(<[u8; 2]>::try_from(&bytes[0..=1]).ok()?);
|
||||
let level = bytes[2];
|
||||
let bound = T::bytes_decode(&bytes[3..])?;
|
||||
Some(FacetGroupKey { field_id: fid, level, left_bound: bound })
|
||||
}
|
||||
}
|
||||
|
||||
/// Codec for `FacetGroupValue`: the encoded form is the `size` byte followed
/// by the bitmap as written by `CboRoaringBitmapCodec::serialize_into`.
pub struct FacetGroupValueCodec;
|
||||
impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
|
||||
type EItem = FacetGroupValue;
|
||||
|
||||
fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
let mut v = vec![];
|
||||
v.push(value.size);
|
||||
CboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut v);
|
||||
Some(Cow::Owned(v))
|
||||
}
|
||||
}
|
||||
impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
|
||||
type DItem = FacetGroupValue;
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let size = bytes[0];
|
||||
let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..]).ok()?;
|
||||
Some(FacetGroupValue { size, bitmap })
|
||||
}
|
||||
}
|
||||
|
||||
/// Codec that passes byte slices through unchanged: encoding borrows the
/// given slice and decoding returns the input bytes as they are.
pub struct ByteSliceRef;
|
||||
|
||||
impl<'a> BytesEncode<'a> for ByteSliceRef {
|
||||
type EItem = &'a [u8];
|
||||
|
||||
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
Some(Cow::Borrowed(item))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> BytesDecode<'a> for ByteSliceRef {
    type DItem = &'a [u8];

    /// Returns the input bytes untouched; decoding cannot fail, so the result
    /// is always `Some`.
    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        let decoded: Self::DItem = bytes;
        Some(decoded)
    }
}
|
||||
|
|
|
@ -1,120 +0,0 @@
|
|||
use std::borrow::Cow;
|
||||
use std::convert::TryFrom;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use heed::{BytesDecode, BytesEncode};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::CboRoaringBitmapCodec;
|
||||
|
||||
pub mod ordered_f64_codec;
|
||||
pub mod str_ref;
|
||||
// TODO: these codecs were quickly written and not fast/resilient enough
|
||||
|
||||
/// Decoded form of the keys handled by `FacetKeyCodec`.
///
/// The derived ordering compares `field_id` first, then `level`, then
/// `left_bound`, which is the same order in which the codec writes the
/// fields.
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
pub struct FacetKey<T> {
    /// Identifier of the field; written as two big-endian bytes by the codec.
    pub field_id: u16,
    /// Level of the entry; written as a single byte by the codec.
    pub level: u8,
    /// Left bound carried by the key, in whatever representation `T` uses.
    pub left_bound: T,
}
|
||||
impl<'a> FacetKey<&'a [u8]> {
|
||||
pub fn into_owned(self) -> FacetKey<Vec<u8>> {
|
||||
FacetKey {
|
||||
field_id: self.field_id,
|
||||
level: self.level,
|
||||
left_bound: self.left_bound.to_vec(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> FacetKey<Vec<u8>> {
|
||||
pub fn as_ref(&self) -> FacetKey<&[u8]> {
|
||||
FacetKey {
|
||||
field_id: self.field_id,
|
||||
level: self.level,
|
||||
left_bound: self.left_bound.as_slice(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub struct FacetGroupValue {
|
||||
pub size: u8,
|
||||
pub bitmap: RoaringBitmap,
|
||||
}
|
||||
|
||||
/// Codec for `FacetKey` values, generic over the codec `T` used for the key's
/// left bound.
///
/// The type stores no data; `T` is only recorded through `PhantomData`.
pub struct FacetKeyCodec<T> {
    _phantom: PhantomData<T>,
}
|
||||
|
||||
impl<'a, T> heed::BytesEncode<'a> for FacetKeyCodec<T>
|
||||
where
|
||||
T: BytesEncode<'a>,
|
||||
T::EItem: Sized,
|
||||
{
|
||||
type EItem = FacetKey<T::EItem>;
|
||||
|
||||
fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
let mut v = vec![];
|
||||
v.extend_from_slice(&value.field_id.to_be_bytes());
|
||||
v.extend_from_slice(&[value.level]);
|
||||
|
||||
let bound = T::bytes_encode(&value.left_bound)?;
|
||||
v.extend_from_slice(&bound);
|
||||
|
||||
Some(Cow::Owned(v))
|
||||
}
|
||||
}
|
||||
impl<'a, T> heed::BytesDecode<'a> for FacetKeyCodec<T>
|
||||
where
|
||||
T: BytesDecode<'a>,
|
||||
{
|
||||
type DItem = FacetKey<T::DItem>;
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let fid = u16::from_be_bytes(<[u8; 2]>::try_from(&bytes[0..=1]).ok()?);
|
||||
let level = bytes[2];
|
||||
let bound = T::bytes_decode(&bytes[3..])?;
|
||||
Some(FacetKey { field_id: fid, level, left_bound: bound })
|
||||
}
|
||||
}
|
||||
|
||||
/// Codec for `FacetGroupValue`: the encoded form is the `size` byte followed
/// by the bitmap as written by `CboRoaringBitmapCodec::serialize_into`.
pub struct FacetGroupValueCodec;
|
||||
impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
|
||||
type EItem = FacetGroupValue;
|
||||
|
||||
fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
let mut v = vec![];
|
||||
v.push(value.size);
|
||||
CboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut v);
|
||||
Some(Cow::Owned(v))
|
||||
}
|
||||
}
|
||||
impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
|
||||
type DItem = FacetGroupValue;
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let size = bytes[0];
|
||||
let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..]).ok()?;
|
||||
Some(FacetGroupValue { size, bitmap })
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: get rid of this codec as it is named confusingly + should really be part of heed
|
||||
// or even replace the current ByteSlice codec
|
||||
/// Codec that passes byte slices through unchanged: encoding borrows the
/// given slice and decoding returns the input bytes as they are.
pub struct MyByteSlice;
|
||||
|
||||
impl<'a> BytesEncode<'a> for MyByteSlice {
|
||||
type EItem = &'a [u8];
|
||||
|
||||
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
Some(Cow::Borrowed(item))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> BytesDecode<'a> for MyByteSlice {
    type DItem = &'a [u8];

    /// Returns the input bytes untouched; decoding cannot fail, so the result
    /// is always `Some`.
    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        let decoded: Self::DItem = bytes;
        Some(decoded)
    }
}
|
Loading…
Add table
Add a link
Reference in a new issue