Introduce a lot of facet string helper iterators

This commit is contained in:
Kerollmops 2021-06-23 15:53:28 +02:00
parent 851f979039
commit a79661c6dc
No known key found for this signature in database
GPG key ID: 92ADA4E935E71FA4
5 changed files with 319 additions and 8 deletions

View file

@ -0,0 +1,52 @@
use std::borrow::Cow;
use std::convert::TryInto;
use std::num::NonZeroU8;
use crate::FieldId;
/// A codec for the facet string keys of level 1 and higher: it stores the
/// field id, the level and the two `u32` group ids (left and right).
///
/// It can only be used to encode the facet strings of level 1 or higher,
/// which is why the level is represented by a `NonZeroU8`.
pub struct FacetLevelValueU32Codec;
impl<'a> heed::BytesDecode<'a> for FacetLevelValueU32Codec {
    type DItem = (FieldId, NonZeroU8, u32, u32);

    /// Decodes a key made of the field id byte, the level byte and a
    /// 16-byte payload holding the left and right bounds.
    ///
    /// The encoder writes the two big-endian bounds twice; the second copy
    /// (payload offsets `8..12` and `12..16`) is the one read back here.
    ///
    /// Returns `None` when the key is too short or too long, or when the
    /// level is zero, instead of panicking on malformed input.
    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        let (field_id, bytes) = bytes.split_first()?;
        let (level, bytes) = bytes.split_first()?;
        let level = NonZeroU8::new(*level)?;
        // Checked slicing: a short payload yields `None`, never a panic.
        let left = bytes.get(8..12)?.try_into().ok().map(u32::from_be_bytes)?;
        // The conversion into `[u8; 4]` fails if anything trails the right bound.
        let right = bytes.get(12..)?.try_into().ok().map(u32::from_be_bytes)?;
        Some((*field_id, level, left, right))
    }
}
impl heed::BytesEncode<'_> for FacetLevelValueU32Codec {
    type EItem = (FieldId, NonZeroU8, u32, u32);

    /// Encodes the key as: field id byte, level byte, then the big-endian
    /// `left` and `right` bounds written twice in a row (18 bytes in total).
    fn bytes_encode((field_id, level, left, right): &Self::EItem) -> Option<Cow<[u8]>> {
        let left_be = left.to_be_bytes();
        let right_be = right.to_be_bytes();

        // Two header bytes followed by four 4-byte integers.
        let mut key = Vec::with_capacity(2 + 4 * 4);
        key.push(*field_id);
        key.push(level.get());

        // First copy of the big-endian integers.
        key.extend_from_slice(&left_be);
        key.extend_from_slice(&right_be);

        // Second copy of the u32 values, just to be able to read them back.
        key.extend_from_slice(&left_be);
        key.extend_from_slice(&right_be);

        Some(Cow::Owned(key))
    }
}

View file

@ -0,0 +1,49 @@
use std::borrow::Cow;
use std::str;
use crate::FieldId;
/// A codec that stores the field id, the level 0, and the facet string.
///
/// It can only be used to encode the facet strings of the level 0,
/// as it hardcodes the level.
///
/// We encode the level 0 to not break the lexicographical ordering of the LMDB keys,
/// and to make sure that the levels are not mixed up. The level 0 is special: its keys
/// are strings, whereas the other levels represent groups whose keys are simply two integers.
pub struct FacetStringLevelZeroCodec;
impl FacetStringLevelZeroCodec {
    /// Appends the encoded key to `out`: the field id byte, a zero level
    /// byte, then the UTF-8 bytes of `value`.
    ///
    /// Existing content of `out` is left untouched.
    pub fn serialize_into(field_id: FieldId, value: &str, out: &mut Vec<u8>) {
        let text = value.as_bytes();
        out.reserve(text.len() + 2);
        // The second byte is the level zero (for LMDB ordering only).
        out.extend_from_slice(&[field_id, 0]);
        out.extend_from_slice(text);
    }
}
impl<'a> heed::BytesDecode<'a> for FacetStringLevelZeroCodec {
    type DItem = (FieldId, &'a str);

    /// Decodes a level 0 key: the field id byte, a zero level byte and the
    /// facet string.
    ///
    /// Returns `None` when the key has fewer than two bytes, when the level
    /// byte is not zero or when the remaining bytes are not valid UTF-8.
    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        match bytes {
            [field_id, 0, text @ ..] => {
                str::from_utf8(text).ok().map(|value| (*field_id, value))
            }
            _ => None,
        }
    }
}
impl<'a> heed::BytesEncode<'a> for FacetStringLevelZeroCodec {
    type EItem = (FieldId, &'a str);

    /// Encodes the field id and the facet string into a freshly allocated
    /// key by delegating to `serialize_into`. Always returns `Some`.
    fn bytes_encode((field_id, value): &Self::EItem) -> Option<Cow<[u8]>> {
        let mut key = Vec::new();
        Self::serialize_into(*field_id, value, &mut key);
        Some(Cow::Owned(key))
    }
}

View file

@ -0,0 +1,80 @@
use std::borrow::Cow;
use std::convert::TryInto;
use std::{marker, str};
/// A codec that optionally encodes two strings in front of the value,
/// the value itself being handled by the inner codec `C`.
///
/// The use case is the facet string levels algorithm, where we must
/// know the origin of a group: the left and right bounds of the group are stored
/// in the value to not break the lexicographical ordering of the LMDB keys.
///
/// The first byte of the encoded value tells whether bounds are present,
/// zero meaning that there are none.
pub struct FacetStringZeroBoundsValueCodec<C>(marker::PhantomData<C>);
impl<'a, C> heed::BytesDecode<'a> for FacetStringZeroBoundsValueCodec<C>
where
    C: heed::BytesDecode<'a>,
{
    type DItem = (Option<(&'a str, &'a str)>, C::DItem);

    /// Decodes the optional bounds followed by the value of the inner codec.
    ///
    /// Expected layout: one flag byte, then, only when the flag is not zero,
    /// the two big-endian `u16` lengths of the left and right bounds followed
    /// by the bytes of the left bound and of the right bound. Whatever remains
    /// is handed to `C::bytes_decode`.
    ///
    /// Returns `None` when the input is truncated, when a bound is not valid
    /// UTF-8 or when the inner codec fails.
    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        // Shadow the input so that everything below reads *after* the flag byte.
        let (contains_bounds, bytes) = bytes.split_first()?;

        if *contains_bounds == 0 {
            return C::bytes_decode(bytes).map(|item| (None, item));
        }

        let (left_len, bytes) = try_split_at(bytes, 2)?;
        let (right_len, bytes) = try_split_at(bytes, 2)?;

        let left_len = left_len.try_into().ok().map(u16::from_be_bytes)?;
        let right_len = right_len.try_into().ok().map(u16::from_be_bytes)?;

        let (left, bytes) = try_split_at(bytes, usize::from(left_len))?;
        let (right, bytes) = try_split_at(bytes, usize::from(right_len))?;

        let left = str::from_utf8(left).ok()?;
        let right = str::from_utf8(right).ok()?;

        C::bytes_decode(bytes).map(|item| (Some((left, right)), item))
    }
}
impl<'a, C> heed::BytesEncode<'a> for FacetStringZeroBoundsValueCodec<C>
where
    C: heed::BytesEncode<'a>,
{
    type EItem = (Option<(&'a str, &'a str)>, C::EItem);

    /// Encodes the optional bounds in front of the value of the inner codec.
    ///
    /// Layout: one flag byte (zero when there are no bounds, non-zero
    /// otherwise), then, only when bounds are present, the two big-endian
    /// `u16` lengths of the left and right bounds followed by the bytes of
    /// the left bound and of the right bound. The bytes produced by
    /// `C::bytes_encode` come last.
    ///
    /// Returns `None` when a bound is longer than `u16::MAX` bytes or when
    /// the inner codec fails.
    fn bytes_encode((bounds, value): &'a Self::EItem) -> Option<Cow<[u8]>> {
        let mut bytes = Vec::new();

        match bounds {
            Some((left, right)) => {
                // A non-zero flag tells the decoder that bounds follow. Without
                // it, the first length byte would be mistaken for the flag.
                bytes.push(u8::MAX);

                let left_len: u16 = left.len().try_into().ok()?;
                let right_len: u16 = right.len().try_into().ok()?;
                bytes.extend_from_slice(&left_len.to_be_bytes());
                bytes.extend_from_slice(&right_len.to_be_bytes());

                // The bounds themselves, in the order the decoder reads them.
                bytes.extend_from_slice(left.as_bytes());
                bytes.extend_from_slice(right.as_bytes());
            }
            None => bytes.push(0),
        }

        let value_bytes = C::bytes_encode(value)?;
        bytes.extend_from_slice(&value_bytes[..]);
        Some(Cow::Owned(bytes))
    }
}
/// Splits `slice` in two at index `mid`, the first part holding the `mid`
/// first bytes. Returns `None` when the slice is shorter than `mid`.
fn try_split_at(slice: &[u8], mid: usize) -> Option<(&[u8], &[u8])> {
    let head = slice.get(..mid)?;
    let tail = slice.get(mid..)?;
    Some((head, tail))
}

View file

@ -1,9 +1,15 @@
mod facet_level_value_f64_codec;
mod facet_level_value_u32_codec;
mod facet_string_level_zero_codec;
mod facet_string_zero_bounds_value_codec;
mod facet_value_string_codec;
mod field_doc_id_facet_f64_codec;
mod field_doc_id_facet_string_codec;
pub use self::facet_level_value_f64_codec::FacetLevelValueF64Codec;
pub use self::facet_level_value_u32_codec::FacetLevelValueU32Codec;
pub use self::facet_string_level_zero_codec::FacetStringLevelZeroCodec;
pub use self::facet_string_zero_bounds_value_codec::FacetStringZeroBoundsValueCodec;
pub use self::facet_value_string_codec::FacetValueStringCodec;
pub use self::field_doc_id_facet_f64_codec::FieldDocIdFacetF64Codec;
pub use self::field_doc_id_facet_string_codec::FieldDocIdFacetStringCodec;