Further unify facet databases of f64s and strings

This commit is contained in:
Loïc Lecrenier 2022-09-08 13:28:17 +02:00 committed by Loïc Lecrenier
parent 3baa34d842
commit cb8442a119
5 changed files with 63 additions and 129 deletions

View file

@ -1,13 +1,15 @@
use std::borrow::Cow;
use std::convert::TryInto;
use crate::facet::value_encoding::f64_into_bytes;
use crate::{try_split_array_at, DocumentId, FieldId};
use heed::{BytesDecode, BytesEncode};
use std::borrow::Cow;
use std::marker::PhantomData;
pub struct FieldDocIdFacetF64Codec;
pub struct FieldDocIdFacetCodec<C>(PhantomData<C>);
impl<'a> heed::BytesDecode<'a> for FieldDocIdFacetF64Codec {
type DItem = (FieldId, DocumentId, f64);
impl<'a, C> BytesDecode<'a> for FieldDocIdFacetCodec<C>
where
C: BytesDecode<'a>,
{
type DItem = (FieldId, DocumentId, C::DItem);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let (field_id_bytes, bytes) = try_split_array_at(bytes)?;
@ -16,22 +18,24 @@ impl<'a> heed::BytesDecode<'a> for FieldDocIdFacetF64Codec {
let (document_id_bytes, bytes) = try_split_array_at(bytes)?;
let document_id = u32::from_be_bytes(document_id_bytes);
let value = bytes[8..16].try_into().map(f64::from_be_bytes).ok()?;
let value = C::bytes_decode(&bytes[8..])?;
Some((field_id, document_id, value))
}
}
impl<'a> heed::BytesEncode<'a> for FieldDocIdFacetF64Codec {
type EItem = (FieldId, DocumentId, f64);
impl<'a, C> BytesEncode<'a> for FieldDocIdFacetCodec<C>
where
C: BytesEncode<'a>,
{
type EItem = (FieldId, DocumentId, C::EItem);
fn bytes_encode((field_id, document_id, value): &Self::EItem) -> Option<Cow<[u8]>> {
fn bytes_encode((field_id, document_id, value): &'a Self::EItem) -> Option<Cow<[u8]>> {
let mut bytes = Vec::with_capacity(2 + 4 + 8 + 8);
bytes.extend_from_slice(&field_id.to_be_bytes());
bytes.extend_from_slice(&document_id.to_be_bytes());
let value_bytes = f64_into_bytes(*value)?;
let value_bytes = C::bytes_encode(value)?;
bytes.extend_from_slice(&value_bytes);
bytes.extend_from_slice(&value.to_be_bytes());
Some(Cow::Owned(bytes))
}
}

View file

@ -1,50 +0,0 @@
use std::borrow::Cow;
use std::str;
use crate::{try_split_array_at, DocumentId, FieldId};
pub struct FieldDocIdFacetStringCodec;
impl FieldDocIdFacetStringCodec {
pub fn serialize_into(
field_id: FieldId,
document_id: DocumentId,
normalized_value: &str,
out: &mut Vec<u8>,
) {
out.reserve(2 + 4 + normalized_value.len());
out.extend_from_slice(&field_id.to_be_bytes());
out.extend_from_slice(&document_id.to_be_bytes());
out.extend_from_slice(normalized_value.as_bytes());
}
}
impl<'a> heed::BytesDecode<'a> for FieldDocIdFacetStringCodec {
type DItem = (FieldId, DocumentId, &'a str);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let (field_id_bytes, bytes) = try_split_array_at(bytes)?;
let field_id = u16::from_be_bytes(field_id_bytes);
let (document_id_bytes, bytes) = try_split_array_at(bytes)?;
let document_id = u32::from_be_bytes(document_id_bytes);
let normalized_value = str::from_utf8(bytes).ok()?;
Some((field_id, document_id, normalized_value))
}
}
impl<'a> heed::BytesEncode<'a> for FieldDocIdFacetStringCodec {
type EItem = (FieldId, DocumentId, &'a str);
fn bytes_encode((field_id, document_id, normalized_value): &Self::EItem) -> Option<Cow<[u8]>> {
let mut bytes = Vec::new();
FieldDocIdFacetStringCodec::serialize_into(
*field_id,
*document_id,
normalized_value,
&mut bytes,
);
Some(Cow::Owned(bytes))
}
}

View file

@ -1,5 +1,4 @@
mod field_doc_id_facet_f64_codec;
mod field_doc_id_facet_string_codec;
mod field_doc_id_facet_codec;
mod ordered_f64_codec;
mod str_ref;
@ -7,16 +6,19 @@ use std::borrow::Cow;
use std::convert::TryFrom;
use std::marker::PhantomData;
use heed::types::OwnedType;
use heed::types::{DecodeIgnore, OwnedType};
use heed::{BytesDecode, BytesEncode};
use roaring::RoaringBitmap;
pub use self::field_doc_id_facet_f64_codec::FieldDocIdFacetF64Codec;
pub use self::field_doc_id_facet_string_codec::FieldDocIdFacetStringCodec;
pub use self::field_doc_id_facet_codec::FieldDocIdFacetCodec;
pub use self::ordered_f64_codec::OrderedF64Codec;
pub use self::str_ref::StrRefCodec;
use crate::{CboRoaringBitmapCodec, BEU16};
pub type FieldDocIdFacetF64Codec = FieldDocIdFacetCodec<OrderedF64Codec>;
pub type FieldDocIdFacetStringCodec = FieldDocIdFacetCodec<StrRefCodec>;
pub type FieldDocIdFacetIgnoreCodec = FieldDocIdFacetCodec<DecodeIgnore>;
pub type FieldIdCodec = OwnedType<BEU16>;
/// Tries to split a slice in half at the given middle point,