Use a u16 field id instead of one byte

This commit is contained in:
Kerollmops 2021-07-06 11:31:24 +02:00
parent cc54c41e30
commit 838ed1cd32
No known key found for this signature in database
GPG key ID: 92ADA4E935E71FA4
17 changed files with 115 additions and 64 deletions

View file

@ -2,7 +2,7 @@ use std::borrow::Cow;
use std::convert::TryInto;
use crate::facet::value_encoding::f64_into_bytes;
use crate::FieldId;
use crate::{try_split_array_at, FieldId};
// TODO do not de/serialize right bound when level = 0
pub struct FacetLevelValueF64Codec;
@ -11,7 +11,8 @@ impl<'a> heed::BytesDecode<'a> for FacetLevelValueF64Codec {
type DItem = (FieldId, u8, f64, f64);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let (field_id, bytes) = bytes.split_first()?;
let (field_id_bytes, bytes) = try_split_array_at(bytes)?;
let field_id = u16::from_be_bytes(field_id_bytes);
let (level, bytes) = bytes.split_first()?;
let (left, right) = if *level != 0 {
@ -23,7 +24,7 @@ impl<'a> heed::BytesDecode<'a> for FacetLevelValueF64Codec {
(left, left)
};
Some((*field_id, *level, left, right))
Some((field_id, *level, left, right))
}
}
@ -61,8 +62,8 @@ impl heed::BytesEncode<'_> for FacetLevelValueF64Codec {
16 // length
};
let mut bytes = Vec::with_capacity(len + 2);
bytes.push(*field_id);
let mut bytes = Vec::with_capacity(len + 3);
bytes.extend_from_slice(&field_id.to_be_bytes());
bytes.push(*level);
bytes.extend_from_slice(&buffer[..len]);
Some(Cow::Owned(bytes))

View file

@ -1,14 +1,14 @@
use std::borrow::Cow;
use std::str;
use crate::FieldId;
use crate::{try_split_array_at, FieldId};
pub struct FacetValueStringCodec;
impl FacetValueStringCodec {
/// Appends the encoded form of `(field_id, value)` to `out`: the field id
/// first, then the raw UTF-8 bytes of `value`.
pub fn serialize_into(field_id: FieldId, value: &str, out: &mut Vec<u8>) {
// NOTE(review): this span comes from a rendered diff whose +/- markers were
// stripped. The `+ 1` / `push` pair looks like the removed one-byte field id
// encoding and the `+ 2` / `to_be_bytes` pair like its two-byte replacement;
// confirm against the real file before relying on either.
out.reserve(value.len() + 1);
out.push(field_id);
// Reserve room for the two field id bytes plus the string, then write the
// field id in big-endian order.
out.reserve(value.len() + 2);
out.extend_from_slice(&field_id.to_be_bytes());
out.extend_from_slice(value.as_bytes());
}
}
@ -17,9 +17,10 @@ impl<'a> heed::BytesDecode<'a> for FacetValueStringCodec {
type DItem = (FieldId, &'a str);
/// Decodes `bytes` into `(field_id, value)`, returning `None` when a split
/// fails or the string part is not valid UTF-8.
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
// NOTE(review): rendered diff without +/- markers; the `split_first` line
// looks like the removed one-byte read, and the two lines after it like the
// u16 replacement. Confirm against the real file.
let (field_id, bytes) = bytes.split_first()?;
// `try_split_array_at` is defined elsewhere in the crate; presumably it splits
// off a fixed-size array prefix and yields `None` on short input (TODO confirm).
// The prefix is interpreted as a big-endian u16 field id.
let (field_id_bytes, bytes) = try_split_array_at(bytes)?;
let field_id = u16::from_be_bytes(field_id_bytes);
// Everything after the field id is the string value.
let value = str::from_utf8(bytes).ok()?;
Some((*field_id, value))
Some((field_id, value))
}
}

View file

@ -2,7 +2,7 @@ use std::borrow::Cow;
use std::convert::TryInto;
use crate::facet::value_encoding::f64_into_bytes;
use crate::{DocumentId, FieldId};
use crate::{try_split_array_at, DocumentId, FieldId};
pub struct FieldDocIdFacetF64Codec;
@ -10,14 +10,15 @@ impl<'a> heed::BytesDecode<'a> for FieldDocIdFacetF64Codec {
type DItem = (FieldId, DocumentId, f64);
/// Decodes `bytes` into `(field_id, document_id, value)`.
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
// NOTE(review): rendered diff without +/- markers; the `split_first`,
// `split_at(4)` and `*field_id` lines look like the removed pre-change code,
// and the `try_split_array_at` lines like their replacements. Confirm against
// the real file.
let (field_id, bytes) = bytes.split_first()?;
// `try_split_array_at` is defined elsewhere in the crate; presumably it splits
// off a fixed-size array prefix and yields `None` on short input (TODO confirm).
// The first prefix is read as a big-endian u16 field id.
let (field_id_bytes, bytes) = try_split_array_at(bytes)?;
let field_id = u16::from_be_bytes(field_id_bytes);
let (document_id_bytes, bytes) = bytes.split_at(4);
let document_id = document_id_bytes.try_into().map(u32::from_be_bytes).ok()?;
// The next prefix is read as a big-endian u32 document id.
let (document_id_bytes, bytes) = try_split_array_at(bytes)?;
let document_id = u32::from_be_bytes(document_id_bytes);
// Reads the f64 from bytes 8..16 of the remainder. Presumably the first 8
// bytes hold the output of `f64_into_bytes` written by the encoder (TODO confirm).
// NOTE(review): `bytes[8..16]` panics when fewer than 16 bytes remain instead
// of returning `None`; `bytes.get(8..16)?` would keep the decoder total.
let value = bytes[8..16].try_into().map(f64::from_be_bytes).ok()?;
Some((*field_id, document_id, value))
Some((field_id, document_id, value))
}
}
@ -25,8 +26,8 @@ impl<'a> heed::BytesEncode<'a> for FieldDocIdFacetF64Codec {
type EItem = (FieldId, DocumentId, f64);
fn bytes_encode((field_id, document_id, value): &Self::EItem) -> Option<Cow<[u8]>> {
let mut bytes = Vec::with_capacity(1 + 4 + 8 + 8);
bytes.push(*field_id);
let mut bytes = Vec::with_capacity(2 + 4 + 8 + 8);
bytes.extend_from_slice(&field_id.to_be_bytes());
bytes.extend_from_slice(&document_id.to_be_bytes());
let value_bytes = f64_into_bytes(*value)?;
bytes.extend_from_slice(&value_bytes);

View file

@ -1,8 +1,7 @@
use std::borrow::Cow;
use std::convert::TryInto;
use std::str;
use crate::{DocumentId, FieldId};
use crate::{try_split_array_at, DocumentId, FieldId};
pub struct FieldDocIdFacetStringCodec;
@ -13,8 +12,8 @@ impl FieldDocIdFacetStringCodec {
value: &str,
out: &mut Vec<u8>,
) {
out.reserve(1 + 4 + value.len());
out.push(field_id);
out.reserve(2 + 4 + value.len());
out.extend_from_slice(&field_id.to_be_bytes());
out.extend_from_slice(&document_id.to_be_bytes());
out.extend_from_slice(value.as_bytes());
}
@ -24,11 +23,14 @@ impl<'a> heed::BytesDecode<'a> for FieldDocIdFacetStringCodec {
type DItem = (FieldId, DocumentId, &'a str);
/// Decodes `bytes` into `(field_id, document_id, value)`, returning `None`
/// when a split fails or the string part is not valid UTF-8.
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
// NOTE(review): rendered diff without +/- markers; the next three lines and
// the `*field_id` return look like the removed pre-change code, and the
// `try_split_array_at` lines like their replacements. Confirm against the
// real file.
let (field_id, bytes) = bytes.split_first()?;
let (document_id_bytes, bytes) = bytes.split_at(4);
let document_id = document_id_bytes.try_into().map(u32::from_be_bytes).ok()?;
// `try_split_array_at` is defined elsewhere in the crate; presumably it splits
// off a fixed-size array prefix and yields `None` on short input (TODO confirm).
// The first prefix is read as a big-endian u16 field id.
let (field_id_bytes, bytes) = try_split_array_at(bytes)?;
let field_id = u16::from_be_bytes(field_id_bytes);
// The next prefix is read as a big-endian u32 document id.
let (document_id_bytes, bytes) = try_split_array_at(bytes)?;
let document_id = u32::from_be_bytes(document_id_bytes);
// Whatever remains is the string value.
let value = str::from_utf8(bytes).ok()?;
Some((*field_id, document_id, value))
Some((field_id, document_id, value))
}
}

View file

@ -1,7 +1,6 @@
use std::borrow::Cow;
use std::convert::TryInto;
use crate::FieldId;
use crate::{try_split_array_at, FieldId};
pub struct FieldIdWordCountCodec;
@ -9,7 +8,9 @@ impl<'a> heed::BytesDecode<'a> for FieldIdWordCountCodec {
type DItem = (FieldId, u8);
/// Decodes `bytes` into `(field_id, word_count)`.
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
// NOTE(review): rendered diff without +/- markers; this first line looks like
// the removed decoder, which required the input to be exactly two bytes.
// Confirm against the real file.
let [field_id, word_count]: [u8; 2] = bytes.try_into().ok()?;
// `try_split_array_at` is defined elsewhere in the crate; presumably it splits
// off a fixed-size array prefix and yields `None` on short input (TODO confirm).
// The first prefix is read as a big-endian u16 field id.
let (field_id_bytes, bytes) = try_split_array_at(bytes)?;
let field_id = u16::from_be_bytes(field_id_bytes);
// A single byte follows for the word count. The remainder is bound to
// `_nothing` and never checked, so trailing bytes appear to be ignored here.
let ([word_count], _nothing) = try_split_array_at(bytes)?;
Some((field_id, word_count))
}
}
@ -18,6 +19,9 @@ impl<'a> heed::BytesEncode<'a> for FieldIdWordCountCodec {
type EItem = (FieldId, u8);
/// Encodes `(field_id, word_count)` into an owned byte vector; always `Some`.
fn bytes_encode((field_id, word_count): &Self::EItem) -> Option<Cow<[u8]>> {
// NOTE(review): rendered diff without +/- markers; this `vec![..]` line looks
// like the removed one-byte field id encoding. Confirm against the real file.
Some(Cow::Owned(vec![*field_id, *word_count]))
// Two big-endian field id bytes followed by one word count byte.
let mut bytes = Vec::with_capacity(2 + 1);
bytes.extend_from_slice(&field_id.to_be_bytes());
bytes.push(*word_count);
Some(Cow::Owned(bytes))
}
}

View file

@ -1,19 +1,19 @@
use std::borrow::Cow;
use obkv::{KvReader, KvWriter};
use obkv::{KvReaderU16, KvWriterU16};
pub struct ObkvCodec;
// Decoding side of `ObkvCodec`: hands the stored bytes to an obkv reader.
// NOTE(review): rendered diff without +/- markers; each `KvReader` line looks
// like the removed variant and the `KvReaderU16` line after it like its
// replacement. Confirm against the real file.
impl<'a> heed::BytesDecode<'a> for ObkvCodec {
type DItem = KvReader<'a>;
type DItem = KvReaderU16<'a>;
/// Passes `bytes` to the reader constructor; the result is always `Some`.
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
Some(KvReader::new(bytes))
Some(KvReaderU16::new(bytes))
}
}
impl heed::BytesEncode<'_> for ObkvCodec {
type EItem = KvWriter<Vec<u8>>;
type EItem = KvWriterU16<Vec<u8>>;
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
item.clone().into_inner().map(Cow::Owned).ok()