Make the changes to use heed v0.20-alpha.6

This commit is contained in:
Clément Renault 2023-11-22 18:21:19 +01:00
parent 56a0d91ecd
commit 0d4482625a
No known key found for this signature in database
GPG key ID: F250A4C4E3AE5F5F
54 changed files with 611 additions and 477 deletions

View file

@ -2,26 +2,28 @@ use std::borrow::Cow;
use std::convert::TryInto;
use std::str;
use heed::BoxedError;
pub struct BEU16StrCodec;
impl<'a> heed::BytesDecode<'a> for BEU16StrCodec {
type DItem = (u16, &'a str);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let (n_bytes, str_bytes) = bytes.split_at(2);
let n = n_bytes.try_into().map(u16::from_be_bytes).ok()?;
let s = str::from_utf8(str_bytes).ok()?;
Some((n, s))
let n = n_bytes.try_into().map(u16::from_be_bytes)?;
let s = str::from_utf8(str_bytes)?;
Ok((n, s))
}
}
impl<'a> heed::BytesEncode<'a> for BEU16StrCodec {
type EItem = (u16, &'a str);
fn bytes_encode((n, s): &Self::EItem) -> Option<Cow<[u8]>> {
fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(s.len() + 2);
bytes.extend_from_slice(&n.to_be_bytes());
bytes.extend_from_slice(s.as_bytes());
Some(Cow::Owned(bytes))
Ok(Cow::Owned(bytes))
}
}

View file

@ -2,26 +2,28 @@ use std::borrow::Cow;
use std::convert::TryInto;
use std::str;
use heed::BoxedError;
pub struct BEU32StrCodec;
impl<'a> heed::BytesDecode<'a> for BEU32StrCodec {
type DItem = (u32, &'a str);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let (n_bytes, str_bytes) = bytes.split_at(4);
let n = n_bytes.try_into().map(u32::from_be_bytes).ok()?;
let s = str::from_utf8(str_bytes).ok()?;
Some((n, s))
let n = n_bytes.try_into().map(u32::from_be_bytes)?;
let s = str::from_utf8(str_bytes)?;
Ok((n, s))
}
}
impl<'a> heed::BytesEncode<'a> for BEU32StrCodec {
type EItem = (u32, &'a str);
fn bytes_encode((n, s): &Self::EItem) -> Option<Cow<[u8]>> {
fn bytes_encode((n, s): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(s.len() + 4);
bytes.extend_from_slice(&n.to_be_bytes());
bytes.extend_from_slice(s.as_bytes());
Some(Cow::Owned(bytes))
Ok(Cow::Owned(bytes))
}
}

View file

@ -1,6 +1,6 @@
use std::borrow::Cow;
use heed::{BytesDecode, BytesEncode};
use heed::{BoxedError, BytesDecode, BytesEncode};
/// A codec for values of type `&[u8]`. Unlike `ByteSlice`, its `EItem` and `DItem` associated
/// types are equivalent (= `&'a [u8]`) and these values can reside within another structure.
@ -9,15 +9,15 @@ pub struct ByteSliceRefCodec;
impl<'a> BytesEncode<'a> for ByteSliceRefCodec {
type EItem = &'a [u8];
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
Some(Cow::Borrowed(item))
fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
Ok(Cow::Borrowed(item))
}
}
impl<'a> BytesDecode<'a> for ByteSliceRefCodec {
type DItem = &'a [u8];
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
Some(bytes)
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
Ok(bytes)
}
}

View file

@ -1,7 +1,7 @@
use std::borrow::Cow;
use std::marker::PhantomData;
use heed::{BytesDecode, BytesEncode};
use heed::{BoxedError, BytesDecode, BytesEncode};
use crate::{try_split_array_at, DocumentId, FieldId};
@ -13,16 +13,16 @@ where
{
type DItem = (FieldId, DocumentId, C::DItem);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let (field_id_bytes, bytes) = try_split_array_at(bytes)?;
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let (field_id_bytes, bytes) = try_split_array_at(bytes).unwrap();
let field_id = u16::from_be_bytes(field_id_bytes);
let (document_id_bytes, bytes) = try_split_array_at(bytes)?;
let (document_id_bytes, bytes) = try_split_array_at(bytes).unwrap();
let document_id = u32::from_be_bytes(document_id_bytes);
let value = C::bytes_decode(bytes)?;
Some((field_id, document_id, value))
Ok((field_id, document_id, value))
}
}
@ -32,13 +32,15 @@ where
{
type EItem = (FieldId, DocumentId, C::EItem);
fn bytes_encode((field_id, document_id, value): &'a Self::EItem) -> Option<Cow<[u8]>> {
fn bytes_encode(
(field_id, document_id, value): &'a Self::EItem,
) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(32);
bytes.extend_from_slice(&field_id.to_be_bytes()); // 2 bytes
bytes.extend_from_slice(&document_id.to_be_bytes()); // 4 bytes
let value_bytes = C::bytes_encode(value)?;
// variable length, if f64 -> 16 bytes, if string -> large, potentially
bytes.extend_from_slice(&value_bytes);
Some(Cow::Owned(bytes))
Ok(Cow::Owned(bytes))
}
}

View file

@ -5,8 +5,8 @@ use std::borrow::Cow;
use std::convert::TryFrom;
use std::marker::PhantomData;
use heed::types::{DecodeIgnore, OwnedType};
use heed::{BytesDecode, BytesEncode};
use heed::types::DecodeIgnore;
use heed::{BoxedError, BytesDecode, BytesEncode};
use roaring::RoaringBitmap;
pub use self::field_doc_id_facet_codec::FieldDocIdFacetCodec;
@ -18,7 +18,7 @@ pub type FieldDocIdFacetF64Codec = FieldDocIdFacetCodec<OrderedF64Codec>;
pub type FieldDocIdFacetStringCodec = FieldDocIdFacetCodec<StrRefCodec>;
pub type FieldDocIdFacetIgnoreCodec = FieldDocIdFacetCodec<DecodeIgnore>;
pub type FieldIdCodec = OwnedType<BEU16>;
pub type FieldIdCodec = BEU16;
/// Tries to split a slice in half at the given middle point,
/// `None` if the slice is too short.
@ -58,15 +58,15 @@ where
{
type EItem = FacetGroupKey<T::EItem>;
fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
fn bytes_encode(value: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
let mut v = vec![];
v.extend_from_slice(&value.field_id.to_be_bytes());
v.extend_from_slice(&[value.level]);
let bound = T::bytes_encode(&value.left_bound)?;
let bound = T::bytes_encode(&value.left_bound).unwrap();
v.extend_from_slice(&bound);
Some(Cow::Owned(v))
Ok(Cow::Owned(v))
}
}
impl<'a, T> heed::BytesDecode<'a> for FacetGroupKeyCodec<T>
@ -75,11 +75,11 @@ where
{
type DItem = FacetGroupKey<T::DItem>;
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let fid = u16::from_be_bytes(<[u8; 2]>::try_from(&bytes[0..=1]).ok()?);
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let fid = u16::from_be_bytes(<[u8; 2]>::try_from(&bytes[0..=1])?);
let level = bytes[2];
let bound = T::bytes_decode(&bytes[3..])?;
Some(FacetGroupKey { field_id: fid, level, left_bound: bound })
Ok(FacetGroupKey { field_id: fid, level, left_bound: bound })
}
}
@ -87,17 +87,17 @@ pub struct FacetGroupValueCodec;
impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
type EItem = FacetGroupValue;
fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
fn bytes_encode(value: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
let mut v = vec![value.size];
CboRoaringBitmapCodec::serialize_into(&value.bitmap, &mut v);
Some(Cow::Owned(v))
Ok(Cow::Owned(v))
}
}
impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
type DItem = FacetGroupValue;
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let size = bytes[0];
let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..]).ok()?;
Some(FacetGroupValue { size, bitmap })
let bitmap = CboRoaringBitmapCodec::deserialize_from(&bytes[1..])?;
Ok(FacetGroupValue { size, bitmap })
}
}

View file

@ -1,7 +1,7 @@
use std::borrow::Cow;
use std::convert::TryInto;
use heed::BytesDecode;
use heed::{BoxedError, BytesDecode};
use crate::facet::value_encoding::f64_into_bytes;
@ -10,28 +10,28 @@ pub struct OrderedF64Codec;
impl<'a> BytesDecode<'a> for OrderedF64Codec {
type DItem = f64;
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
if bytes.len() < 16 {
return None;
panic!() // TODO don't panic
}
let f = bytes[8..].try_into().ok().map(f64::from_be_bytes)?;
Some(f)
let f = bytes[8..].try_into().ok().map(f64::from_be_bytes).unwrap();
Ok(f)
}
}
impl heed::BytesEncode<'_> for OrderedF64Codec {
type EItem = f64;
fn bytes_encode(f: &Self::EItem) -> Option<Cow<[u8]>> {
fn bytes_encode(f: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut buffer = [0u8; 16];
// write the globally ordered float
let bytes = f64_into_bytes(*f)?;
let bytes = f64_into_bytes(*f).unwrap();
buffer[..8].copy_from_slice(&bytes[..]);
// Then the f64 value just to be able to read it back
let bytes = f.to_be_bytes();
buffer[8..16].copy_from_slice(&bytes[..]);
Some(Cow::Owned(buffer.to_vec()))
Ok(Cow::Owned(buffer.to_vec()))
}
}

View file

@ -1,5 +1,7 @@
use std::borrow::Cow;
use heed::BoxedError;
use crate::{try_split_array_at, FieldId};
pub struct FieldIdWordCountCodec;
@ -7,21 +9,21 @@ pub struct FieldIdWordCountCodec;
impl<'a> heed::BytesDecode<'a> for FieldIdWordCountCodec {
type DItem = (FieldId, u8);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let (field_id_bytes, bytes) = try_split_array_at(bytes)?;
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let (field_id_bytes, bytes) = try_split_array_at(bytes).unwrap();
let field_id = u16::from_be_bytes(field_id_bytes);
let ([word_count], _nothing) = try_split_array_at(bytes)?;
Some((field_id, word_count))
let ([word_count], _nothing) = try_split_array_at(bytes).unwrap();
Ok((field_id, word_count))
}
}
impl<'a> heed::BytesEncode<'a> for FieldIdWordCountCodec {
type EItem = (FieldId, u8);
fn bytes_encode((field_id, word_count): &Self::EItem) -> Option<Cow<[u8]>> {
fn bytes_encode((field_id, word_count): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(2 + 1);
bytes.extend_from_slice(&field_id.to_be_bytes());
bytes.push(*word_count);
Some(Cow::Owned(bytes))
Ok(Cow::Owned(bytes))
}
}

View file

@ -1,7 +1,7 @@
use std::borrow::Cow;
use fst::Set;
use heed::{BytesDecode, BytesEncode};
use heed::{BoxedError, BytesDecode, BytesEncode};
/// A codec for values of type `Set<&[u8]>`.
pub struct FstSetCodec;
@ -9,15 +9,15 @@ pub struct FstSetCodec;
impl<'a> BytesEncode<'a> for FstSetCodec {
type EItem = Set<Vec<u8>>;
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
Some(Cow::Borrowed(item.as_fst().as_bytes()))
fn bytes_encode(item: &'a Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
Ok(Cow::Borrowed(item.as_fst().as_bytes()))
}
}
impl<'a> BytesDecode<'a> for FstSetCodec {
type DItem = Set<&'a [u8]>;
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
Set::new(bytes).ok()
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
Set::new(bytes).map_err(Into::into)
}
}

View file

@ -13,6 +13,7 @@ mod str_ref;
mod str_str_u8_codec;
pub use byte_slice_ref::ByteSliceRefCodec;
use heed::BoxedError;
pub use str_ref::StrRefCodec;
pub use self::beu16_str_codec::BEU16StrCodec;
@ -31,5 +32,5 @@ pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec};
pub trait BytesDecodeOwned {
type DItem;
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem>;
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError>;
}

View file

@ -1,5 +1,6 @@
use std::borrow::Cow;
use heed::BoxedError;
use obkv::{KvReaderU16, KvWriterU16};
pub struct ObkvCodec;
@ -7,15 +8,15 @@ pub struct ObkvCodec;
impl<'a> heed::BytesDecode<'a> for ObkvCodec {
type DItem = KvReaderU16<'a>;
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
Some(KvReaderU16::new(bytes))
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
Ok(KvReaderU16::new(bytes))
}
}
impl heed::BytesEncode<'_> for ObkvCodec {
type EItem = KvWriterU16<Vec<u8>>;
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
item.clone().into_inner().map(Cow::Owned).ok()
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
item.clone().into_inner().map(Cow::Owned).map_err(Into::into)
}
}

View file

@ -2,7 +2,7 @@ use std::borrow::Cow;
use std::convert::TryInto;
use std::mem::size_of;
use heed::BytesDecode;
use heed::{BoxedError, BytesDecode};
use roaring::RoaringBitmap;
use crate::heed_codec::BytesDecodeOwned;
@ -19,22 +19,22 @@ impl BoRoaringBitmapCodec {
impl BytesDecode<'_> for BoRoaringBitmapCodec {
type DItem = RoaringBitmap;
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
let mut bitmap = RoaringBitmap::new();
for chunk in bytes.chunks(size_of::<u32>()) {
let bytes = chunk.try_into().ok()?;
let bytes = chunk.try_into()?;
bitmap.push(u32::from_ne_bytes(bytes));
}
Some(bitmap)
Ok(bitmap)
}
}
impl BytesDecodeOwned for BoRoaringBitmapCodec {
type DItem = RoaringBitmap;
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
Self::bytes_decode(bytes)
}
}
@ -42,9 +42,9 @@ impl BytesDecodeOwned for BoRoaringBitmapCodec {
impl heed::BytesEncode<'_> for BoRoaringBitmapCodec {
type EItem = RoaringBitmap;
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut out = Vec::new();
BoRoaringBitmapCodec::serialize_into(item, &mut out);
Some(Cow::Owned(out))
Ok(Cow::Owned(out))
}
}

View file

@ -3,6 +3,7 @@ use std::io;
use std::mem::size_of;
use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
use heed::BoxedError;
use roaring::RoaringBitmap;
use crate::heed_codec::BytesDecodeOwned;
@ -132,26 +133,26 @@ impl CboRoaringBitmapCodec {
impl heed::BytesDecode<'_> for CboRoaringBitmapCodec {
type DItem = RoaringBitmap;
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
Self::deserialize_from(bytes).ok()
fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
Self::deserialize_from(bytes).map_err(Into::into)
}
}
impl BytesDecodeOwned for CboRoaringBitmapCodec {
type DItem = RoaringBitmap;
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
Self::deserialize_from(bytes).ok()
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
Self::deserialize_from(bytes).map_err(Into::into)
}
}
impl heed::BytesEncode<'_> for CboRoaringBitmapCodec {
type EItem = RoaringBitmap;
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut vec = Vec::with_capacity(Self::serialized_size(item));
Self::serialize_into(item, &mut vec);
Some(Cow::Owned(vec))
Ok(Cow::Owned(vec))
}
}

View file

@ -1,5 +1,6 @@
use std::borrow::Cow;
use heed::BoxedError;
use roaring::RoaringBitmap;
use crate::heed_codec::BytesDecodeOwned;
@ -9,25 +10,25 @@ pub struct RoaringBitmapCodec;
impl heed::BytesDecode<'_> for RoaringBitmapCodec {
type DItem = RoaringBitmap;
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
RoaringBitmap::deserialize_unchecked_from(bytes).ok()
fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
RoaringBitmap::deserialize_unchecked_from(bytes).map_err(Into::into)
}
}
impl BytesDecodeOwned for RoaringBitmapCodec {
type DItem = RoaringBitmap;
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
RoaringBitmap::deserialize_from(bytes).ok()
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
RoaringBitmap::deserialize_from(bytes).map_err(Into::into)
}
}
impl heed::BytesEncode<'_> for RoaringBitmapCodec {
type EItem = RoaringBitmap;
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
fn bytes_encode(item: &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(item.serialized_size());
item.serialize_into(&mut bytes).ok()?;
Some(Cow::Owned(bytes))
item.serialize_into(&mut bytes)?;
Ok(Cow::Owned(bytes))
}
}

View file

@ -1,6 +1,6 @@
use std::mem;
use heed::BytesDecode;
use heed::{BoxedError, BytesDecode};
use crate::heed_codec::BytesDecodeOwned;
@ -9,15 +9,15 @@ pub struct BoRoaringBitmapLenCodec;
impl BytesDecode<'_> for BoRoaringBitmapLenCodec {
type DItem = u64;
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
Some((bytes.len() / mem::size_of::<u32>()) as u64)
fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
Ok((bytes.len() / mem::size_of::<u32>()) as u64)
}
}
impl BytesDecodeOwned for BoRoaringBitmapLenCodec {
type DItem = u64;
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
Self::bytes_decode(bytes)
}
}

View file

@ -1,6 +1,6 @@
use std::mem;
use heed::BytesDecode;
use heed::{BoxedError, BytesDecode};
use super::{BoRoaringBitmapLenCodec, RoaringBitmapLenCodec};
use crate::heed_codec::roaring_bitmap::cbo_roaring_bitmap_codec::THRESHOLD;
@ -11,7 +11,7 @@ pub struct CboRoaringBitmapLenCodec;
impl BytesDecode<'_> for CboRoaringBitmapLenCodec {
type DItem = u64;
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
if bytes.len() <= THRESHOLD * mem::size_of::<u32>() {
// If there is threshold or less than threshold integers that can fit into this array
// of bytes it means that we used the ByteOrder codec serializer.
@ -27,7 +27,7 @@ impl BytesDecode<'_> for CboRoaringBitmapLenCodec {
impl BytesDecodeOwned for CboRoaringBitmapLenCodec {
type DItem = u64;
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
Self::bytes_decode(bytes)
}
}

View file

@ -2,6 +2,7 @@ use std::io::{self, BufRead, Read};
use std::mem;
use byteorder::{LittleEndian, ReadBytesExt};
use heed::BoxedError;
use crate::heed_codec::BytesDecodeOwned;
@ -56,16 +57,16 @@ impl RoaringBitmapLenCodec {
impl heed::BytesDecode<'_> for RoaringBitmapLenCodec {
type DItem = u64;
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
RoaringBitmapLenCodec::deserialize_from_slice(bytes).ok()
fn bytes_decode(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
RoaringBitmapLenCodec::deserialize_from_slice(bytes).map_err(Into::into)
}
}
impl BytesDecodeOwned for RoaringBitmapLenCodec {
type DItem = u64;
fn bytes_decode_owned(bytes: &[u8]) -> Option<Self::DItem> {
RoaringBitmapLenCodec::deserialize_from_slice(bytes).ok()
fn bytes_decode_owned(bytes: &[u8]) -> Result<Self::DItem, BoxedError> {
RoaringBitmapLenCodec::deserialize_from_slice(bytes).map_err(Into::into)
}
}

View file

@ -2,29 +2,30 @@ use std::borrow::Cow;
use std::str;
use charabia::{Language, Script};
use heed::BoxedError;
pub struct ScriptLanguageCodec;
impl<'a> heed::BytesDecode<'a> for ScriptLanguageCodec {
type DItem = (Script, Language);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let sep = bytes.iter().position(|b| *b == 0)?;
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let sep = bytes.iter().position(|b| *b == 0).unwrap();
let (s_bytes, l_bytes) = bytes.split_at(sep);
let script = str::from_utf8(s_bytes).ok()?;
let script = str::from_utf8(s_bytes)?;
let script_name = Script::from_name(script);
let lan = str::from_utf8(l_bytes).ok()?;
let lan = str::from_utf8(l_bytes)?;
// skip '\0' byte between the two strings.
let lan_name = Language::from_name(&lan[1..]);
Some((script_name, lan_name))
Ok((script_name, lan_name))
}
}
impl<'a> heed::BytesEncode<'a> for ScriptLanguageCodec {
type EItem = (Script, Language);
fn bytes_encode((script, lan): &Self::EItem) -> Option<Cow<[u8]>> {
fn bytes_encode((script, lan): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let script_name = script.name().as_bytes();
let lan_name = lan.name().as_bytes();
@ -33,6 +34,6 @@ impl<'a> heed::BytesEncode<'a> for ScriptLanguageCodec {
bytes.push(0);
bytes.extend_from_slice(lan_name);
Some(Cow::Owned(bytes))
Ok(Cow::Owned(bytes))
}
}

View file

@ -3,37 +3,39 @@ use std::convert::TryInto;
use std::mem::size_of;
use std::str;
use heed::BoxedError;
pub struct StrBEU32Codec;
impl<'a> heed::BytesDecode<'a> for StrBEU32Codec {
type DItem = (&'a str, u32);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let footer_len = size_of::<u32>();
if bytes.len() < footer_len {
return None;
panic!() // TODO Do not panic
}
let (word, bytes) = bytes.split_at(bytes.len() - footer_len);
let word = str::from_utf8(word).ok()?;
let pos = bytes.try_into().map(u32::from_be_bytes).ok()?;
let word = str::from_utf8(word)?;
let pos = bytes.try_into().map(u32::from_be_bytes)?;
Some((word, pos))
Ok((word, pos))
}
}
impl<'a> heed::BytesEncode<'a> for StrBEU32Codec {
type EItem = (&'a str, u32);
fn bytes_encode((word, pos): &Self::EItem) -> Option<Cow<[u8]>> {
fn bytes_encode((word, pos): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let pos = pos.to_be_bytes();
let mut bytes = Vec::with_capacity(word.len() + pos.len());
bytes.extend_from_slice(word.as_bytes());
bytes.extend_from_slice(&pos[..]);
Some(Cow::Owned(bytes))
Ok(Cow::Owned(bytes))
}
}
@ -42,26 +44,26 @@ pub struct StrBEU16Codec;
impl<'a> heed::BytesDecode<'a> for StrBEU16Codec {
type DItem = (&'a str, u16);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let footer_len = size_of::<u16>();
if bytes.len() < footer_len + 1 {
return None;
panic!() // TODO do not panic
}
let (word_plus_nul_byte, bytes) = bytes.split_at(bytes.len() - footer_len);
let (_, word) = word_plus_nul_byte.split_last()?;
let word = str::from_utf8(word).ok()?;
let pos = bytes.try_into().map(u16::from_be_bytes).ok()?;
let (_, word) = word_plus_nul_byte.split_last().unwrap();
let word = str::from_utf8(word).ok().unwrap();
let pos = bytes.try_into().map(u16::from_be_bytes).ok().unwrap();
Some((word, pos))
Ok((word, pos))
}
}
impl<'a> heed::BytesEncode<'a> for StrBEU16Codec {
type EItem = (&'a str, u16);
fn bytes_encode((word, pos): &Self::EItem) -> Option<Cow<[u8]>> {
fn bytes_encode((word, pos): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let pos = pos.to_be_bytes();
let mut bytes = Vec::with_capacity(word.len() + 1 + pos.len());
@ -69,6 +71,6 @@ impl<'a> heed::BytesEncode<'a> for StrBEU16Codec {
bytes.push(0);
bytes.extend_from_slice(&pos[..]);
Some(Cow::Owned(bytes))
Ok(Cow::Owned(bytes))
}
}

View file

@ -1,6 +1,6 @@
use std::borrow::Cow;
use heed::{BytesDecode, BytesEncode};
use heed::{BoxedError, BytesDecode, BytesEncode};
/// A codec for values of type `&str`. Unlike `Str`, its `EItem` and `DItem` associated
/// types are equivalent (= `&'a str`) and these values can reside within another structure.
@ -8,15 +8,14 @@ pub struct StrRefCodec;
impl<'a> BytesEncode<'a> for StrRefCodec {
type EItem = &'a str;
fn bytes_encode(item: &'a &'a str) -> Option<Cow<'a, [u8]>> {
Some(Cow::Borrowed(item.as_bytes()))
fn bytes_encode(item: &'a &'a str) -> Result<Cow<'a, [u8]>, BoxedError> {
Ok(Cow::Borrowed(item.as_bytes()))
}
}
impl<'a> BytesDecode<'a> for StrRefCodec {
type DItem = &'a str;
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let s = std::str::from_utf8(bytes).ok()?;
Some(s)
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
std::str::from_utf8(bytes).map_err(Into::into)
}
}

View file

@ -1,32 +1,34 @@
use std::borrow::Cow;
use std::str;
use heed::BoxedError;
pub struct U8StrStrCodec;
impl<'a> heed::BytesDecode<'a> for U8StrStrCodec {
type DItem = (u8, &'a str, &'a str);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let (n, bytes) = bytes.split_first()?;
let s1_end = bytes.iter().position(|b| *b == 0)?;
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let (n, bytes) = bytes.split_first().unwrap();
let s1_end = bytes.iter().position(|b| *b == 0).unwrap();
let (s1_bytes, rest) = bytes.split_at(s1_end);
let s2_bytes = &rest[1..];
let s1 = str::from_utf8(s1_bytes).ok()?;
let s2 = str::from_utf8(s2_bytes).ok()?;
Some((*n, s1, s2))
let s1 = str::from_utf8(s1_bytes).ok().unwrap();
let s2 = str::from_utf8(s2_bytes).ok().unwrap();
Ok((*n, s1, s2))
}
}
impl<'a> heed::BytesEncode<'a> for U8StrStrCodec {
type EItem = (u8, &'a str, &'a str);
fn bytes_encode((n, s1, s2): &Self::EItem) -> Option<Cow<[u8]>> {
fn bytes_encode((n, s1, s2): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1);
bytes.push(*n);
bytes.extend_from_slice(s1.as_bytes());
bytes.push(0);
bytes.extend_from_slice(s2.as_bytes());
Some(Cow::Owned(bytes))
Ok(Cow::Owned(bytes))
}
}
pub struct UncheckedU8StrStrCodec;
@ -34,24 +36,24 @@ pub struct UncheckedU8StrStrCodec;
impl<'a> heed::BytesDecode<'a> for UncheckedU8StrStrCodec {
type DItem = (u8, &'a [u8], &'a [u8]);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let (n, bytes) = bytes.split_first()?;
let s1_end = bytes.iter().position(|b| *b == 0)?;
fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
let (n, bytes) = bytes.split_first().unwrap();
let s1_end = bytes.iter().position(|b| *b == 0).unwrap();
let (s1_bytes, rest) = bytes.split_at(s1_end);
let s2_bytes = &rest[1..];
Some((*n, s1_bytes, s2_bytes))
Ok((*n, s1_bytes, s2_bytes))
}
}
impl<'a> heed::BytesEncode<'a> for UncheckedU8StrStrCodec {
type EItem = (u8, &'a [u8], &'a [u8]);
fn bytes_encode((n, s1, s2): &Self::EItem) -> Option<Cow<[u8]>> {
fn bytes_encode((n, s1, s2): &Self::EItem) -> Result<Cow<[u8]>, BoxedError> {
let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1);
bytes.push(*n);
bytes.extend_from_slice(s1);
bytes.push(0);
bytes.extend_from_slice(s2);
Some(Cow::Owned(bytes))
Ok(Cow::Owned(bytes))
}
}