Split position DB into fid and relative position DB

This commit is contained in:
Loïc Lecrenier 2023-03-23 09:22:01 +01:00
parent 56b7209f26
commit 9b2653427d
11 changed files with 162 additions and 135 deletions

View file

@ -21,5 +21,5 @@ pub use self::roaring_bitmap_length::{
BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec, RoaringBitmapLenCodec,
};
pub use self::script_language_codec::ScriptLanguageCodec;
pub use self::str_beu32_codec::StrBEU32Codec;
pub use self::str_beu32_codec::{StrBEU32Codec, StrBEU16Codec};
pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec};

View file

@ -36,3 +36,37 @@ impl<'a> heed::BytesEncode<'a> for StrBEU32Codec {
Some(Cow::Owned(bytes))
}
}
pub struct StrBEU16Codec;
impl<'a> heed::BytesDecode<'a> for StrBEU16Codec {
type DItem = (&'a str, u16);
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
let footer_len = size_of::<u16>();
if bytes.len() < footer_len {
return None;
}
let (word, bytes) = bytes.split_at(bytes.len() - footer_len);
let word = str::from_utf8(word).ok()?;
let pos = bytes.try_into().map(u16::from_be_bytes).ok()?;
Some((word, pos))
}
}
impl<'a> heed::BytesEncode<'a> for StrBEU16Codec {
type EItem = (&'a str, u16);
fn bytes_encode((word, pos): &Self::EItem) -> Option<Cow<[u8]>> {
let pos = pos.to_be_bytes();
let mut bytes = Vec::with_capacity(word.len() + pos.len());
bytes.extend_from_slice(word.as_bytes());
bytes.extend_from_slice(&pos[..]);
Some(Cow::Owned(bytes))
}
}