Introduce the word_level_position_docids Index database

This commit is contained in:
Kerollmops 2021-03-11 17:24:35 +01:00 committed by many
parent 75e7b1e3da
commit b0a417f342
No known key found for this signature in database
GPG key ID: 2CEF23B75189EACA
7 changed files with 54 additions and 3 deletions

View file

@ -2,6 +2,7 @@ mod beu32_str_codec;
mod obkv_codec;
mod roaring_bitmap;
mod roaring_bitmap_length;
mod str_level_position_codec;
mod str_str_u8_codec;
pub mod facet;
@ -9,4 +10,5 @@ pub use self::beu32_str_codec::BEU32StrCodec;
pub use self::obkv_codec::ObkvCodec;
pub use self::roaring_bitmap::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, RoaringBitmapCodec};
pub use self::roaring_bitmap_length::{BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec, RoaringBitmapLenCodec};
pub use self::str_level_position_codec::StrLevelPositionCodec;
pub use self::str_str_u8_codec::StrStrU8Codec;

View file

@ -0,0 +1,42 @@
use std::borrow::Cow;
use std::convert::TryInto;
use std::mem::size_of;
use std::str;
/// Codec for keys made of a word, a level and a pair of `u32` bounds.
///
/// Byte layout, as produced by the `BytesEncode` impl and expected by the
/// `BytesDecode` impl:
///
/// ```text
/// | word (UTF-8, variable length) | level (u8) | left (u32 BE) | right (u32 BE) |
/// ```
///
/// The word carries no length prefix or terminator; it is recovered on
/// decoding by stripping the fixed-size 9-byte footer from the end of the key.
pub struct StrLevelPositionCodec;
impl<'a> heed::BytesDecode<'a> for StrLevelPositionCodec {
    type DItem = (&'a str, u8, u32, u32);

    /// Decodes a key laid out as `word | level (u8) | left (u32 BE) | right (u32 BE)`.
    ///
    /// Returns `None` when the input is shorter than the fixed-size footer
    /// or when the word part is not valid UTF-8.
    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        // Size of one big-endian encoded bound.
        const BOUND_LEN: usize = size_of::<u32>();
        // The footer is everything that follows the word: level + left + right.
        const FOOTER_LEN: usize = size_of::<u8>() + BOUND_LEN * 2;

        // The word has no explicit length, it is whatever precedes the footer.
        let word_len = bytes.len().checked_sub(FOOTER_LEN)?;
        let (word_bytes, footer) = bytes.split_at(word_len);
        let word = str::from_utf8(word_bytes).ok()?;

        let (&level, bounds) = footer.split_first()?;
        let (left_bytes, right_bytes) = bounds.split_at(BOUND_LEN);
        let left = u32::from_be_bytes(left_bytes.try_into().ok()?);
        let right = u32::from_be_bytes(right_bytes.try_into().ok()?);

        Some((word, level, left, right))
    }
}
impl<'a> heed::BytesEncode<'a> for StrLevelPositionCodec {
type EItem = (&'a str, u8, u32, u32);
fn bytes_encode((word, level, left, right): &Self::EItem) -> Option<Cow<[u8]>> {
let left = left.to_be_bytes();
let right = right.to_be_bytes();
let mut bytes = Vec::with_capacity(word.len() + 1 + left.len() + right.len());
bytes.extend_from_slice(word.as_bytes());
bytes.push(*level);
bytes.extend_from_slice(&left[..]);
bytes.extend_from_slice(&right[..]);
Some(Cow::Owned(bytes))
}
}