Create a new database on index and add a specialized codec for it

This commit is contained in:
f3r10 2022-10-12 06:21:35 -05:00 committed by ManyTheFish
parent bfb1f9279b
commit c45d1e3610
3 changed files with 52 additions and 0 deletions

View file

@ -8,6 +8,7 @@ mod roaring_bitmap_length;
mod str_beu32_codec;
mod str_ref;
mod str_str_u8_codec;
mod script_language_codec;
pub use byte_slice_ref::ByteSliceRefCodec;
pub use str_ref::StrRefCodec;
@ -21,3 +22,4 @@ pub use self::roaring_bitmap_length::{
};
pub use self::str_beu32_codec::StrBEU32Codec;
pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec};
pub use self::script_language_codec::ScriptLanguageCodec;

View file

@ -0,0 +1,43 @@
use std::borrow::Cow;
use std::mem::size_of;
use std::str;
use charabia::{Language, Script};
/// Codec for `(Script, Language)` pairs.
///
/// Encoded layout: `<script name bytes> 0x00 <language name bytes>`.
///
/// Both names have variable length, so a single separator byte is written
/// between them; the decoder splits on the first occurrence of that byte
/// instead of assuming a fixed-width language suffix.
pub struct ScriptLanguageCodec;

impl ScriptLanguageCodec {
    /// Byte written between the script name and the language name.
    const SEPARATOR: u8 = 0;
}

impl<'a> heed::BytesDecode<'a> for ScriptLanguageCodec {
    type DItem = (Script, Language);

    /// Decodes bytes produced by `bytes_encode` back into a `(Script, Language)` pair.
    ///
    /// Returns `None` when the separator byte is missing or when either side
    /// of it is not valid UTF-8.
    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        let separator = bytes.iter().position(|&byte| byte == Self::SEPARATOR)?;
        let (script_bytes, rest) = bytes.split_at(separator);
        // `rest` starts with the separator itself (guaranteed by `position`
        // above), so skipping one byte cannot go out of bounds.
        let language_bytes = &rest[1..];

        let script = Script::from_name(str::from_utf8(script_bytes).ok()?);
        let language = Language::from_name(str::from_utf8(language_bytes).ok()?);
        Some((script, language))
    }
}

impl<'a> heed::BytesEncode<'a> for ScriptLanguageCodec {
    type EItem = (Script, Language);

    /// Encodes the pair as the script name, the separator byte, then the language name.
    ///
    /// Returns `None` if the script name itself contains the separator byte,
    /// because the decoder splits on the first separator and such a key could
    /// not be decoded back to the same pair.
    fn bytes_encode((script, lan): &Self::EItem) -> Option<Cow<[u8]>> {
        let script_name = script.name();
        let lan_name = lan.name();
        // NOTE(review): names are presumably short ASCII identifiers that never
        // contain a NUL byte; this guard keeps the encoding unambiguous if that
        // assumption is ever broken.
        if script_name.as_bytes().contains(&Self::SEPARATOR) {
            return None;
        }

        // One extra byte for the separator.
        let mut bytes = Vec::with_capacity(script_name.len() + 1 + lan_name.len());
        bytes.extend_from_slice(script_name.as_bytes());
        bytes.push(Self::SEPARATOR);
        bytes.extend_from_slice(lan_name.as_bytes());
        Some(Cow::Owned(bytes))
    }
}