mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 20:37:15 +02:00
Simplify word level position DB into a word position DB
This commit is contained in:
parent
75d341d928
commit
3296bb243c
18 changed files with 220 additions and 545 deletions
|
@ -4,7 +4,7 @@ mod field_id_word_count_codec;
|
|||
mod obkv_codec;
|
||||
mod roaring_bitmap;
|
||||
mod roaring_bitmap_length;
|
||||
mod str_level_position_codec;
|
||||
mod str_beu32_codec;
|
||||
mod str_str_u8_codec;
|
||||
|
||||
pub use self::beu32_str_codec::BEU32StrCodec;
|
||||
|
@ -14,5 +14,5 @@ pub use self::roaring_bitmap::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, Roar
|
|||
pub use self::roaring_bitmap_length::{
|
||||
BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec, RoaringBitmapLenCodec,
|
||||
};
|
||||
pub use self::str_level_position_codec::StrLevelPositionCodec;
|
||||
pub use self::str_beu32_codec::StrBEU32Codec;
|
||||
pub use self::str_str_u8_codec::StrStrU8Codec;
|
||||
|
|
38
milli/src/heed_codec/str_beu32_codec.rs
Normal file
38
milli/src/heed_codec/str_beu32_codec.rs
Normal file
|
@ -0,0 +1,38 @@
|
|||
use std::borrow::Cow;
|
||||
use std::convert::TryInto;
|
||||
use std::mem::size_of;
|
||||
use std::str;
|
||||
|
||||
/// Heed codec for entries made of a UTF-8 string immediately followed by a
/// `u32` stored as 4 big-endian bytes.
///
/// The string carries no length prefix or terminator: decoding treats the
/// last 4 bytes as the integer and everything before them as the string.
pub struct StrBEU32Codec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for StrBEU32Codec {
|
||||
type DItem = (&'a str, u32);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let footer_len = size_of::<u32>();
|
||||
|
||||
if bytes.len() < footer_len {
|
||||
return None;
|
||||
}
|
||||
|
||||
let (word, bytes) = bytes.split_at(bytes.len() - footer_len);
|
||||
let word = str::from_utf8(word).ok()?;
|
||||
let pos = bytes.try_into().map(u32::from_be_bytes).ok()?;
|
||||
|
||||
Some((word, pos))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for StrBEU32Codec {
|
||||
type EItem = (&'a str, u32);
|
||||
|
||||
fn bytes_encode((word, pos): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let pos = pos.to_be_bytes();
|
||||
|
||||
let mut bytes = Vec::with_capacity(word.len() + pos.len());
|
||||
bytes.extend_from_slice(word.as_bytes());
|
||||
bytes.extend_from_slice(&pos[..]);
|
||||
|
||||
Some(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
|
@ -1,47 +0,0 @@
|
|||
use std::borrow::Cow;
|
||||
use std::convert::{TryFrom, TryInto};
|
||||
use std::mem::size_of;
|
||||
use std::str;
|
||||
|
||||
use crate::TreeLevel;
|
||||
|
||||
/// Heed codec for entries made of a UTF-8 string followed by a 9-byte footer:
/// one byte holding a `TreeLevel`, then two `u32` values (`left`, `right`)
/// each stored as 4 big-endian bytes.
pub struct StrLevelPositionCodec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for StrLevelPositionCodec {
|
||||
type DItem = (&'a str, TreeLevel, u32, u32);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let footer_len = size_of::<u8>() + size_of::<u32>() * 2;
|
||||
|
||||
if bytes.len() < footer_len {
|
||||
return None;
|
||||
}
|
||||
|
||||
let (word, bytes) = bytes.split_at(bytes.len() - footer_len);
|
||||
let word = str::from_utf8(word).ok()?;
|
||||
|
||||
let (level, bytes) = bytes.split_first()?;
|
||||
let left = bytes[..4].try_into().map(u32::from_be_bytes).ok()?;
|
||||
let right = bytes[4..].try_into().map(u32::from_be_bytes).ok()?;
|
||||
let level = TreeLevel::try_from(*level).ok()?;
|
||||
|
||||
Some((word, level, left, right))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for StrLevelPositionCodec {
|
||||
type EItem = (&'a str, TreeLevel, u32, u32);
|
||||
|
||||
fn bytes_encode((word, level, left, right): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let left = left.to_be_bytes();
|
||||
let right = right.to_be_bytes();
|
||||
|
||||
let mut bytes = Vec::with_capacity(word.len() + 1 + left.len() + right.len());
|
||||
bytes.extend_from_slice(word.as_bytes());
|
||||
bytes.push((*level).into());
|
||||
bytes.extend_from_slice(&left[..]);
|
||||
bytes.extend_from_slice(&right[..]);
|
||||
|
||||
Some(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue