mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 20:37:15 +02:00
Change the project to become a workspace with milli as a default-member
This commit is contained in:
parent
d450b971f9
commit
e8639517da
56 changed files with 1053 additions and 2617 deletions
27
milli/src/heed_codec/beu32_str_codec.rs
Normal file
27
milli/src/heed_codec/beu32_str_codec.rs
Normal file
|
@ -0,0 +1,27 @@
|
|||
use std::borrow::Cow;
|
||||
use std::convert::TryInto;
|
||||
use std::str;
|
||||
|
||||
/// A heed codec for `(u32, &str)` pairs, laid out as four big-endian bytes
/// for the number followed by the UTF-8 bytes of the string.
pub struct BEU32StrCodec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for BEU32StrCodec {
|
||||
type DItem = (u32, &'a str);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (n_bytes, str_bytes) = bytes.split_at(4);
|
||||
let n = n_bytes.try_into().map(u32::from_be_bytes).ok()?;
|
||||
let s = str::from_utf8(str_bytes).ok()?;
|
||||
Some((n, s))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for BEU32StrCodec {
|
||||
type EItem = (u32, &'a str);
|
||||
|
||||
fn bytes_encode((n, s): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let mut bytes = Vec::with_capacity(s.len() + 4);
|
||||
bytes.extend_from_slice(&n.to_be_bytes());
|
||||
bytes.extend_from_slice(s.as_bytes());
|
||||
Some(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
29
milli/src/heed_codec/bo_roaring_bitmap_codec.rs
Normal file
29
milli/src/heed_codec/bo_roaring_bitmap_codec.rs
Normal file
|
@ -0,0 +1,29 @@
|
|||
use std::borrow::Cow;
|
||||
use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
/// A heed codec that stores a `RoaringBitmap` as a flat sequence of
/// native-endian `u32` values, without any header.
pub struct BoRoaringBitmapCodec;
|
||||
|
||||
impl heed::BytesDecode<'_> for BoRoaringBitmapCodec {
|
||||
type DItem = RoaringBitmap;
|
||||
|
||||
fn bytes_decode(mut bytes: &[u8]) -> Option<Self::DItem> {
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
while let Ok(integer) = bytes.read_u32::<NativeEndian>() {
|
||||
bitmap.insert(integer);
|
||||
}
|
||||
Some(bitmap)
|
||||
}
|
||||
}
|
||||
|
||||
impl heed::BytesEncode<'_> for BoRoaringBitmapCodec {
|
||||
type EItem = RoaringBitmap;
|
||||
|
||||
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let mut bytes = Vec::with_capacity(item.len() as usize * 4);
|
||||
for integer in item.iter() {
|
||||
bytes.write_u32::<NativeEndian>(integer).ok()?;
|
||||
}
|
||||
Some(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
108
milli/src/heed_codec/cbo_roaring_bitmap_codec.rs
Normal file
108
milli/src/heed_codec/cbo_roaring_bitmap_codec.rs
Normal file
|
@ -0,0 +1,108 @@
|
|||
use std::borrow::Cow;
|
||||
use std::io;
|
||||
use std::mem::size_of;
|
||||
|
||||
use byteorder::{NativeEndian, ReadBytesExt, WriteBytesExt};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
/// This is the limit where using byteorder becomes less size efficient
|
||||
/// than using a direct roaring encoding, it is also the point where we are able
|
||||
/// to determine the encoding used only by using the array of bytes length.
|
||||
const THRESHOLD: usize = 7;
|
||||
|
||||
/// A conditional codec that either uses the RoaringBitmap
|
||||
/// or a lighter ByteOrder en/decoding method.
|
||||
pub struct CboRoaringBitmapCodec;
|
||||
|
||||
impl CboRoaringBitmapCodec {
|
||||
pub fn serialized_size(roaring: &RoaringBitmap) -> usize {
|
||||
if roaring.len() <= THRESHOLD as u64 {
|
||||
roaring.len() as usize * size_of::<u32>()
|
||||
} else {
|
||||
roaring.serialized_size()
|
||||
}
|
||||
}
|
||||
|
||||
pub fn serialize_into(roaring: &RoaringBitmap, vec: &mut Vec<u8>) -> io::Result<()> {
|
||||
if roaring.len() <= THRESHOLD as u64 {
|
||||
// If the number of items (u32s) to encode is less than or equal to the threshold
|
||||
// it means that it would weigh the same or less than the RoaringBitmap
|
||||
// header, so we directly encode them using ByteOrder instead.
|
||||
for integer in roaring {
|
||||
vec.write_u32::<NativeEndian>(integer)?;
|
||||
}
|
||||
Ok(())
|
||||
} else {
|
||||
// Otherwise, we use the classic RoaringBitmapCodec that writes a header.
|
||||
roaring.serialize_into(vec)
|
||||
}
|
||||
}
|
||||
|
||||
pub fn deserialize_from(mut bytes: &[u8]) -> io::Result<RoaringBitmap> {
|
||||
if bytes.len() <= THRESHOLD * size_of::<u32>() {
|
||||
// If there is threshold or less than threshold integers that can fit into this array
|
||||
// of bytes it means that we used the ByteOrder codec serializer.
|
||||
let mut bitmap = RoaringBitmap::new();
|
||||
while let Ok(integer) = bytes.read_u32::<NativeEndian>() {
|
||||
bitmap.insert(integer);
|
||||
}
|
||||
Ok(bitmap)
|
||||
} else {
|
||||
// Otherwise, it means we used the classic RoaringBitmapCodec and
|
||||
// that the header takes threshold integers.
|
||||
RoaringBitmap::deserialize_from(bytes)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl heed::BytesDecode<'_> for CboRoaringBitmapCodec {
|
||||
type DItem = RoaringBitmap;
|
||||
|
||||
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
|
||||
Self::deserialize_from(bytes).ok()
|
||||
}
|
||||
}
|
||||
|
||||
impl heed::BytesEncode<'_> for CboRoaringBitmapCodec {
|
||||
type EItem = RoaringBitmap;
|
||||
|
||||
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let mut vec = Vec::with_capacity(Self::serialized_size(item));
|
||||
Self::serialize_into(item, &mut vec).ok()?;
|
||||
Some(Cow::Owned(vec))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use std::iter::FromIterator;
    use heed::{BytesEncode, BytesDecode};
    use super::*;

    /// A bitmap holding exactly `THRESHOLD` integers survives a round trip.
    #[test]
    fn verify_encoding_decoding() {
        let input = RoaringBitmap::from_iter(0..THRESHOLD as u32);
        let encoded = CboRoaringBitmapCodec::bytes_encode(&input).unwrap();
        let output = CboRoaringBitmapCodec::bytes_decode(&encoded).unwrap();
        assert_eq!(input, output);
    }

    /// With `THRESHOLD` integers the roaring serialization is strictly
    /// larger than the raw byteorder encoding.
    #[test]
    fn verify_threshold() {
        let input = RoaringBitmap::from_iter(0..THRESHOLD as u32);

        // Size when using the roaring bitmap serialization.
        let mut roaring_bytes = Vec::new();
        input.serialize_into(&mut roaring_bytes).unwrap();

        // Size when writing each integer directly with byteorder.
        let mut bo_bytes = Vec::new();
        for integer in input {
            bo_bytes.write_u32::<NativeEndian>(integer).unwrap();
        }

        assert!(roaring_bytes.len() > bo_bytes.len());
    }
}
|
87
milli/src/heed_codec/facet/facet_level_value_f64_codec.rs
Normal file
87
milli/src/heed_codec/facet/facet_level_value_f64_codec.rs
Normal file
|
@ -0,0 +1,87 @@
|
|||
use std::borrow::Cow;
|
||||
use std::convert::TryInto;
|
||||
|
||||
use crate::facet::value_encoding::f64_into_bytes;
|
||||
use crate::FieldId;
|
||||
|
||||
// TODO do not de/serialize right bound when level = 0
|
||||
/// A heed codec for `(FieldId, u8, f64, f64)` entries: a field id, a level
/// and the left and right `f64` bounds.
pub struct FacetLevelValueF64Codec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for FacetLevelValueF64Codec {
|
||||
type DItem = (FieldId, u8, f64, f64);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (field_id, bytes) = bytes.split_first()?;
|
||||
let (level, bytes) = bytes.split_first()?;
|
||||
|
||||
let (left, right) = if *level != 0 {
|
||||
let left = bytes[16..24].try_into().ok().map(f64::from_be_bytes)?;
|
||||
let right = bytes[24..].try_into().ok().map(f64::from_be_bytes)?;
|
||||
(left, right)
|
||||
} else {
|
||||
let left = bytes[8..].try_into().ok().map(f64::from_be_bytes)?;
|
||||
(left, left)
|
||||
};
|
||||
|
||||
Some((*field_id, *level, left, right))
|
||||
}
|
||||
}
|
||||
|
||||
impl heed::BytesEncode<'_> for FacetLevelValueF64Codec {
|
||||
type EItem = (FieldId, u8, f64, f64);
|
||||
|
||||
fn bytes_encode((field_id, level, left, right): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let mut buffer = [0u8; 32];
|
||||
|
||||
let len = if *level != 0 {
|
||||
// Write the globally ordered floats.
|
||||
let bytes = f64_into_bytes(*left)?;
|
||||
buffer[..8].copy_from_slice(&bytes[..]);
|
||||
|
||||
let bytes = f64_into_bytes(*right)?;
|
||||
buffer[8..16].copy_from_slice(&bytes[..]);
|
||||
|
||||
// Then the f64 values just to be able to read them back.
|
||||
let bytes = left.to_be_bytes();
|
||||
buffer[16..24].copy_from_slice(&bytes[..]);
|
||||
|
||||
let bytes = right.to_be_bytes();
|
||||
buffer[24..].copy_from_slice(&bytes[..]);
|
||||
|
||||
32 // length
|
||||
} else {
|
||||
// Write the globally ordered floats.
|
||||
let bytes = f64_into_bytes(*left)?;
|
||||
buffer[..8].copy_from_slice(&bytes[..]);
|
||||
|
||||
// Then the f64 values just to be able to read them back.
|
||||
let bytes = left.to_be_bytes();
|
||||
buffer[8..16].copy_from_slice(&bytes[..]);
|
||||
|
||||
16 // length
|
||||
};
|
||||
|
||||
let mut bytes = Vec::with_capacity(len + 2);
|
||||
bytes.push(*field_id);
|
||||
bytes.push(*level);
|
||||
bytes.extend_from_slice(&buffer[..len]);
|
||||
Some(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use heed::{BytesEncode, BytesDecode};
    use super::*;

    #[test]
    fn globally_ordered_f64() {
        // At level 0 the right bound is not stored: the left bound is
        // returned for both.
        let encoded = FacetLevelValueF64Codec::bytes_encode(&(3, 0, 32.0, 0.0)).unwrap();
        let decoded = FacetLevelValueF64Codec::bytes_decode(&encoded).unwrap();
        assert_eq!(decoded, (3, 0, 32.0, 32.0));

        // At any other level both bounds round-trip.
        let encoded = FacetLevelValueF64Codec::bytes_encode(&(3, 1, -32.0, 32.0)).unwrap();
        let decoded = FacetLevelValueF64Codec::bytes_decode(&encoded).unwrap();
        assert_eq!(decoded, (3, 1, -32.0, 32.0));
    }
}
|
44
milli/src/heed_codec/facet/facet_level_value_i64_codec.rs
Normal file
44
milli/src/heed_codec/facet/facet_level_value_i64_codec.rs
Normal file
|
@ -0,0 +1,44 @@
|
|||
use std::borrow::Cow;
|
||||
use std::convert::TryInto;
|
||||
|
||||
use crate::facet::value_encoding::{i64_from_bytes, i64_into_bytes};
|
||||
use crate::FieldId;
|
||||
|
||||
/// A heed codec for `(FieldId, u8, i64, i64)` entries: a field id, a level
/// and the left and right `i64` bounds.
pub struct FacetLevelValueI64Codec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for FacetLevelValueI64Codec {
|
||||
type DItem = (FieldId, u8, i64, i64);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (field_id, bytes) = bytes.split_first()?;
|
||||
let (level, bytes) = bytes.split_first()?;
|
||||
|
||||
let left = bytes[..8].try_into().map(i64_from_bytes).ok()?;
|
||||
let right = if *level != 0 {
|
||||
bytes[8..].try_into().map(i64_from_bytes).ok()?
|
||||
} else {
|
||||
left
|
||||
};
|
||||
|
||||
Some((*field_id, *level, left, right))
|
||||
}
|
||||
}
|
||||
|
||||
impl heed::BytesEncode<'_> for FacetLevelValueI64Codec {
|
||||
type EItem = (FieldId, u8, i64, i64);
|
||||
|
||||
fn bytes_encode((field_id, level, left, right): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let left = i64_into_bytes(*left);
|
||||
let right = i64_into_bytes(*right);
|
||||
|
||||
let mut bytes = Vec::with_capacity(2 + left.len() + right.len());
|
||||
bytes.push(*field_id);
|
||||
bytes.push(*level);
|
||||
bytes.extend_from_slice(&left[..]);
|
||||
if *level != 0 {
|
||||
bytes.extend_from_slice(&right[..]);
|
||||
}
|
||||
|
||||
Some(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
27
milli/src/heed_codec/facet/facet_value_string_codec.rs
Normal file
27
milli/src/heed_codec/facet/facet_value_string_codec.rs
Normal file
|
@ -0,0 +1,27 @@
|
|||
use std::borrow::Cow;
|
||||
use std::str;
|
||||
|
||||
use crate::FieldId;
|
||||
|
||||
/// A heed codec for `(FieldId, &str)` pairs, laid out as the field id byte
/// followed by the UTF-8 bytes of the string.
pub struct FacetValueStringCodec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for FacetValueStringCodec {
|
||||
type DItem = (FieldId, &'a str);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (field_id, bytes) = bytes.split_first()?;
|
||||
let value = str::from_utf8(bytes).ok()?;
|
||||
Some((*field_id, value))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for FacetValueStringCodec {
|
||||
type EItem = (FieldId, &'a str);
|
||||
|
||||
fn bytes_encode((field_id, value): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let mut bytes = Vec::with_capacity(value.len() + 1);
|
||||
bytes.push(*field_id);
|
||||
bytes.extend_from_slice(value.as_bytes());
|
||||
Some(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
36
milli/src/heed_codec/facet/field_doc_id_facet_f64_codec.rs
Normal file
36
milli/src/heed_codec/facet/field_doc_id_facet_f64_codec.rs
Normal file
|
@ -0,0 +1,36 @@
|
|||
use std::borrow::Cow;
|
||||
use std::convert::TryInto;
|
||||
|
||||
use crate::{FieldId, DocumentId};
|
||||
use crate::facet::value_encoding::f64_into_bytes;
|
||||
|
||||
/// A heed codec for `(FieldId, DocumentId, f64)` entries: the field id byte,
/// the big-endian document id, the globally ordered form of the value and
/// finally the raw big-endian `f64`.
pub struct FieldDocIdFacetF64Codec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for FieldDocIdFacetF64Codec {
|
||||
type DItem = (FieldId, DocumentId, f64);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (field_id, bytes) = bytes.split_first()?;
|
||||
|
||||
let (document_id_bytes, bytes) = bytes.split_at(4);
|
||||
let document_id = document_id_bytes.try_into().map(u32::from_be_bytes).ok()?;
|
||||
|
||||
let value = bytes[8..16].try_into().map(f64::from_be_bytes).ok()?;
|
||||
|
||||
Some((*field_id, document_id, value))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for FieldDocIdFacetF64Codec {
|
||||
type EItem = (FieldId, DocumentId, f64);
|
||||
|
||||
fn bytes_encode((field_id, document_id, value): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let mut bytes = Vec::with_capacity(1 + 4 + 8 + 8);
|
||||
bytes.push(*field_id);
|
||||
bytes.extend_from_slice(&document_id.to_be_bytes());
|
||||
let value_bytes = f64_into_bytes(*value)?;
|
||||
bytes.extend_from_slice(&value_bytes);
|
||||
bytes.extend_from_slice(&value.to_be_bytes());
|
||||
Some(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
34
milli/src/heed_codec/facet/field_doc_id_facet_i64_codec.rs
Normal file
34
milli/src/heed_codec/facet/field_doc_id_facet_i64_codec.rs
Normal file
|
@ -0,0 +1,34 @@
|
|||
use std::borrow::Cow;
|
||||
use std::convert::TryInto;
|
||||
|
||||
use crate::facet::value_encoding::{i64_into_bytes, i64_from_bytes};
|
||||
use crate::{FieldId, DocumentId};
|
||||
|
||||
/// A heed codec for `(FieldId, DocumentId, i64)` entries: the field id byte,
/// the big-endian document id and the encoded `i64` value.
pub struct FieldDocIdFacetI64Codec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for FieldDocIdFacetI64Codec {
|
||||
type DItem = (FieldId, DocumentId, i64);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (field_id, bytes) = bytes.split_first()?;
|
||||
|
||||
let (document_id_bytes, bytes) = bytes.split_at(4);
|
||||
let document_id = document_id_bytes.try_into().map(u32::from_be_bytes).ok()?;
|
||||
|
||||
let value = bytes[..8].try_into().map(i64_from_bytes).ok()?;
|
||||
|
||||
Some((*field_id, document_id, value))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for FieldDocIdFacetI64Codec {
|
||||
type EItem = (FieldId, DocumentId, i64);
|
||||
|
||||
fn bytes_encode((field_id, document_id, value): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let mut bytes = Vec::with_capacity(1 + 4 + 8);
|
||||
bytes.push(*field_id);
|
||||
bytes.extend_from_slice(&document_id.to_be_bytes());
|
||||
bytes.extend_from_slice(&i64_into_bytes(*value));
|
||||
Some(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
|
@ -0,0 +1,31 @@
|
|||
use std::borrow::Cow;
|
||||
use std::convert::TryInto;
|
||||
use std::str;
|
||||
|
||||
use crate::{FieldId, DocumentId};
|
||||
|
||||
/// A heed codec for `(FieldId, DocumentId, &str)` entries: the field id byte,
/// the big-endian document id and the UTF-8 bytes of the string.
pub struct FieldDocIdFacetStringCodec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for FieldDocIdFacetStringCodec {
|
||||
type DItem = (FieldId, DocumentId, &'a str);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (field_id, bytes) = bytes.split_first()?;
|
||||
let (document_id_bytes, bytes) = bytes.split_at(4);
|
||||
let document_id = document_id_bytes.try_into().map(u32::from_be_bytes).ok()?;
|
||||
let value = str::from_utf8(bytes).ok()?;
|
||||
Some((*field_id, document_id, value))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for FieldDocIdFacetStringCodec {
|
||||
type EItem = (FieldId, DocumentId, &'a str);
|
||||
|
||||
fn bytes_encode((field_id, document_id, value): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let mut bytes = Vec::with_capacity(1 + 4 + value.len());
|
||||
bytes.push(*field_id);
|
||||
bytes.extend_from_slice(&document_id.to_be_bytes());
|
||||
bytes.extend_from_slice(value.as_bytes());
|
||||
Some(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
13
milli/src/heed_codec/facet/mod.rs
Normal file
13
milli/src/heed_codec/facet/mod.rs
Normal file
|
@ -0,0 +1,13 @@
|
|||
mod facet_level_value_f64_codec;
|
||||
mod facet_level_value_i64_codec;
|
||||
mod facet_value_string_codec;
|
||||
mod field_doc_id_facet_f64_codec;
|
||||
mod field_doc_id_facet_i64_codec;
|
||||
mod field_doc_id_facet_string_codec;
|
||||
|
||||
pub use self::facet_level_value_f64_codec::FacetLevelValueF64Codec;
|
||||
pub use self::facet_level_value_i64_codec::FacetLevelValueI64Codec;
|
||||
pub use self::facet_value_string_codec::FacetValueStringCodec;
|
||||
pub use self::field_doc_id_facet_f64_codec::FieldDocIdFacetF64Codec;
|
||||
pub use self::field_doc_id_facet_i64_codec::FieldDocIdFacetI64Codec;
|
||||
pub use self::field_doc_id_facet_string_codec::FieldDocIdFacetStringCodec;
|
14
milli/src/heed_codec/mod.rs
Normal file
14
milli/src/heed_codec/mod.rs
Normal file
|
@ -0,0 +1,14 @@
|
|||
mod beu32_str_codec;
|
||||
mod bo_roaring_bitmap_codec;
|
||||
mod cbo_roaring_bitmap_codec;
|
||||
mod obkv_codec;
|
||||
mod roaring_bitmap_codec;
|
||||
mod str_str_u8_codec;
|
||||
pub mod facet;
|
||||
|
||||
pub use self::beu32_str_codec::BEU32StrCodec;
|
||||
pub use self::bo_roaring_bitmap_codec::BoRoaringBitmapCodec;
|
||||
pub use self::cbo_roaring_bitmap_codec::CboRoaringBitmapCodec;
|
||||
pub use self::obkv_codec::ObkvCodec;
|
||||
pub use self::roaring_bitmap_codec::RoaringBitmapCodec;
|
||||
pub use self::str_str_u8_codec::StrStrU8Codec;
|
20
milli/src/heed_codec/obkv_codec.rs
Normal file
20
milli/src/heed_codec/obkv_codec.rs
Normal file
|
@ -0,0 +1,20 @@
|
|||
use std::borrow::Cow;
|
||||
use obkv::{KvReader, KvWriter};
|
||||
|
||||
/// A heed codec that decodes raw bytes into an obkv `KvReader` and encodes
/// from an obkv `KvWriter<Vec<u8>>`.
pub struct ObkvCodec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for ObkvCodec {
|
||||
type DItem = KvReader<'a>;
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
Some(KvReader::new(bytes))
|
||||
}
|
||||
}
|
||||
|
||||
impl heed::BytesEncode<'_> for ObkvCodec {
|
||||
type EItem = KvWriter<Vec<u8>>;
|
||||
|
||||
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
item.clone().into_inner().map(Cow::Owned).ok()
|
||||
}
|
||||
}
|
22
milli/src/heed_codec/roaring_bitmap_codec.rs
Normal file
22
milli/src/heed_codec/roaring_bitmap_codec.rs
Normal file
|
@ -0,0 +1,22 @@
|
|||
use std::borrow::Cow;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
/// A heed codec that stores a `RoaringBitmap` using the bitmap's own
/// `serialize_into` / `deserialize_from` format.
pub struct RoaringBitmapCodec;
|
||||
|
||||
impl heed::BytesDecode<'_> for RoaringBitmapCodec {
|
||||
type DItem = RoaringBitmap;
|
||||
|
||||
fn bytes_decode(bytes: &[u8]) -> Option<Self::DItem> {
|
||||
RoaringBitmap::deserialize_from(bytes).ok()
|
||||
}
|
||||
}
|
||||
|
||||
impl heed::BytesEncode<'_> for RoaringBitmapCodec {
|
||||
type EItem = RoaringBitmap;
|
||||
|
||||
fn bytes_encode(item: &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let mut bytes = Vec::with_capacity(item.serialized_size());
|
||||
item.serialize_into(&mut bytes).ok()?;
|
||||
Some(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
30
milli/src/heed_codec/str_str_u8_codec.rs
Normal file
30
milli/src/heed_codec/str_str_u8_codec.rs
Normal file
|
@ -0,0 +1,30 @@
|
|||
use std::borrow::Cow;
|
||||
use std::str;
|
||||
|
||||
/// A heed codec for `(&str, &str, u8)` triples, laid out as the first
/// string, a zero byte separator, the second string and a trailing byte.
pub struct StrStrU8Codec;
|
||||
|
||||
impl<'a> heed::BytesDecode<'a> for StrStrU8Codec {
|
||||
type DItem = (&'a str, &'a str, u8);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
let (n, bytes) = bytes.split_last()?;
|
||||
let s1_end = bytes.iter().position(|b| *b == 0)?;
|
||||
let (s1_bytes, s2_bytes) = bytes.split_at(s1_end);
|
||||
let s1 = str::from_utf8(s1_bytes).ok()?;
|
||||
let s2 = str::from_utf8(&s2_bytes[1..]).ok()?;
|
||||
Some((s1, s2, *n))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> heed::BytesEncode<'a> for StrStrU8Codec {
|
||||
type EItem = (&'a str, &'a str, u8);
|
||||
|
||||
fn bytes_encode((s1, s2, n): &Self::EItem) -> Option<Cow<[u8]>> {
|
||||
let mut bytes = Vec::with_capacity(s1.len() + s2.len() + 1 + 1);
|
||||
bytes.extend_from_slice(s1.as_bytes());
|
||||
bytes.push(0);
|
||||
bytes.extend_from_slice(s2.as_bytes());
|
||||
bytes.push(*n);
|
||||
Some(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
Loading…
Add table
Add a link
Reference in a new issue