mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 20:37:15 +02:00
Plug new indexer
This commit is contained in:
parent
3aaf1d62f3
commit
1d314328f0
36 changed files with 1920 additions and 1826 deletions
|
@ -2,51 +2,65 @@ use std::borrow::Cow;
|
|||
use std::convert::TryInto;
|
||||
use std::{marker, str};
|
||||
|
||||
use super::try_split_at;
|
||||
use crate::error::SerializationError;
|
||||
use crate::heed_codec::RoaringBitmapCodec;
|
||||
use crate::{try_split_array_at, try_split_at, Result};
|
||||
/// Alias for a `StringValueCodec` whose inner value codec is `RoaringBitmapCodec`.
pub type FacetStringLevelZeroValueCodec = StringValueCodec<RoaringBitmapCodec>;
|
||||
|
||||
/// A codec that encodes a string in front of the value.
|
||||
/// A codec that encodes a string in front of a value.
|
||||
///
|
||||
/// The use case is for the facet string levels algorithm where we must know the
|
||||
/// original string of a normalized facet value, the original values are stored
|
||||
/// in the value to not break the lexicographical ordering of the LMDB keys.
///
/// Encoded layout, as written by `bytes_encode`: a 2-byte big-endian string
/// length, the string's UTF-8 bytes, then the bytes of the inner codec `C`.
|
||||
pub struct FacetStringLevelZeroValueCodec<C>(marker::PhantomData<C>);
|
||||
pub struct StringValueCodec<C>(marker::PhantomData<C>);
|
||||
|
||||
// Decoding: read the length-prefixed string from the front of the bytes, then
// hand whatever remains to the inner codec `C`.
// NOTE(review): this span shows the impl header under two type names and the
// string parsing both inline and as a `decode_prefix_string` call — it looks
// like two revisions rendered together; confirm which one applies.
impl<'a, C> heed::BytesDecode<'a> for FacetStringLevelZeroValueCodec<C>
|
||||
impl<'a, C> heed::BytesDecode<'a> for StringValueCodec<C>
|
||||
where
|
||||
C: heed::BytesDecode<'a>,
|
||||
{
|
||||
// The decoded item borrows the string directly from the input bytes.
type DItem = (&'a str, C::DItem);
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
// Inline form: take 2 bytes and interpret them as a big-endian `u16` length.
let (string_len, bytes) = try_split_at(bytes, 2)?;
|
||||
let string_len = string_len.try_into().ok().map(u16::from_be_bytes)?;
|
||||
|
||||
// Then take `string_len` bytes and require them to be valid UTF-8.
let (string, bytes) = try_split_at(bytes, string_len as usize)?;
|
||||
let string = str::from_utf8(string).ok()?;
|
||||
// Helper form: `decode_prefix_string` performs the same length + UTF-8 steps.
let (string, bytes) = decode_prefix_string(bytes)?;
|
||||
|
||||
// A `None` from the inner codec propagates; otherwise pair it with the string.
C::bytes_decode(bytes).map(|item| (string, item))
|
||||
}
|
||||
}
|
||||
|
||||
// Encoding: write the length-prefixed string, then append the bytes produced
// by the inner codec `C`.
// NOTE(review): this span shows the impl header under two type names and the
// prefix written both inline and through `encode_prefix_string` — it looks
// like two revisions rendered together; if both forms ran, the prefix and
// string would be written twice. Confirm which one applies.
impl<'a, C> heed::BytesEncode<'a> for FacetStringLevelZeroValueCodec<C>
|
||||
impl<'a, C> heed::BytesEncode<'a> for StringValueCodec<C>
|
||||
where
|
||||
C: heed::BytesEncode<'a>,
|
||||
{
|
||||
type EItem = (&'a str, C::EItem);
|
||||
|
||||
fn bytes_encode((string, value): &'a Self::EItem) -> Option<Cow<[u8]>> {
|
||||
// Returns `None` when the string's byte length does not fit in a `u16`.
let string_len: u16 = string.len().try_into().ok()?;
|
||||
let value_bytes = C::bytes_encode(&value)?;
|
||||
|
||||
// Capacity: 2 bytes of length prefix + string bytes + encoded value bytes.
let mut bytes = Vec::with_capacity(2 + string.len() + value_bytes.len());
|
||||
// Inline form: big-endian length followed by the raw string bytes.
bytes.extend_from_slice(&string_len.to_be_bytes());
|
||||
bytes.extend_from_slice(string.as_bytes());
|
||||
// Helper form: same prefix + string write; its error is mapped to `None`.
encode_prefix_string(string, &mut bytes).ok()?;
|
||||
bytes.extend_from_slice(&value_bytes[..]);
|
||||
|
||||
Some(Cow::Owned(bytes))
|
||||
}
|
||||
}
|
||||
|
||||
/// Splits a length-prefixed string off the front of `value`.
///
/// The leading bytes are read as a big-endian `u16` length, and that many of the
/// following bytes are validated as UTF-8. Returns the string together with the
/// bytes that follow it.
///
/// Returns `None` when either split helper returns `None` (presumably because
/// `value` is too short — confirm against `try_split_array_at` / `try_split_at`)
/// or when the string bytes are not valid UTF-8.
pub fn decode_prefix_string(value: &[u8]) -> Option<(&str, &[u8])> {
|
||||
let (original_length_bytes, bytes) = try_split_array_at(value)?;
|
||||
let original_length = u16::from_be_bytes(original_length_bytes) as usize;
|
||||
let (string, bytes) = try_split_at(bytes, original_length)?;
|
||||
let string = str::from_utf8(string).ok()?;
|
||||
|
||||
Some((string, bytes))
|
||||
}
|
||||
|
||||
/// Appends `string` to `buffer`, preceded by its byte length as a big-endian `u16`.
///
/// # Errors
///
/// Fails with `SerializationError::InvalidNumberSerialization` when `string.len()`
/// does not fit in a `u16`. The length conversion happens before any write, so
/// `buffer` is left untouched in that case.
pub fn encode_prefix_string(string: &str, buffer: &mut Vec<u8>) -> Result<()> {
|
||||
let string_len: u16 =
|
||||
string.len().try_into().map_err(|_| SerializationError::InvalidNumberSerialization)?;
|
||||
buffer.extend_from_slice(&string_len.to_be_bytes());
|
||||
buffer.extend_from_slice(string.as_bytes());
|
||||
Ok(())
|
||||
}
|
||||
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use heed::types::Unit;
|
||||
|
@ -54,17 +68,15 @@ mod tests {
|
|||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::*;
|
||||
use crate::CboRoaringBitmapCodec;
|
||||
|
||||
#[test]
|
||||
fn deserialize_roaring_bitmaps() {
|
||||
let string = "abc";
|
||||
let docids: RoaringBitmap = (0..100).chain(3500..4398).collect();
|
||||
let key = (string, docids.clone());
|
||||
let bytes =
|
||||
FacetStringLevelZeroValueCodec::<CboRoaringBitmapCodec>::bytes_encode(&key).unwrap();
|
||||
let bytes = StringValueCodec::<RoaringBitmapCodec>::bytes_encode(&key).unwrap();
|
||||
let (out_string, out_docids) =
|
||||
FacetStringLevelZeroValueCodec::<CboRoaringBitmapCodec>::bytes_decode(&bytes).unwrap();
|
||||
StringValueCodec::<RoaringBitmapCodec>::bytes_decode(&bytes).unwrap();
|
||||
assert_eq!((out_string, out_docids), (string, docids));
|
||||
}
|
||||
|
||||
|
@ -72,9 +84,8 @@ mod tests {
|
|||
fn deserialize_unit() {
|
||||
let string = "def";
|
||||
let key = (string, ());
|
||||
let bytes = FacetStringLevelZeroValueCodec::<Unit>::bytes_encode(&key).unwrap();
|
||||
let (out_string, out_unit) =
|
||||
FacetStringLevelZeroValueCodec::<Unit>::bytes_decode(&bytes).unwrap();
|
||||
let bytes = StringValueCodec::<Unit>::bytes_encode(&key).unwrap();
|
||||
let (out_string, out_unit) = StringValueCodec::<Unit>::bytes_decode(&bytes).unwrap();
|
||||
assert_eq!((out_string, out_unit), (string, ()));
|
||||
}
|
||||
}
|
||||
|
|
|
@ -9,7 +9,9 @@ mod field_doc_id_facet_string_codec;
|
|||
pub use self::facet_level_value_f64_codec::FacetLevelValueF64Codec;
|
||||
pub use self::facet_level_value_u32_codec::FacetLevelValueU32Codec;
|
||||
pub use self::facet_string_level_zero_codec::FacetStringLevelZeroCodec;
|
||||
pub use self::facet_string_level_zero_value_codec::FacetStringLevelZeroValueCodec;
|
||||
pub use self::facet_string_level_zero_value_codec::{
|
||||
decode_prefix_string, encode_prefix_string, FacetStringLevelZeroValueCodec,
|
||||
};
|
||||
pub use self::facet_string_zero_bounds_value_codec::FacetStringZeroBoundsValueCodec;
|
||||
pub use self::field_doc_id_facet_f64_codec::FieldDocIdFacetF64Codec;
|
||||
pub use self::field_doc_id_facet_string_codec::FieldDocIdFacetStringCodec;
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue