mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-12 07:58:54 +01:00
Polish some details
This commit is contained in:
parent
cb8442a119
commit
51961e1064
@ -1,8 +1,10 @@
|
|||||||
use crate::{try_split_array_at, DocumentId, FieldId};
|
|
||||||
use heed::{BytesDecode, BytesEncode};
|
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::marker::PhantomData;
|
use std::marker::PhantomData;
|
||||||
|
|
||||||
|
use heed::{BytesDecode, BytesEncode};
|
||||||
|
|
||||||
|
use crate::{try_split_array_at, DocumentId, FieldId};
|
||||||
|
|
||||||
pub struct FieldDocIdFacetCodec<C>(PhantomData<C>);
|
pub struct FieldDocIdFacetCodec<C>(PhantomData<C>);
|
||||||
|
|
||||||
impl<'a, C> BytesDecode<'a> for FieldDocIdFacetCodec<C>
|
impl<'a, C> BytesDecode<'a> for FieldDocIdFacetCodec<C>
|
||||||
|
@ -40,6 +40,8 @@ pub struct FacetGroupKey<T> {
|
|||||||
pub left_bound: T,
|
pub left_bound: T,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// The value in the [`facet_id_string_docids` and `facet_id_f64_docids`][`Index::facet_id_string_docids`]
|
||||||
|
/// databases.
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
pub struct FacetGroupValue {
|
pub struct FacetGroupValue {
|
||||||
pub size: u8,
|
pub size: u8,
|
||||||
@ -102,6 +104,8 @@ impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A codec for values of type `&[u8]`. Unlike `ByteSlice`, its `EItem` and `DItem` associated
|
||||||
|
/// types are equivalent (= `&'a [u8]`) and these values can reside within another structure.
|
||||||
pub struct ByteSliceRef;
|
pub struct ByteSliceRef;
|
||||||
|
|
||||||
impl<'a> BytesEncode<'a> for ByteSliceRef {
|
impl<'a> BytesEncode<'a> for ByteSliceRef {
|
||||||
|
@ -2,6 +2,8 @@ use std::borrow::Cow;
|
|||||||
|
|
||||||
use heed::{BytesDecode, BytesEncode};
|
use heed::{BytesDecode, BytesEncode};
|
||||||
|
|
||||||
|
/// A codec for values of type `&str`. Unlike `Str`, its `EItem` and `DItem` associated
|
||||||
|
/// types are equivalent (= `&'a str`) and these values can reside within another structure.
|
||||||
pub struct StrRefCodec;
|
pub struct StrRefCodec;
|
||||||
impl<'a> BytesEncode<'a> for StrRefCodec {
|
impl<'a> BytesEncode<'a> for StrRefCodec {
|
||||||
type EItem = &'a str;
|
type EItem = &'a str;
|
||||||
|
@ -33,10 +33,6 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
|
|||||||
let (field_id_bytes, bytes) = try_split_array_at(key).unwrap();
|
let (field_id_bytes, bytes) = try_split_array_at(key).unwrap();
|
||||||
let field_id = FieldId::from_be_bytes(field_id_bytes);
|
let field_id = FieldId::from_be_bytes(field_id_bytes);
|
||||||
|
|
||||||
// document_id_bytes is a big-endian u32
|
|
||||||
// merge_cbo_roaring_bitmap works with native endian u32s
|
|
||||||
// that is a problem, I think
|
|
||||||
|
|
||||||
let (document_id_bytes, normalized_value_bytes) =
|
let (document_id_bytes, normalized_value_bytes) =
|
||||||
try_split_array_at::<_, 4>(bytes).unwrap();
|
try_split_array_at::<_, 4>(bytes).unwrap();
|
||||||
let document_id = u32::from_be_bytes(document_id_bytes);
|
let document_id = u32::from_be_bytes(document_id_bytes);
|
||||||
@ -45,6 +41,7 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
|
|||||||
let key = FacetGroupKey { field_id, level: 0, left_bound: normalised_value };
|
let key = FacetGroupKey { field_id, level: 0, left_bound: normalised_value };
|
||||||
let key_bytes = FacetGroupKeyCodec::<StrRefCodec>::bytes_encode(&key).unwrap();
|
let key_bytes = FacetGroupKeyCodec::<StrRefCodec>::bytes_encode(&key).unwrap();
|
||||||
|
|
||||||
|
// document id is encoded in native-endian because of the CBO roaring bitmap codec
|
||||||
facet_string_docids_sorter.insert(&key_bytes, &document_id.to_ne_bytes())?;
|
facet_string_docids_sorter.insert(&key_bytes, &document_id.to_ne_bytes())?;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5,7 +5,6 @@ use std::result::Result as StdResult;
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::read_u32_ne_bytes;
|
use super::read_u32_ne_bytes;
|
||||||
// use crate::heed_codec::facet::{decode_prefix_string, encode_prefix_string};
|
|
||||||
use crate::heed_codec::CboRoaringBitmapCodec;
|
use crate::heed_codec::CboRoaringBitmapCodec;
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
|
||||||
@ -49,33 +48,6 @@ pub fn merge_roaring_bitmaps<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Resul
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// pub fn keep_first_prefix_value_merge_roaring_bitmaps<'a>(
|
|
||||||
// _key: &[u8],
|
|
||||||
// values: &[Cow<'a, [u8]>],
|
|
||||||
// ) -> Result<Cow<'a, [u8]>> {
|
|
||||||
// if values.len() == 1 {
|
|
||||||
// Ok(values[0].clone())
|
|
||||||
// } else {
|
|
||||||
// let original = decode_prefix_string(&values[0]).unwrap().0;
|
|
||||||
// let merged_bitmaps = values
|
|
||||||
// .iter()
|
|
||||||
// .map(AsRef::as_ref)
|
|
||||||
// .map(decode_prefix_string)
|
|
||||||
// .map(Option::unwrap)
|
|
||||||
// .map(|(_, bitmap_bytes)| bitmap_bytes)
|
|
||||||
// .map(RoaringBitmap::deserialize_from)
|
|
||||||
// .map(StdResult::unwrap)
|
|
||||||
// .reduce(|a, b| a | b)
|
|
||||||
// .unwrap();
|
|
||||||
|
|
||||||
// let cap = std::mem::size_of::<u16>() + original.len() + merged_bitmaps.serialized_size();
|
|
||||||
// let mut buffer = Vec::with_capacity(cap);
|
|
||||||
// encode_prefix_string(original, &mut buffer)?;
|
|
||||||
// merged_bitmaps.serialize_into(&mut buffer)?;
|
|
||||||
// Ok(Cow::Owned(buffer))
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
|
|
||||||
pub fn keep_first<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
|
pub fn keep_first<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
|
||||||
Ok(values[0].clone())
|
Ok(values[0].clone())
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user