Polish some details

This commit is contained in:
Loïc Lecrenier 2022-09-08 13:41:01 +02:00 committed by Loïc Lecrenier
parent cb8442a119
commit 51961e1064
5 changed files with 11 additions and 34 deletions

View File

@ -1,8 +1,10 @@
use crate::{try_split_array_at, DocumentId, FieldId};
use heed::{BytesDecode, BytesEncode};
use std::borrow::Cow;
use std::marker::PhantomData;
use heed::{BytesDecode, BytesEncode};
use crate::{try_split_array_at, DocumentId, FieldId};
/// A codec type generic over the type parameter `C`.
///
/// It stores no data of its own: `C` only appears inside a `PhantomData` marker.
pub struct FieldDocIdFacetCodec<C>(PhantomData<C>);
impl<'a, C> BytesDecode<'a> for FieldDocIdFacetCodec<C>

View File

@ -40,6 +40,8 @@ pub struct FacetGroupKey<T> {
pub left_bound: T,
}
/// The value in the [`facet_id_string_docids` and `facet_id_f64_docids`][`Index::facet_id_string_docids`]
/// databases.
#[derive(Debug)]
pub struct FacetGroupValue {
pub size: u8,
@ -102,6 +104,8 @@ impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
}
}
/// A codec for values of type `&[u8]`.
///
/// Unlike `ByteSlice`, its `EItem` and `DItem` associated types are equivalent
/// (= `&'a [u8]`) and these values can reside within another structure.
pub struct ByteSliceRef;
impl<'a> BytesEncode<'a> for ByteSliceRef {

View File

@ -2,6 +2,8 @@ use std::borrow::Cow;
use heed::{BytesDecode, BytesEncode};
/// A codec for values of type `&str`.
///
/// Unlike `Str`, its `EItem` and `DItem` associated types are equivalent
/// (= `&'a str`) and these values can reside within another structure.
pub struct StrRefCodec;
impl<'a> BytesEncode<'a> for StrRefCodec {
type EItem = &'a str;

View File

@ -33,10 +33,6 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
let (field_id_bytes, bytes) = try_split_array_at(key).unwrap();
let field_id = FieldId::from_be_bytes(field_id_bytes);
// document_id_bytes is a big-endian u32
// merge_cbo_roaring_bitmap works with native endian u32s
// that is a problem, I think
let (document_id_bytes, normalized_value_bytes) =
try_split_array_at::<_, 4>(bytes).unwrap();
let document_id = u32::from_be_bytes(document_id_bytes);
@ -45,6 +41,7 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
let key = FacetGroupKey { field_id, level: 0, left_bound: normalised_value };
let key_bytes = FacetGroupKeyCodec::<StrRefCodec>::bytes_encode(&key).unwrap();
// document id is encoded in native-endian because of the CBO roaring bitmap codec
facet_string_docids_sorter.insert(&key_bytes, &document_id.to_ne_bytes())?;
}

View File

@ -5,7 +5,6 @@ use std::result::Result as StdResult;
use roaring::RoaringBitmap;
use super::read_u32_ne_bytes;
// use crate::heed_codec::facet::{decode_prefix_string, encode_prefix_string};
use crate::heed_codec::CboRoaringBitmapCodec;
use crate::Result;
@ -49,33 +48,6 @@ pub fn merge_roaring_bitmaps<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Resul
}
}
// pub fn keep_first_prefix_value_merge_roaring_bitmaps<'a>(
// _key: &[u8],
// values: &[Cow<'a, [u8]>],
// ) -> Result<Cow<'a, [u8]>> {
// if values.len() == 1 {
// Ok(values[0].clone())
// } else {
// let original = decode_prefix_string(&values[0]).unwrap().0;
// let merged_bitmaps = values
// .iter()
// .map(AsRef::as_ref)
// .map(decode_prefix_string)
// .map(Option::unwrap)
// .map(|(_, bitmap_bytes)| bitmap_bytes)
// .map(RoaringBitmap::deserialize_from)
// .map(StdResult::unwrap)
// .reduce(|a, b| a | b)
// .unwrap();
// let cap = std::mem::size_of::<u16>() + original.len() + merged_bitmaps.serialized_size();
// let mut buffer = Vec::with_capacity(cap);
// encode_prefix_string(original, &mut buffer)?;
// merged_bitmaps.serialize_into(&mut buffer)?;
// Ok(Cow::Owned(buffer))
// }
// }
/// Returns a clone of the first entry of `values`, ignoring the key.
///
/// # Panics
///
/// Panics if `values` is empty, because the first entry is accessed by index.
pub fn keep_first<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
    let first = &values[0];
    Ok(Cow::clone(first))
}