mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-24 04:07:30 +01:00
Prepare refactor of facets database
Prepare refactor of facets database
This commit is contained in:
parent
004c09a8e2
commit
c3f49f766d
1
infos/src/main.rs
Normal file
1
infos/src/main.rs
Normal file
@ -0,0 +1 @@
|
|||||||
|
|
@ -1,89 +0,0 @@
|
|||||||
use std::borrow::Cow;
|
|
||||||
use std::convert::TryInto;
|
|
||||||
|
|
||||||
use crate::facet::value_encoding::f64_into_bytes;
|
|
||||||
use crate::{try_split_array_at, FieldId};
|
|
||||||
|
|
||||||
// TODO do not de/serialize right bound when level = 0
|
|
||||||
pub struct FacetLevelValueF64Codec;
|
|
||||||
|
|
||||||
impl<'a> heed::BytesDecode<'a> for FacetLevelValueF64Codec {
|
|
||||||
type DItem = (FieldId, u8, f64, f64);
|
|
||||||
|
|
||||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
|
||||||
let (field_id_bytes, bytes) = try_split_array_at(bytes)?;
|
|
||||||
let field_id = u16::from_be_bytes(field_id_bytes);
|
|
||||||
let (level, bytes) = bytes.split_first()?;
|
|
||||||
|
|
||||||
let (left, right) = if *level != 0 {
|
|
||||||
let left = bytes[16..24].try_into().ok().map(f64::from_be_bytes)?;
|
|
||||||
let right = bytes[24..].try_into().ok().map(f64::from_be_bytes)?;
|
|
||||||
(left, right)
|
|
||||||
} else {
|
|
||||||
let left = bytes[8..].try_into().ok().map(f64::from_be_bytes)?;
|
|
||||||
(left, left)
|
|
||||||
};
|
|
||||||
|
|
||||||
Some((field_id, *level, left, right))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl heed::BytesEncode<'_> for FacetLevelValueF64Codec {
|
|
||||||
type EItem = (FieldId, u8, f64, f64);
|
|
||||||
|
|
||||||
fn bytes_encode((field_id, level, left, right): &Self::EItem) -> Option<Cow<[u8]>> {
|
|
||||||
let mut buffer = [0u8; 32];
|
|
||||||
|
|
||||||
let len = if *level != 0 {
|
|
||||||
// Write the globally ordered floats.
|
|
||||||
let bytes = f64_into_bytes(*left)?;
|
|
||||||
buffer[..8].copy_from_slice(&bytes[..]);
|
|
||||||
|
|
||||||
let bytes = f64_into_bytes(*right)?;
|
|
||||||
buffer[8..16].copy_from_slice(&bytes[..]);
|
|
||||||
|
|
||||||
// Then the f64 values just to be able to read them back.
|
|
||||||
let bytes = left.to_be_bytes();
|
|
||||||
buffer[16..24].copy_from_slice(&bytes[..]);
|
|
||||||
|
|
||||||
let bytes = right.to_be_bytes();
|
|
||||||
buffer[24..].copy_from_slice(&bytes[..]);
|
|
||||||
|
|
||||||
32 // length
|
|
||||||
} else {
|
|
||||||
// Write the globally ordered floats.
|
|
||||||
let bytes = f64_into_bytes(*left)?;
|
|
||||||
buffer[..8].copy_from_slice(&bytes[..]);
|
|
||||||
|
|
||||||
// Then the f64 values just to be able to read them back.
|
|
||||||
let bytes = left.to_be_bytes();
|
|
||||||
buffer[8..16].copy_from_slice(&bytes[..]);
|
|
||||||
|
|
||||||
16 // length
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut bytes = Vec::with_capacity(len + 3);
|
|
||||||
bytes.extend_from_slice(&field_id.to_be_bytes());
|
|
||||||
bytes.push(*level);
|
|
||||||
bytes.extend_from_slice(&buffer[..len]);
|
|
||||||
Some(Cow::Owned(bytes))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
#[cfg(test)]
mod tests {
    use heed::{BytesDecode, BytesEncode};

    use super::*;

    /// Round-trips one level 0 key and one level 1 key through the codec.
    #[test]
    fn globally_ordered_f64() {
        // At level 0 the right bound is not stored: it decodes as the left bound.
        let level_zero = (3, 0, 32.0, 0.0);
        let encoded = FacetLevelValueF64Codec::bytes_encode(&level_zero).unwrap();
        let decoded = FacetLevelValueF64Codec::bytes_decode(&encoded).unwrap();
        assert_eq!(decoded, (3, 0, 32.0, 32.0));

        // At level 1 both bounds survive the round trip.
        let level_one = (3, 1, -32.0, 32.0);
        let encoded = FacetLevelValueF64Codec::bytes_encode(&level_one).unwrap();
        let decoded = FacetLevelValueF64Codec::bytes_decode(&encoded).unwrap();
        assert_eq!(decoded, (3, 1, -32.0, 32.0));
    }
}
|
|
@ -1,53 +0,0 @@
|
|||||||
use std::borrow::Cow;
|
|
||||||
use std::convert::TryInto;
|
|
||||||
use std::num::NonZeroU8;
|
|
||||||
|
|
||||||
use crate::{try_split_array_at, FieldId};
|
|
||||||
|
|
||||||
/// A codec that stores the field id, level 1 and higher and the groups ids.
|
|
||||||
///
|
|
||||||
/// It can only be used to encode the facet string of the level 1 or higher.
|
|
||||||
pub struct FacetLevelValueU32Codec;
|
|
||||||
|
|
||||||
impl<'a> heed::BytesDecode<'a> for FacetLevelValueU32Codec {
|
|
||||||
type DItem = (FieldId, NonZeroU8, u32, u32);
|
|
||||||
|
|
||||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
|
||||||
let (field_id_bytes, bytes) = try_split_array_at(bytes)?;
|
|
||||||
let field_id = u16::from_be_bytes(field_id_bytes);
|
|
||||||
let (level, bytes) = bytes.split_first()?;
|
|
||||||
let level = NonZeroU8::new(*level)?;
|
|
||||||
let left = bytes[8..12].try_into().ok().map(u32::from_be_bytes)?;
|
|
||||||
let right = bytes[12..].try_into().ok().map(u32::from_be_bytes)?;
|
|
||||||
Some((field_id, level, left, right))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl heed::BytesEncode<'_> for FacetLevelValueU32Codec {
|
|
||||||
type EItem = (FieldId, NonZeroU8, u32, u32);
|
|
||||||
|
|
||||||
fn bytes_encode((field_id, level, left, right): &Self::EItem) -> Option<Cow<[u8]>> {
|
|
||||||
let mut buffer = [0u8; 16];
|
|
||||||
|
|
||||||
// Write the big-endian integers.
|
|
||||||
let bytes = left.to_be_bytes();
|
|
||||||
buffer[..4].copy_from_slice(&bytes[..]);
|
|
||||||
|
|
||||||
let bytes = right.to_be_bytes();
|
|
||||||
buffer[4..8].copy_from_slice(&bytes[..]);
|
|
||||||
|
|
||||||
// Then the u32 values just to be able to read them back.
|
|
||||||
let bytes = left.to_be_bytes();
|
|
||||||
buffer[8..12].copy_from_slice(&bytes[..]);
|
|
||||||
|
|
||||||
let bytes = right.to_be_bytes();
|
|
||||||
buffer[12..].copy_from_slice(&bytes[..]);
|
|
||||||
|
|
||||||
let mut bytes = Vec::with_capacity(buffer.len() + 2 + 1);
|
|
||||||
bytes.extend_from_slice(&field_id.to_be_bytes());
|
|
||||||
bytes.push(level.get());
|
|
||||||
bytes.extend_from_slice(&buffer);
|
|
||||||
|
|
||||||
Some(Cow::Owned(bytes))
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,50 +0,0 @@
|
|||||||
use std::borrow::Cow;
|
|
||||||
use std::str;
|
|
||||||
|
|
||||||
use crate::{try_split_array_at, FieldId};
|
|
||||||
|
|
||||||
/// A codec that stores the field id, level 0, and facet string.
|
|
||||||
///
|
|
||||||
/// It can only be used to encode the facet string of the level 0,
|
|
||||||
/// as it hardcodes the level.
|
|
||||||
///
|
|
||||||
/// We encode the level 0 to not break the lexicographical ordering of the LMDB keys,
|
|
||||||
/// and make sure that the levels are not mixed-up. The level 0 is special, the key
|
|
||||||
/// are strings, other levels represent groups and keys are simply two integers.
|
|
||||||
pub struct FacetStringLevelZeroCodec;
|
|
||||||
|
|
||||||
impl FacetStringLevelZeroCodec {
|
|
||||||
pub fn serialize_into(field_id: FieldId, value: &str, out: &mut Vec<u8>) {
|
|
||||||
out.reserve(value.len() + 2);
|
|
||||||
out.extend_from_slice(&field_id.to_be_bytes());
|
|
||||||
out.push(0); // the level zero (for LMDB ordering only)
|
|
||||||
out.extend_from_slice(value.as_bytes());
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> heed::BytesDecode<'a> for FacetStringLevelZeroCodec {
|
|
||||||
type DItem = (FieldId, &'a str);
|
|
||||||
|
|
||||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
|
||||||
let (field_id_bytes, bytes) = try_split_array_at(bytes)?;
|
|
||||||
let field_id = u16::from_be_bytes(field_id_bytes);
|
|
||||||
let (level, bytes) = bytes.split_first()?;
|
|
||||||
|
|
||||||
if *level != 0 {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
|
|
||||||
let value = str::from_utf8(bytes).ok()?;
|
|
||||||
Some((field_id, value))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> heed::BytesEncode<'a> for FacetStringLevelZeroCodec {
|
|
||||||
type EItem = (FieldId, &'a str);
|
|
||||||
|
|
||||||
fn bytes_encode((field_id, value): &Self::EItem) -> Option<Cow<[u8]>> {
|
|
||||||
let mut bytes = Vec::new();
|
|
||||||
FacetStringLevelZeroCodec::serialize_into(*field_id, value, &mut bytes);
|
|
||||||
Some(Cow::Owned(bytes))
|
|
||||||
}
|
|
||||||
}
|
|
@ -1,90 +0,0 @@
|
|||||||
use std::borrow::Cow;
|
|
||||||
use std::convert::TryInto;
|
|
||||||
use std::{marker, str};
|
|
||||||
|
|
||||||
use crate::error::SerializationError;
|
|
||||||
use crate::heed_codec::RoaringBitmapCodec;
|
|
||||||
use crate::{try_split_array_at, try_split_at, Result};
|
|
||||||
|
|
||||||
pub type FacetStringLevelZeroValueCodec = StringValueCodec<RoaringBitmapCodec>;
|
|
||||||
|
|
||||||
/// A codec that encodes a string in front of a value.
|
|
||||||
///
|
|
||||||
/// The usecase is for the facet string levels algorithm where we must know the
|
|
||||||
/// original string of a normalized facet value, the original values are stored
|
|
||||||
/// in the value to not break the lexicographical ordering of the LMDB keys.
|
|
||||||
pub struct StringValueCodec<C>(marker::PhantomData<C>);
|
|
||||||
|
|
||||||
impl<'a, C> heed::BytesDecode<'a> for StringValueCodec<C>
|
|
||||||
where
|
|
||||||
C: heed::BytesDecode<'a>,
|
|
||||||
{
|
|
||||||
type DItem = (&'a str, C::DItem);
|
|
||||||
|
|
||||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
|
||||||
let (string, bytes) = decode_prefix_string(bytes)?;
|
|
||||||
C::bytes_decode(bytes).map(|item| (string, item))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a, C> heed::BytesEncode<'a> for StringValueCodec<C>
|
|
||||||
where
|
|
||||||
C: heed::BytesEncode<'a>,
|
|
||||||
{
|
|
||||||
type EItem = (&'a str, C::EItem);
|
|
||||||
|
|
||||||
fn bytes_encode((string, value): &'a Self::EItem) -> Option<Cow<[u8]>> {
|
|
||||||
let value_bytes = C::bytes_encode(&value)?;
|
|
||||||
|
|
||||||
let mut bytes = Vec::with_capacity(2 + string.len() + value_bytes.len());
|
|
||||||
encode_prefix_string(string, &mut bytes).ok()?;
|
|
||||||
bytes.extend_from_slice(&value_bytes[..]);
|
|
||||||
|
|
||||||
Some(Cow::Owned(bytes))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Splits `value` into a length-prefixed string and the bytes that follow it.
///
/// The prefix is a big-endian `u16` giving the string length in bytes.
/// Returns `None` when `value` is too short or the string is not valid UTF-8.
pub fn decode_prefix_string(value: &[u8]) -> Option<(&str, &[u8])> {
    let (length_bytes, after_length) = try_split_array_at(value)?;
    let length = usize::from(u16::from_be_bytes(length_bytes));
    let (raw_string, remaining) = try_split_at(after_length, length)?;
    str::from_utf8(raw_string).ok().map(|string| (string, remaining))
}
|
|
||||||
|
|
||||||
/// Appends `string` to `buffer`, preceded by its byte length as a big-endian `u16`.
///
/// Fails with `SerializationError::InvalidNumberSerialization` when the
/// length does not fit in a `u16`; nothing is written in that case.
pub fn encode_prefix_string(string: &str, buffer: &mut Vec<u8>) -> Result<()> {
    let length: u16 = match string.len().try_into() {
        Ok(length) => length,
        Err(_) => return Err(SerializationError::InvalidNumberSerialization.into()),
    };

    buffer.extend_from_slice(&length.to_be_bytes());
    buffer.extend_from_slice(string.as_bytes());
    Ok(())
}
|
|
||||||
|
|
||||||
#[cfg(test)]
mod tests {
    use heed::types::Unit;
    use heed::{BytesDecode, BytesEncode};
    use roaring::RoaringBitmap;

    use super::*;

    /// A string followed by a roaring bitmap survives an encode/decode round trip.
    #[test]
    fn deserialize_roaring_bitmaps() {
        let docids: RoaringBitmap = (0..100).chain(3500..4398).collect();
        let input = ("abc", docids.clone());

        let encoded = StringValueCodec::<RoaringBitmapCodec>::bytes_encode(&input).unwrap();
        let decoded = StringValueCodec::<RoaringBitmapCodec>::bytes_decode(&encoded).unwrap();

        assert_eq!(decoded, ("abc", docids));
    }

    /// A string followed by a unit value survives an encode/decode round trip.
    #[test]
    fn deserialize_unit() {
        let input = ("def", ());

        let encoded = StringValueCodec::<Unit>::bytes_encode(&input).unwrap();
        let decoded = StringValueCodec::<Unit>::bytes_decode(&encoded).unwrap();

        assert_eq!(decoded, ("def", ()));
    }
}
|
|
@ -1,19 +1,21 @@
|
|||||||
mod facet_level_value_f64_codec;
|
// mod facet_level_value_f64_codec;
|
||||||
mod facet_level_value_u32_codec;
|
// mod facet_level_value_u32_codec;
|
||||||
mod facet_string_level_zero_codec;
|
// mod facet_string_level_zero_codec;
|
||||||
mod facet_string_level_zero_value_codec;
|
// mod facet_string_level_zero_value_codec;
|
||||||
mod facet_string_zero_bounds_value_codec;
|
mod facet_string_zero_bounds_value_codec;
|
||||||
mod field_doc_id_facet_f64_codec;
|
mod field_doc_id_facet_f64_codec;
|
||||||
mod field_doc_id_facet_string_codec;
|
mod field_doc_id_facet_string_codec;
|
||||||
|
|
||||||
|
pub mod new;
|
||||||
|
|
||||||
use heed::types::OwnedType;
|
use heed::types::OwnedType;
|
||||||
|
|
||||||
pub use self::facet_level_value_f64_codec::FacetLevelValueF64Codec;
|
// pub use self::facet_level_value_f64_codec::FacetLevelValueF64Codec;
|
||||||
pub use self::facet_level_value_u32_codec::FacetLevelValueU32Codec;
|
// pub use self::facet_level_value_u32_codec::FacetLevelValueU32Codec;
|
||||||
pub use self::facet_string_level_zero_codec::FacetStringLevelZeroCodec;
|
// pub use self::facet_string_level_zero_codec::FacetStringLevelZeroCodec;
|
||||||
pub use self::facet_string_level_zero_value_codec::{
|
// pub use self::facet_string_level_zero_value_codec::{
|
||||||
decode_prefix_string, encode_prefix_string, FacetStringLevelZeroValueCodec,
|
// decode_prefix_string, encode_prefix_string, FacetStringLevelZeroValueCodec,
|
||||||
};
|
// };
|
||||||
pub use self::facet_string_zero_bounds_value_codec::FacetStringZeroBoundsValueCodec;
|
pub use self::facet_string_zero_bounds_value_codec::FacetStringZeroBoundsValueCodec;
|
||||||
pub use self::field_doc_id_facet_f64_codec::FieldDocIdFacetF64Codec;
|
pub use self::field_doc_id_facet_f64_codec::FieldDocIdFacetF64Codec;
|
||||||
pub use self::field_doc_id_facet_string_codec::FieldDocIdFacetStringCodec;
|
pub use self::field_doc_id_facet_string_codec::FieldDocIdFacetStringCodec;
|
||||||
|
148
milli/src/heed_codec/facet/new/mod.rs
Normal file
148
milli/src/heed_codec/facet/new/mod.rs
Normal file
@ -0,0 +1,148 @@
|
|||||||
|
use heed::{BytesDecode, BytesEncode};
|
||||||
|
use roaring::RoaringBitmap;
|
||||||
|
use std::{borrow::Cow, convert::TryFrom, marker::PhantomData};
|
||||||
|
|
||||||
|
pub mod ordered_f64_codec;
|
||||||
|
pub mod str_ref;
|
||||||
|
// TODO: these codecs were quickly written and not fast/resilient enough
|
||||||
|
|
||||||
|
/// A facet database key: a field id, a level and a left bound of type `T`.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
pub struct FacetKey<T> {
    pub field_id: u16,
    pub level: u8,
    pub left_bound: T,
}

impl<'a> FacetKey<&'a [u8]> {
    /// Converts a key borrowing its left bound into one owning a copy of it.
    pub fn into_owned(self) -> FacetKey<Vec<u8>> {
        FacetKey {
            field_id: self.field_id,
            level: self.level,
            left_bound: self.left_bound.to_vec(),
        }
    }
}

impl FacetKey<Vec<u8>> {
    /// Returns a key with the same field id and level that borrows the left
    /// bound of `self`.
    pub fn as_ref(&self) -> FacetKey<&[u8]> {
        FacetKey {
            field_id: self.field_id,
            level: self.level,
            left_bound: self.left_bound.as_slice(),
        }
    }
}
|
||||||
|
|
||||||
|
pub struct FacetGroupValue {
|
||||||
|
pub size: u8,
|
||||||
|
pub bitmap: RoaringBitmap,
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct FacetKeyCodec<T> {
|
||||||
|
_phantom: PhantomData<T>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a, T> heed::BytesEncode<'a> for FacetKeyCodec<T>
|
||||||
|
where
|
||||||
|
T: BytesEncode<'a>,
|
||||||
|
T::EItem: Sized,
|
||||||
|
{
|
||||||
|
type EItem = FacetKey<T::EItem>;
|
||||||
|
|
||||||
|
fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||||
|
let mut v = vec![];
|
||||||
|
v.extend_from_slice(&value.field_id.to_be_bytes());
|
||||||
|
v.extend_from_slice(&[value.level]);
|
||||||
|
|
||||||
|
let bound = T::bytes_encode(&value.left_bound).unwrap();
|
||||||
|
v.extend_from_slice(&bound);
|
||||||
|
|
||||||
|
Some(Cow::Owned(v))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl<'a, T> heed::BytesDecode<'a> for FacetKeyCodec<T>
|
||||||
|
where
|
||||||
|
T: BytesDecode<'a>,
|
||||||
|
{
|
||||||
|
type DItem = FacetKey<T::DItem>;
|
||||||
|
|
||||||
|
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||||
|
let fid = u16::from_be_bytes(<[u8; 2]>::try_from(&bytes[0..=1]).unwrap());
|
||||||
|
let level = bytes[2];
|
||||||
|
let bound = T::bytes_decode(&bytes[3..]).unwrap();
|
||||||
|
Some(FacetKey { field_id: fid, level, left_bound: bound })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct FacetGroupValueCodec;
|
||||||
|
impl<'a> heed::BytesEncode<'a> for FacetGroupValueCodec {
|
||||||
|
type EItem = FacetGroupValue;
|
||||||
|
|
||||||
|
fn bytes_encode(value: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||||
|
let mut v = vec![];
|
||||||
|
v.push(value.size);
|
||||||
|
value.bitmap.serialize_into(&mut v).unwrap();
|
||||||
|
Some(Cow::Owned(v))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
|
||||||
|
type DItem = FacetGroupValue;
|
||||||
|
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||||
|
let size = bytes[0];
|
||||||
|
let bitmap = RoaringBitmap::deserialize_from(&bytes[1..]).unwrap();
|
||||||
|
Some(FacetGroupValue { size, bitmap })
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// TODO: get rid of this codec as it is named confusingly + should really be part of heed
|
||||||
|
// or even replace the current ByteSlice codec
|
||||||
|
pub struct MyByteSlice;
|
||||||
|
|
||||||
|
impl<'a> BytesEncode<'a> for MyByteSlice {
|
||||||
|
type EItem = &'a [u8];
|
||||||
|
|
||||||
|
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||||
|
Some(Cow::Borrowed(item))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> BytesDecode<'a> for MyByteSlice {
|
||||||
|
type DItem = &'a [u8];
|
||||||
|
|
||||||
|
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||||
|
Some(bytes)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// I won't need these ones anymore
|
||||||
|
// pub struct U16Codec;
|
||||||
|
// impl<'a> BytesEncode<'a> for U16Codec {
|
||||||
|
// type EItem = u16;
|
||||||
|
|
||||||
|
// fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||||
|
// Some(Cow::Owned(item.to_be_bytes().to_vec()))
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// impl<'a> BytesDecode<'a> for U16Codec {
|
||||||
|
// type DItem = u16;
|
||||||
|
|
||||||
|
// fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||||
|
// Some(u16::from_be_bytes(bytes[0..=1].try_into().unwrap()))
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
// pub struct StrCodec;
|
||||||
|
// impl<'a> BytesEncode<'a> for StrCodec {
|
||||||
|
// type EItem = &'a str;
|
||||||
|
|
||||||
|
// fn bytes_encode(item: &'a &'a str) -> Option<Cow<'a, [u8]>> {
|
||||||
|
// Some(Cow::Borrowed(item.as_bytes()))
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// impl<'a> BytesDecode<'a> for StrCodec {
|
||||||
|
// type DItem = &'a str;
|
||||||
|
|
||||||
|
// fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||||
|
// let s = std::str::from_utf8(bytes).unwrap();
|
||||||
|
// Some(s)
|
||||||
|
// }
|
||||||
|
// }
|
36
milli/src/heed_codec/facet/new/ordered_f64_codec.rs
Normal file
36
milli/src/heed_codec/facet/new/ordered_f64_codec.rs
Normal file
@ -0,0 +1,36 @@
|
|||||||
|
use std::{borrow::Cow, convert::TryInto};
|
||||||
|
|
||||||
|
use heed::BytesDecode;
|
||||||
|
|
||||||
|
use crate::facet::value_encoding::f64_into_bytes;
|
||||||
|
|
||||||
|
pub struct OrderedF64Codec;
|
||||||
|
|
||||||
|
impl<'a> BytesDecode<'a> for OrderedF64Codec {
|
||||||
|
type DItem = f64;
|
||||||
|
|
||||||
|
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||||
|
if bytes.len() < 16 {
|
||||||
|
return None;
|
||||||
|
}
|
||||||
|
let f = bytes[8..].try_into().ok().map(f64::from_be_bytes)?;
|
||||||
|
Some(f)
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl heed::BytesEncode<'_> for OrderedF64Codec {
|
||||||
|
type EItem = f64;
|
||||||
|
|
||||||
|
fn bytes_encode(f: &Self::EItem) -> Option<Cow<[u8]>> {
|
||||||
|
let mut buffer = [0u8; 16];
|
||||||
|
|
||||||
|
// write the globally ordered float
|
||||||
|
let bytes = f64_into_bytes(*f)?;
|
||||||
|
buffer[..8].copy_from_slice(&bytes[..]);
|
||||||
|
// Then the f64 value just to be able to read it back
|
||||||
|
let bytes = f.to_be_bytes();
|
||||||
|
buffer[8..16].copy_from_slice(&bytes[..]);
|
||||||
|
|
||||||
|
Some(Cow::Owned(buffer.to_vec()))
|
||||||
|
}
|
||||||
|
}
|
20
milli/src/heed_codec/facet/new/str_ref.rs
Normal file
20
milli/src/heed_codec/facet/new/str_ref.rs
Normal file
@ -0,0 +1,20 @@
|
|||||||
|
use std::borrow::Cow;
|
||||||
|
|
||||||
|
use heed::{BytesDecode, BytesEncode};
|
||||||
|
|
||||||
|
pub struct StrRefCodec;
|
||||||
|
impl<'a> BytesEncode<'a> for StrRefCodec {
|
||||||
|
type EItem = &'a str;
|
||||||
|
|
||||||
|
fn bytes_encode(item: &'a &'a str) -> Option<Cow<'a, [u8]>> {
|
||||||
|
Some(Cow::Borrowed(item.as_bytes()))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
impl<'a> BytesDecode<'a> for StrRefCodec {
|
||||||
|
type DItem = &'a str;
|
||||||
|
|
||||||
|
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||||
|
let s = std::str::from_utf8(bytes).unwrap();
|
||||||
|
Some(s)
|
||||||
|
}
|
||||||
|
}
|
@ -13,9 +13,14 @@ use time::OffsetDateTime;
|
|||||||
|
|
||||||
use crate::error::{InternalError, UserError};
|
use crate::error::{InternalError, UserError};
|
||||||
use crate::fields_ids_map::FieldsIdsMap;
|
use crate::fields_ids_map::FieldsIdsMap;
|
||||||
|
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
|
||||||
|
use crate::heed_codec::facet::new::str_ref::StrRefCodec;
|
||||||
|
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKeyCodec};
|
||||||
use crate::heed_codec::facet::{
|
use crate::heed_codec::facet::{
|
||||||
FacetLevelValueF64Codec, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec,
|
// FacetLevelValueF64Codec, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec,
|
||||||
FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec, FieldIdCodec,
|
FieldDocIdFacetF64Codec,
|
||||||
|
FieldDocIdFacetStringCodec,
|
||||||
|
FieldIdCodec,
|
||||||
};
|
};
|
||||||
use crate::{
|
use crate::{
|
||||||
default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
|
default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
|
||||||
@ -123,10 +128,10 @@ pub struct Index {
|
|||||||
/// Maps the facet field id and the docids for which this field exists
|
/// Maps the facet field id and the docids for which this field exists
|
||||||
pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
|
pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
|
||||||
|
|
||||||
/// Maps the facet field id, level and the number with the docids that corresponds to it.
|
/// Maps the facet field id and ranges of numbers with the docids that correspond to them.
|
||||||
pub facet_id_f64_docids: Database<FacetLevelValueF64Codec, CboRoaringBitmapCodec>,
|
pub facet_id_f64_docids: Database<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
|
||||||
/// Maps the facet field id and the string with the original string and docids that corresponds to it.
|
/// Maps the facet field id and ranges of strings with the docids that correspond to them.
|
||||||
pub facet_id_string_docids: Database<FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec>,
|
pub facet_id_string_docids: Database<FacetKeyCodec<StrRefCodec>, FacetGroupValueCodec>,
|
||||||
|
|
||||||
/// Maps the document id, the facet field id and the numbers.
|
/// Maps the document id, the facet field id and the numbers.
|
||||||
pub field_id_docid_facet_f64s: Database<FieldDocIdFacetF64Codec, Unit>,
|
pub field_id_docid_facet_f64s: Database<FieldDocIdFacetF64Codec, Unit>,
|
||||||
|
@ -7,7 +7,7 @@ use roaring::RoaringBitmap;
|
|||||||
|
|
||||||
use super::{Criterion, CriterionParameters, CriterionResult};
|
use super::{Criterion, CriterionParameters, CriterionResult};
|
||||||
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
|
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
|
||||||
use crate::search::facet::{FacetNumberIter, FacetStringIter};
|
// use crate::search::facet::FacetStringIter;
|
||||||
use crate::search::query_tree::Operation;
|
use crate::search::query_tree::Operation;
|
||||||
use crate::{FieldId, Index, Result};
|
use crate::{FieldId, Index, Result};
|
||||||
|
|
||||||
@ -186,23 +186,24 @@ fn facet_ordered<'t>(
|
|||||||
iterative_facet_string_ordered_iter(index, rtxn, field_id, is_ascending, candidates)?;
|
iterative_facet_string_ordered_iter(index, rtxn, field_id, is_ascending, candidates)?;
|
||||||
Ok(Box::new(number_iter.chain(string_iter).map(Ok)) as Box<dyn Iterator<Item = _>>)
|
Ok(Box::new(number_iter.chain(string_iter).map(Ok)) as Box<dyn Iterator<Item = _>>)
|
||||||
} else {
|
} else {
|
||||||
let facet_number_fn = if is_ascending {
|
todo!()
|
||||||
FacetNumberIter::new_reducing
|
// let facet_number_fn = if is_ascending {
|
||||||
} else {
|
// FacetNumberIter::new_reducing
|
||||||
FacetNumberIter::new_reverse_reducing
|
// } else {
|
||||||
};
|
// FacetNumberIter::new_reverse_reducing
|
||||||
let number_iter = facet_number_fn(rtxn, index, field_id, candidates.clone())?
|
// };
|
||||||
.map(|res| res.map(|(_, docids)| docids));
|
// let number_iter = facet_number_fn(rtxn, index, field_id, candidates.clone())?
|
||||||
|
// .map(|res| res.map(|(_, docids)| docids));
|
||||||
|
|
||||||
let facet_string_fn = if is_ascending {
|
// let facet_string_fn = if is_ascending {
|
||||||
FacetStringIter::new_reducing
|
// FacetStringIter::new_reducing
|
||||||
} else {
|
// } else {
|
||||||
FacetStringIter::new_reverse_reducing
|
// FacetStringIter::new_reverse_reducing
|
||||||
};
|
// };
|
||||||
let string_iter = facet_string_fn(rtxn, index, field_id, candidates)?
|
// let string_iter = facet_string_fn(rtxn, index, field_id, candidates)?
|
||||||
.map(|res| res.map(|(_, _, docids)| docids));
|
// .map(|res| res.map(|(_, _, docids)| docids));
|
||||||
|
|
||||||
Ok(Box::new(number_iter.chain(string_iter)))
|
// Ok(Box::new(number_iter.chain(string_iter)))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6,6 +6,7 @@ use roaring::RoaringBitmap;
|
|||||||
|
|
||||||
use super::{Distinct, DocIter};
|
use super::{Distinct, DocIter};
|
||||||
use crate::error::InternalError;
|
use crate::error::InternalError;
|
||||||
|
use crate::heed_codec::facet::new::FacetKey;
|
||||||
use crate::heed_codec::facet::*;
|
use crate::heed_codec::facet::*;
|
||||||
use crate::index::db_name;
|
use crate::index::db_name;
|
||||||
use crate::{DocumentId, FieldId, Index, Result};
|
use crate::{DocumentId, FieldId, Index, Result};
|
||||||
@ -47,13 +48,16 @@ impl<'a> FacetDistinctIter<'a> {
|
|||||||
fn facet_string_docids(&self, key: &str) -> heed::Result<Option<RoaringBitmap>> {
|
fn facet_string_docids(&self, key: &str) -> heed::Result<Option<RoaringBitmap>> {
|
||||||
self.index
|
self.index
|
||||||
.facet_id_string_docids
|
.facet_id_string_docids
|
||||||
.get(self.txn, &(self.distinct, key))
|
.get(self.txn, &FacetKey { field_id: self.distinct, level: 0, left_bound: key })
|
||||||
.map(|result| result.map(|(_original, docids)| docids))
|
.map(|opt| opt.map(|v| v.bitmap))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn facet_number_docids(&self, key: f64) -> heed::Result<Option<RoaringBitmap>> {
|
fn facet_number_docids(&self, key: f64) -> heed::Result<Option<RoaringBitmap>> {
|
||||||
// get facet docids on level 0
|
// get facet docids on level 0
|
||||||
self.index.facet_id_f64_docids.get(self.txn, &(self.distinct, 0, key, key))
|
self.index
|
||||||
|
.facet_id_f64_docids
|
||||||
|
.get(self.txn, &FacetKey { field_id: self.distinct, level: 0, left_bound: key })
|
||||||
|
.map(|opt| opt.map(|v| v.bitmap))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn distinct_string(&mut self, id: DocumentId) -> Result<()> {
|
fn distinct_string(&mut self, id: DocumentId) -> Result<()> {
|
||||||
|
@ -7,10 +7,8 @@ use roaring::RoaringBitmap;
|
|||||||
|
|
||||||
use crate::error::UserError;
|
use crate::error::UserError;
|
||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
use crate::heed_codec::facet::{
|
use crate::heed_codec::facet::{FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec};
|
||||||
FacetStringLevelZeroCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
|
// use crate::search::facet::FacetStringIter;
|
||||||
};
|
|
||||||
use crate::search::facet::{FacetNumberIter, FacetNumberRange, FacetStringIter};
|
|
||||||
use crate::{FieldId, Index, Result};
|
use crate::{FieldId, Index, Result};
|
||||||
|
|
||||||
/// The default number of values by facets that will
|
/// The default number of values by facets that will
|
||||||
@ -133,21 +131,22 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
candidates: &RoaringBitmap,
|
candidates: &RoaringBitmap,
|
||||||
distribution: &mut BTreeMap<String, u64>,
|
distribution: &mut BTreeMap<String, u64>,
|
||||||
) -> heed::Result<()> {
|
) -> heed::Result<()> {
|
||||||
let iter =
|
todo!()
|
||||||
FacetNumberIter::new_non_reducing(self.rtxn, self.index, field_id, candidates.clone())?;
|
// let iter =
|
||||||
|
// FacetNumberIter::new_non_reducing(self.rtxn, self.index, field_id, candidates.clone())?;
|
||||||
|
|
||||||
for result in iter {
|
// for result in iter {
|
||||||
let (value, mut docids) = result?;
|
// let (value, mut docids) = result?;
|
||||||
docids &= candidates;
|
// docids &= candidates;
|
||||||
if !docids.is_empty() {
|
// if !docids.is_empty() {
|
||||||
distribution.insert(value.to_string(), docids.len());
|
// distribution.insert(value.to_string(), docids.len());
|
||||||
}
|
// }
|
||||||
if distribution.len() == self.max_values_per_facet {
|
// if distribution.len() == self.max_values_per_facet {
|
||||||
break;
|
// break;
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
|
||||||
Ok(())
|
// Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn facet_strings_distribution_from_facet_levels(
|
fn facet_strings_distribution_from_facet_levels(
|
||||||
@ -156,21 +155,22 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
candidates: &RoaringBitmap,
|
candidates: &RoaringBitmap,
|
||||||
distribution: &mut BTreeMap<String, u64>,
|
distribution: &mut BTreeMap<String, u64>,
|
||||||
) -> heed::Result<()> {
|
) -> heed::Result<()> {
|
||||||
let iter =
|
todo!()
|
||||||
FacetStringIter::new_non_reducing(self.rtxn, self.index, field_id, candidates.clone())?;
|
// let iter =
|
||||||
|
// FacetStringIter::new_non_reducing(self.rtxn, self.index, field_id, candidates.clone())?;
|
||||||
|
|
||||||
for result in iter {
|
// for result in iter {
|
||||||
let (_normalized, original, mut docids) = result?;
|
// let (_normalized, original, mut docids) = result?;
|
||||||
docids &= candidates;
|
// docids &= candidates;
|
||||||
if !docids.is_empty() {
|
// if !docids.is_empty() {
|
||||||
distribution.insert(original.to_string(), docids.len());
|
// distribution.insert(original.to_string(), docids.len());
|
||||||
}
|
// }
|
||||||
if distribution.len() == self.max_values_per_facet {
|
// if distribution.len() == self.max_values_per_facet {
|
||||||
break;
|
// break;
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
|
||||||
Ok(())
|
// Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Placeholder search, a.k.a. no candidates were specified. We iterate through the
|
/// Placeholder search, a.k.a. no candidates were specified. We iterate through the
|
||||||
@ -179,41 +179,43 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
&self,
|
&self,
|
||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
) -> heed::Result<BTreeMap<String, u64>> {
|
) -> heed::Result<BTreeMap<String, u64>> {
|
||||||
let mut distribution = BTreeMap::new();
|
todo!()
|
||||||
|
// let mut distribution = BTreeMap::new();
|
||||||
|
|
||||||
let db = self.index.facet_id_f64_docids;
|
// let db = self.index.facet_id_f64_docids;
|
||||||
let range = FacetNumberRange::new(self.rtxn, db, field_id, 0, Unbounded, Unbounded)?;
|
// let range = FacetNumberRange::new(self.rtxn, db, field_id, 0, Unbounded, Unbounded)?;
|
||||||
|
|
||||||
for result in range {
|
// for result in range {
|
||||||
let ((_, _, value, _), docids) = result?;
|
// let ((_, _, value, _), docids) = result?;
|
||||||
distribution.insert(value.to_string(), docids.len());
|
// distribution.insert(value.to_string(), docids.len());
|
||||||
if distribution.len() == self.max_values_per_facet {
|
// if distribution.len() == self.max_values_per_facet {
|
||||||
break;
|
// break;
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
|
||||||
let iter = self
|
// let iter = self
|
||||||
.index
|
// .index
|
||||||
.facet_id_string_docids
|
// .facet_id_string_docids
|
||||||
.remap_key_type::<ByteSlice>()
|
// .remap_key_type::<ByteSlice>()
|
||||||
.prefix_iter(self.rtxn, &field_id.to_be_bytes())?
|
// .prefix_iter(self.rtxn, &field_id.to_be_bytes())?
|
||||||
.remap_key_type::<FacetStringLevelZeroCodec>();
|
// .remap_key_type::<FacetStringLevelZeroCodec>();
|
||||||
|
|
||||||
let mut normalized_distribution = BTreeMap::new();
|
// let mut normalized_distribution = BTreeMap::new();
|
||||||
for result in iter {
|
// for result in iter {
|
||||||
let ((_, normalized_value), (original_value, docids)) = result?;
|
// let ((_, normalized_value), group_value) = result?;
|
||||||
normalized_distribution.insert(normalized_value, (original_value, docids.len()));
|
// normalized_distribution
|
||||||
if normalized_distribution.len() == self.max_values_per_facet {
|
// .insert(normalized_value, (normalized_value, group_value.bitmap.len()));
|
||||||
break;
|
// if normalized_distribution.len() == self.max_values_per_facet {
|
||||||
}
|
// break;
|
||||||
}
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
let iter = normalized_distribution
|
// let iter = normalized_distribution
|
||||||
.into_iter()
|
// .into_iter()
|
||||||
.map(|(_normalized, (original, count))| (original.to_string(), count));
|
// .map(|(_normalized, (original, count))| (original.to_string(), count));
|
||||||
distribution.extend(iter);
|
// distribution.extend(iter);
|
||||||
|
|
||||||
Ok(distribution)
|
// Ok(distribution)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn facet_values(&self, field_id: FieldId) -> heed::Result<BTreeMap<String, u64>> {
|
fn facet_values(&self, field_id: FieldId) -> heed::Result<BTreeMap<String, u64>> {
|
||||||
|
@ -1,248 +1,335 @@
|
|||||||
use std::ops::Bound::{self, Excluded, Included, Unbounded};
|
// use std::ops::Bound::{self, Excluded, Included, Unbounded};
|
||||||
|
|
||||||
use either::Either::{self, Left, Right};
|
// use either::Either::{self, Left, Right};
|
||||||
use heed::types::{ByteSlice, DecodeIgnore};
|
// use heed::types::{ByteSlice, DecodeIgnore};
|
||||||
use heed::{Database, LazyDecode, RoRange, RoRevRange};
|
// use heed::{BytesDecode, BytesEncode, Database, Lazy, LazyDecode, RoRange, RoRevRange};
|
||||||
use roaring::RoaringBitmap;
|
// use obkv::Key;
|
||||||
|
// use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::heed_codec::facet::FacetLevelValueF64Codec;
|
// use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
|
||||||
use crate::heed_codec::CboRoaringBitmapCodec;
|
// use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec};
|
||||||
use crate::{FieldId, Index};
|
// use crate::heed_codec::CboRoaringBitmapCodec;
|
||||||
|
// use crate::{FieldId, Index};
|
||||||
|
|
||||||
pub struct FacetNumberRange<'t> {
|
// pub struct FacetNumberRange<'t, 'e> {
|
||||||
iter: RoRange<'t, FacetLevelValueF64Codec, LazyDecode<CboRoaringBitmapCodec>>,
|
// rtxn: &'t heed::RoTxn<'e>,
|
||||||
end: Bound<f64>,
|
// db: Database<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
|
||||||
}
|
// iter: RoRange<'t, FacetKeyCodec<OrderedF64Codec>, LazyDecode<FacetGroupValueCodec>>,
|
||||||
|
// max_bound: f64,
|
||||||
|
// previous: Option<(FacetKey<f64>, Lazy<'t, FacetGroupValueCodec>)>,
|
||||||
|
// field_id: FieldId,
|
||||||
|
// end: Bound<f64>,
|
||||||
|
// }
|
||||||
|
|
||||||
impl<'t> FacetNumberRange<'t> {
|
// impl<'t, 'e> FacetNumberRange<'t, 'e> {
|
||||||
pub fn new(
|
// pub fn new(
|
||||||
rtxn: &'t heed::RoTxn,
|
// rtxn: &'t heed::RoTxn<'e>,
|
||||||
db: Database<FacetLevelValueF64Codec, CboRoaringBitmapCodec>,
|
// db: Database<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
|
||||||
field_id: FieldId,
|
// field_id: FieldId,
|
||||||
level: u8,
|
// level: u8,
|
||||||
left: Bound<f64>,
|
// left: Bound<f64>,
|
||||||
right: Bound<f64>,
|
// right: Bound<f64>,
|
||||||
) -> heed::Result<FacetNumberRange<'t>> {
|
// ) -> heed::Result<FacetNumberRange<'t, 'e>> {
|
||||||
let left_bound = match left {
|
// let left_bound = match left {
|
||||||
Included(left) => Included((field_id, level, left, f64::MIN)),
|
// Included(left_bound) => Included(FacetKey { field_id, level, left_bound }),
|
||||||
Excluded(left) => Excluded((field_id, level, left, f64::MIN)),
|
// Excluded(left_bound) => Excluded(FacetKey { field_id, level, left_bound }),
|
||||||
Unbounded => Included((field_id, level, f64::MIN, f64::MIN)),
|
// Unbounded => Included(FacetKey { field_id, level, left_bound: f64::MIN }),
|
||||||
};
|
// };
|
||||||
let right_bound = Included((field_id, level, f64::MAX, f64::MAX));
|
|
||||||
let iter = db.lazily_decode_data().range(rtxn, &(left_bound, right_bound))?;
|
|
||||||
Ok(FacetNumberRange { iter, end: right })
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'t> Iterator for FacetNumberRange<'t> {
|
// let mut iter = db.lazily_decode_data().range(rtxn, &(left_bound, Unbounded))?;
|
||||||
type Item = heed::Result<((FieldId, u8, f64, f64), RoaringBitmap)>;
|
// let mut previous = iter.next().transpose()?;
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
// // Compute the maximum end bound by looking at the key of the last element in level 0
|
||||||
match self.iter.next() {
|
// let mut prefix_level_0 = vec![];
|
||||||
Some(Ok(((fid, level, left, right), docids))) => {
|
// prefix_level_0.extend_from_slice(&field_id.to_be_bytes());
|
||||||
let must_be_returned = match self.end {
|
// prefix_level_0.push(level);
|
||||||
Included(end) => right <= end,
|
|
||||||
Excluded(end) => right < end,
|
|
||||||
Unbounded => true,
|
|
||||||
};
|
|
||||||
if must_be_returned {
|
|
||||||
match docids.decode() {
|
|
||||||
Ok(docids) => Some(Ok(((fid, level, left, right), docids))),
|
|
||||||
Err(e) => Some(Err(e)),
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
|
||||||
}
|
|
||||||
Some(Err(e)) => Some(Err(e)),
|
|
||||||
None => None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct FacetNumberRevRange<'t> {
|
// let mut rev_iter =
|
||||||
iter: RoRevRange<'t, FacetLevelValueF64Codec, LazyDecode<CboRoaringBitmapCodec>>,
|
// db.as_polymorph().rev_prefix_iter::<_, ByteSlice, ByteSlice>(rtxn, &prefix_level_0)?;
|
||||||
end: Bound<f64>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'t> FacetNumberRevRange<'t> {
|
// let rev_iter_first = rev_iter.next().transpose()?;
|
||||||
pub fn new(
|
// let max_bound = if let Some((max_bound_key, _)) = rev_iter_first {
|
||||||
rtxn: &'t heed::RoTxn,
|
// let max_bound_key =
|
||||||
db: Database<FacetLevelValueF64Codec, CboRoaringBitmapCodec>,
|
// FacetKeyCodec::<OrderedF64Codec>::bytes_decode(max_bound_key).unwrap();
|
||||||
field_id: FieldId,
|
// max_bound_key.left_bound
|
||||||
level: u8,
|
// } else {
|
||||||
left: Bound<f64>,
|
// // I can't imagine when that would happen, but let's handle it correctly anyway
|
||||||
right: Bound<f64>,
|
// // by making the iterator empty
|
||||||
) -> heed::Result<FacetNumberRevRange<'t>> {
|
// previous = None;
|
||||||
let left_bound = match left {
|
// 0.0 // doesn't matter since previous = None so the iterator will always early exit
|
||||||
Included(left) => Included((field_id, level, left, f64::MIN)),
|
// // and return None itself
|
||||||
Excluded(left) => Excluded((field_id, level, left, f64::MIN)),
|
// };
|
||||||
Unbounded => Included((field_id, level, f64::MIN, f64::MIN)),
|
|
||||||
};
|
|
||||||
let right_bound = Included((field_id, level, f64::MAX, f64::MAX));
|
|
||||||
let iter = db.lazily_decode_data().rev_range(rtxn, &(left_bound, right_bound))?;
|
|
||||||
Ok(FacetNumberRevRange { iter, end: right })
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'t> Iterator for FacetNumberRevRange<'t> {
|
// Ok(FacetNumberRange { rtxn, db, iter, field_id, previous, max_bound, end: right })
|
||||||
type Item = heed::Result<((FieldId, u8, f64, f64), RoaringBitmap)>;
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
// impl<'t, 'e> Iterator for FacetNumberRange<'t, 'e> {
|
||||||
loop {
|
// type Item = heed::Result<(FacetKey<f64>, RoaringBitmap)>;
|
||||||
match self.iter.next() {
|
|
||||||
Some(Ok(((fid, level, left, right), docids))) => {
|
|
||||||
let must_be_returned = match self.end {
|
|
||||||
Included(end) => right <= end,
|
|
||||||
Excluded(end) => right < end,
|
|
||||||
Unbounded => true,
|
|
||||||
};
|
|
||||||
if must_be_returned {
|
|
||||||
match docids.decode() {
|
|
||||||
Ok(docids) => return Some(Ok(((fid, level, left, right), docids))),
|
|
||||||
Err(e) => return Some(Err(e)),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
Some(Err(e)) => return Some(Err(e)),
|
|
||||||
None => return None,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub struct FacetNumberIter<'t> {
|
// fn next(&mut self) -> Option<Self::Item> {
|
||||||
rtxn: &'t heed::RoTxn<'t>,
|
// // The idea here is to return the **previous** element only if the left
|
||||||
db: Database<FacetLevelValueF64Codec, CboRoaringBitmapCodec>,
|
// // bound of the current key fits within the range given to the iter
|
||||||
field_id: FieldId,
|
// // if it doesn't, then there is still a chance that it must be returned,
|
||||||
level_iters: Vec<(RoaringBitmap, Either<FacetNumberRange<'t>, FacetNumberRevRange<'t>>)>,
|
// // but we need to check the actual right bound of the group by looking for
|
||||||
must_reduce: bool,
|
// // the key preceding the first key of the next group in level 0
|
||||||
}
|
|
||||||
|
|
||||||
impl<'t> FacetNumberIter<'t> {
|
// let (prev_key, prev_value) = self.previous?;
|
||||||
/// Create a `FacetNumberIter` that will iterate on the different facet entries
|
|
||||||
/// (facet value + documents ids) and that will reduce the given documents ids
|
|
||||||
/// while iterating on the different facet levels.
|
|
||||||
pub fn new_reducing(
|
|
||||||
rtxn: &'t heed::RoTxn,
|
|
||||||
index: &'t Index,
|
|
||||||
field_id: FieldId,
|
|
||||||
documents_ids: RoaringBitmap,
|
|
||||||
) -> heed::Result<FacetNumberIter<'t>> {
|
|
||||||
let db = index.facet_id_f64_docids.remap_key_type::<FacetLevelValueF64Codec>();
|
|
||||||
let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
|
|
||||||
let highest_iter =
|
|
||||||
FacetNumberRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
|
|
||||||
let level_iters = vec![(documents_ids, Left(highest_iter))];
|
|
||||||
Ok(FacetNumberIter { rtxn, db, field_id, level_iters, must_reduce: true })
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Create a `FacetNumberIter` that will iterate on the different facet entries in reverse
|
// let (next_left_bound, next_previous) = if let Some(next) = self.iter.next() {
|
||||||
/// (facet value + documents ids) and that will reduce the given documents ids
|
// let (key, group_value) = match next {
|
||||||
/// while iterating on the different facet levels.
|
// Ok(n) => n,
|
||||||
pub fn new_reverse_reducing(
|
// Err(e) => return Some(Err(e)),
|
||||||
rtxn: &'t heed::RoTxn,
|
// };
|
||||||
index: &'t Index,
|
// (key.left_bound, Some((key, group_value)))
|
||||||
field_id: FieldId,
|
// } else {
|
||||||
documents_ids: RoaringBitmap,
|
// // we're at the end of the level iter, so we need to fetch the max bound instead
|
||||||
) -> heed::Result<FacetNumberIter<'t>> {
|
// (self.max_bound, None)
|
||||||
let db = index.facet_id_f64_docids;
|
// };
|
||||||
let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
|
// let must_be_returned = match self.end {
|
||||||
let highest_iter =
|
// Included(end) => next_left_bound <= end,
|
||||||
FacetNumberRevRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
|
// Excluded(end) => next_left_bound < end,
|
||||||
let level_iters = vec![(documents_ids, Right(highest_iter))];
|
// Unbounded => true,
|
||||||
Ok(FacetNumberIter { rtxn, db, field_id, level_iters, must_reduce: true })
|
// };
|
||||||
}
|
// if must_be_returned {
|
||||||
|
// match prev_value.decode() {
|
||||||
|
// Ok(group_value) => {
|
||||||
|
// self.previous = next_previous;
|
||||||
|
// Some(Ok((prev_key, group_value.bitmap)))
|
||||||
|
// }
|
||||||
|
// Err(e) => Some(Err(e)),
|
||||||
|
// }
|
||||||
|
// } else {
|
||||||
|
//             // it is still possible that we want to return the value (one last time)
|
||||||
|
// // but to do so, we need to fetch the right bound of the current group
|
||||||
|
// // this is done by getting the first element at level 0 of the next group
|
||||||
|
// // then iterating in reverse from it
|
||||||
|
// // once we have the right bound, we can compare it, and then return or not
|
||||||
|
// // then we still set self.previous to None so that no other element can return
|
||||||
|
// // from it?
|
||||||
|
// let mut level_0_key_prefix = vec![];
|
||||||
|
// level_0_key_prefix.extend_from_slice(&self.field_id.to_be_bytes());
|
||||||
|
// level_0_key_prefix.push(0);
|
||||||
|
// let key =
|
||||||
|
// FacetKey::<f64> { field_id: self.field_id, level: 0, left_bound: next_left_bound };
|
||||||
|
// let key_bytes = FacetKeyCodec::<OrderedF64Codec>::bytes_encode(&key).unwrap();
|
||||||
|
// level_0_key_prefix.extend_from_slice(&key_bytes);
|
||||||
|
|
||||||
/// Create a `FacetNumberIter` that will iterate on the different facet entries
|
// let mut rev_iter_next_group_level_0 = self
|
||||||
/// (facet value + documents ids) and that will not reduce the given documents ids
|
// .db
|
||||||
/// while iterating on the different facet levels, possibly returning multiple times
|
// .as_polymorph()
|
||||||
/// a document id associated with multiple facet values.
|
// .rev_prefix_iter::<_, ByteSlice, ByteSlice>(&self.rtxn, &level_0_key_prefix)
|
||||||
pub fn new_non_reducing(
|
// .unwrap();
|
||||||
rtxn: &'t heed::RoTxn,
|
// let (key_for_right_bound, _) = rev_iter_next_group_level_0.next().unwrap().unwrap();
|
||||||
index: &'t Index,
|
// let key_for_right_bound =
|
||||||
field_id: FieldId,
|
// FacetKeyCodec::<OrderedF64Codec>::bytes_decode(key_for_right_bound).unwrap();
|
||||||
documents_ids: RoaringBitmap,
|
// let right_bound = key_for_right_bound.left_bound;
|
||||||
) -> heed::Result<FacetNumberIter<'t>> {
|
// let must_be_returned = match self.end {
|
||||||
let db = index.facet_id_f64_docids.remap_key_type::<FacetLevelValueF64Codec>();
|
// Included(end) => right_bound <= end,
|
||||||
let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
|
// Excluded(end) => right_bound < end,
|
||||||
let highest_iter =
|
// Unbounded => unreachable!(),
|
||||||
FacetNumberRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
|
// };
|
||||||
let level_iters = vec![(documents_ids, Left(highest_iter))];
|
// self.previous = None;
|
||||||
Ok(FacetNumberIter { rtxn, db, field_id, level_iters, must_reduce: false })
|
// if must_be_returned {
|
||||||
}
|
// match prev_value.decode() {
|
||||||
|
// Ok(group_value) => Some(Ok((prev_key, group_value.bitmap))),
|
||||||
|
// Err(e) => Some(Err(e)),
|
||||||
|
// }
|
||||||
|
// } else {
|
||||||
|
// None
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
fn highest_level<X>(
|
// pub struct FacetNumberRevRange<'t> {
|
||||||
rtxn: &'t heed::RoTxn,
|
// iter: RoRevRange<'t, FacetKeyCodec<OrderedF64Codec>, LazyDecode<FacetGroupValueCodec>>,
|
||||||
db: Database<FacetLevelValueF64Codec, X>,
|
// end: Bound<f64>,
|
||||||
fid: FieldId,
|
// }
|
||||||
) -> heed::Result<Option<u8>> {
|
|
||||||
let level = db
|
|
||||||
.remap_types::<ByteSlice, DecodeIgnore>()
|
|
||||||
.prefix_iter(rtxn, &fid.to_be_bytes())?
|
|
||||||
.remap_key_type::<FacetLevelValueF64Codec>()
|
|
||||||
.last()
|
|
||||||
.transpose()?
|
|
||||||
.map(|((_, level, _, _), _)| level);
|
|
||||||
Ok(level)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'t> Iterator for FacetNumberIter<'t> {
|
// impl<'t> FacetNumberRevRange<'t> {
|
||||||
type Item = heed::Result<(f64, RoaringBitmap)>;
|
// pub fn new(
|
||||||
|
// rtxn: &'t heed::RoTxn,
|
||||||
|
// db: Database<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
|
||||||
|
// field_id: FieldId,
|
||||||
|
// level: u8,
|
||||||
|
// left: Bound<f64>,
|
||||||
|
// right: Bound<f64>,
|
||||||
|
// ) -> heed::Result<FacetNumberRevRange<'t>> {
|
||||||
|
// let left_bound = match left {
|
||||||
|
// Included(left) => Included(FacetKey { field_id, level, left_bound: left }),
|
||||||
|
// Excluded(left) => Excluded(FacetKey { field_id, level, left_bound: left }),
|
||||||
|
// Unbounded => Included(FacetKey { field_id, level, left_bound: f64::MIN }),
|
||||||
|
// };
|
||||||
|
// let right_bound = Included(FacetKey { field_id, level, left_bound: f64::MAX });
|
||||||
|
// let iter = db.lazily_decode_data().rev_range(rtxn, &(left_bound, right_bound))?;
|
||||||
|
// Ok(FacetNumberRevRange { iter, end: right })
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
fn next(&mut self) -> Option<Self::Item> {
|
// impl<'t> Iterator for FacetNumberRevRange<'t> {
|
||||||
'outer: loop {
|
// type Item = heed::Result<(FacetKey<f64>, RoaringBitmap)>;
|
||||||
let (documents_ids, last) = self.level_iters.last_mut()?;
|
|
||||||
let is_ascending = last.is_left();
|
|
||||||
for result in last {
|
|
||||||
// If the last iterator must find an empty set of documents it means
|
|
||||||
// that we found all the documents in the sub level iterations already,
|
|
||||||
// we can pop this level iterator.
|
|
||||||
if documents_ids.is_empty() {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
|
|
||||||
match result {
|
// fn next(&mut self) -> Option<Self::Item> {
|
||||||
Ok(((_fid, level, left, right), mut docids)) => {
|
// loop {
|
||||||
docids &= &*documents_ids;
|
// match self.iter.next() {
|
||||||
if !docids.is_empty() {
|
// Some(Ok((FacetKey { field_id, level, left_bound }, docids))) => {
|
||||||
if self.must_reduce {
|
// let must_be_returned = match self.end {
|
||||||
*documents_ids -= &docids;
|
// Included(end) => todo!(), //right <= end,
|
||||||
}
|
// Excluded(end) => todo!(), //right < end,
|
||||||
|
// Unbounded => true,
|
||||||
|
// };
|
||||||
|
// if must_be_returned {
|
||||||
|
// match docids.decode() {
|
||||||
|
// Ok(docids) => {
|
||||||
|
// return Some(Ok((
|
||||||
|
// FacetKey { field_id, level, left_bound },
|
||||||
|
// docids.bitmap,
|
||||||
|
// )))
|
||||||
|
// }
|
||||||
|
// Err(e) => return Some(Err(e)),
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// continue;
|
||||||
|
// }
|
||||||
|
// Some(Err(e)) => return Some(Err(e)),
|
||||||
|
// None => return None,
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
if level == 0 {
|
// pub struct FacetNumberIter<'t, 'e> {
|
||||||
return Some(Ok((left, docids)));
|
// rtxn: &'t heed::RoTxn<'t>,
|
||||||
}
|
// db: Database<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
|
||||||
|
// field_id: FieldId,
|
||||||
|
// level_iters: Vec<(RoaringBitmap, Either<FacetNumberRange<'t, 'e>, FacetNumberRevRange<'t>>)>,
|
||||||
|
// must_reduce: bool,
|
||||||
|
// }
|
||||||
|
|
||||||
let rtxn = self.rtxn;
|
// impl<'t, 'e> FacetNumberIter<'t, 'e> {
|
||||||
let db = self.db;
|
// /// Create a `FacetNumberIter` that will iterate on the different facet entries
|
||||||
let fid = self.field_id;
|
// /// (facet value + documents ids) and that will reduce the given documents ids
|
||||||
let left = Included(left);
|
// /// while iterating on the different facet levels.
|
||||||
let right = Included(right);
|
// pub fn new_reducing(
|
||||||
|
// rtxn: &'t heed::RoTxn<'e>,
|
||||||
|
// index: &'t Index,
|
||||||
|
// field_id: FieldId,
|
||||||
|
// documents_ids: RoaringBitmap,
|
||||||
|
// ) -> heed::Result<FacetNumberIter<'t, 'e>> {
|
||||||
|
// let db = index.facet_id_f64_docids;
|
||||||
|
// let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
|
||||||
|
// let highest_iter =
|
||||||
|
// FacetNumberRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
|
||||||
|
// let level_iters = vec![(documents_ids, Left(highest_iter))];
|
||||||
|
// Ok(FacetNumberIter { rtxn, db, field_id, level_iters, must_reduce: true })
|
||||||
|
// }
|
||||||
|
|
||||||
let result = if is_ascending {
|
// /// Create a `FacetNumberIter` that will iterate on the different facet entries in reverse
|
||||||
FacetNumberRange::new(rtxn, db, fid, level - 1, left, right)
|
// /// (facet value + documents ids) and that will reduce the given documents ids
|
||||||
.map(Left)
|
// /// while iterating on the different facet levels.
|
||||||
} else {
|
// pub fn new_reverse_reducing(
|
||||||
FacetNumberRevRange::new(rtxn, db, fid, level - 1, left, right)
|
// rtxn: &'t heed::RoTxn<'e>,
|
||||||
.map(Right)
|
// index: &'t Index,
|
||||||
};
|
// field_id: FieldId,
|
||||||
|
// documents_ids: RoaringBitmap,
|
||||||
|
// ) -> heed::Result<FacetNumberIter<'t, 'e>> {
|
||||||
|
// let db = index.facet_id_f64_docids;
|
||||||
|
// let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
|
||||||
|
// let highest_iter =
|
||||||
|
// FacetNumberRevRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
|
||||||
|
// let level_iters = vec![(documents_ids, Right(highest_iter))];
|
||||||
|
// Ok(FacetNumberIter { rtxn, db, field_id, level_iters, must_reduce: true })
|
||||||
|
// }
|
||||||
|
|
||||||
match result {
|
// /// Create a `FacetNumberIter` that will iterate on the different facet entries
|
||||||
Ok(iter) => {
|
// /// (facet value + documents ids) and that will not reduce the given documents ids
|
||||||
self.level_iters.push((docids, iter));
|
// /// while iterating on the different facet levels, possibly returning multiple times
|
||||||
continue 'outer;
|
// /// a document id associated with multiple facet values.
|
||||||
}
|
// pub fn new_non_reducing(
|
||||||
Err(e) => return Some(Err(e)),
|
// rtxn: &'t heed::RoTxn<'e>,
|
||||||
}
|
// index: &'t Index,
|
||||||
}
|
// field_id: FieldId,
|
||||||
}
|
// documents_ids: RoaringBitmap,
|
||||||
Err(e) => return Some(Err(e)),
|
// ) -> heed::Result<FacetNumberIter<'t, 'e>> {
|
||||||
}
|
// let db = index.facet_id_f64_docids;
|
||||||
}
|
// let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
|
||||||
self.level_iters.pop();
|
// let highest_iter =
|
||||||
}
|
// FacetNumberRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
|
||||||
}
|
// let level_iters = vec![(documents_ids, Left(highest_iter))];
|
||||||
}
|
// Ok(FacetNumberIter { rtxn, db, field_id, level_iters, must_reduce: false })
|
||||||
|
// }
|
||||||
|
|
||||||
|
// fn highest_level<X>(
|
||||||
|
// rtxn: &'t heed::RoTxn,
|
||||||
|
// db: Database<FacetKeyCodec<OrderedF64Codec>, X>,
|
||||||
|
// fid: FieldId,
|
||||||
|
// ) -> heed::Result<Option<u8>> {
|
||||||
|
// let level = db
|
||||||
|
// .remap_types::<ByteSlice, DecodeIgnore>()
|
||||||
|
// .prefix_iter(rtxn, &fid.to_be_bytes())?
|
||||||
|
// .remap_key_type::<FacetKeyCodec<OrderedF64Codec>>()
|
||||||
|
// .last()
|
||||||
|
// .transpose()?
|
||||||
|
// .map(|(key, _)| key.level);
|
||||||
|
// Ok(level)
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
// impl<'t, 'e> Iterator for FacetNumberIter<'t, 'e> {
|
||||||
|
// type Item = heed::Result<(f64, RoaringBitmap)>;
|
||||||
|
|
||||||
|
// fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
// 'outer: loop {
|
||||||
|
// let (documents_ids, last) = self.level_iters.last_mut()?;
|
||||||
|
// let is_ascending = last.is_left();
|
||||||
|
// for result in last {
|
||||||
|
// // If the last iterator must find an empty set of documents it means
|
||||||
|
// // that we found all the documents in the sub level iterations already,
|
||||||
|
// // we can pop this level iterator.
|
||||||
|
// if documents_ids.is_empty() {
|
||||||
|
// break;
|
||||||
|
// }
|
||||||
|
|
||||||
|
// match result {
|
||||||
|
// Ok((key, mut docids)) => {
|
||||||
|
// docids &= &*documents_ids;
|
||||||
|
// if !docids.is_empty() {
|
||||||
|
// if self.must_reduce {
|
||||||
|
// *documents_ids -= &docids;
|
||||||
|
// }
|
||||||
|
|
||||||
|
// if level == 0 {
|
||||||
|
// return Some(Ok((left, docids)));
|
||||||
|
// }
|
||||||
|
|
||||||
|
// let rtxn = self.rtxn;
|
||||||
|
// let db = self.db;
|
||||||
|
// let fid = self.field_id;
|
||||||
|
// let left = Included(left);
|
||||||
|
// let right = Included(right);
|
||||||
|
|
||||||
|
// let result = if is_ascending {
|
||||||
|
// FacetNumberRange::new(rtxn, db, fid, level - 1, left, right)
|
||||||
|
// .map(Left)
|
||||||
|
// } else {
|
||||||
|
// FacetNumberRevRange::new(rtxn, db, fid, level - 1, left, right)
|
||||||
|
// .map(Right)
|
||||||
|
// };
|
||||||
|
|
||||||
|
// match result {
|
||||||
|
// Ok(iter) => {
|
||||||
|
// self.level_iters.push((docids, iter));
|
||||||
|
// continue 'outer;
|
||||||
|
// }
|
||||||
|
// Err(e) => return Some(Err(e)),
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// Err(e) => return Some(Err(e)),
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// self.level_iters.pop();
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
File diff suppressed because it is too large
Load Diff
@ -1,16 +1,20 @@
|
|||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use std::fmt::{Debug, Display};
|
use std::fmt::{Debug, Display};
|
||||||
use std::ops::Bound::{self, Excluded, Included};
|
use std::ops::Bound::{self, Excluded, Included};
|
||||||
|
use std::ops::RangeBounds;
|
||||||
|
|
||||||
use either::Either;
|
use either::Either;
|
||||||
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Span, Token};
|
pub use filter_parser::{Condition, Error as FPError, FilterCondition, Span, Token};
|
||||||
use heed::types::DecodeIgnore;
|
use heed::types::DecodeIgnore;
|
||||||
|
use heed::LazyDecode;
|
||||||
use log::debug;
|
use log::debug;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::FacetNumberRange;
|
// use super::FacetNumberRange;
|
||||||
use crate::error::{Error, UserError};
|
use crate::error::{Error, UserError};
|
||||||
use crate::heed_codec::facet::FacetLevelValueF64Codec;
|
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
|
||||||
|
use crate::heed_codec::facet::new::{FacetGroupValueCodec, FacetKey, FacetKeyCodec};
|
||||||
|
// use crate::heed_codec::facet::FacetLevelValueF64Codec;
|
||||||
use crate::{
|
use crate::{
|
||||||
distance_between_two_points, lat_lng_to_xyz, CboRoaringBitmapCodec, FieldId, Index, Result,
|
distance_between_two_points, lat_lng_to_xyz, CboRoaringBitmapCodec, FieldId, Index, Result,
|
||||||
};
|
};
|
||||||
@ -144,18 +148,29 @@ impl<'a> Filter<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn explore_facet_number_levels(
|
||||||
|
rtxn: &heed::RoTxn,
|
||||||
|
db: heed::Database<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
|
||||||
|
field_id: FieldId,
|
||||||
|
) {
|
||||||
|
}
|
||||||
|
|
||||||
impl<'a> Filter<'a> {
|
impl<'a> Filter<'a> {
|
||||||
/// Aggregates the documents ids that are part of the specified range automatically
|
/// Aggregates the documents ids that are part of the specified range automatically
|
||||||
/// going deeper through the levels.
|
/// going deeper through the levels.
|
||||||
fn explore_facet_number_levels(
|
fn explore_facet_number_levels(
|
||||||
rtxn: &heed::RoTxn,
|
rtxn: &heed::RoTxn,
|
||||||
db: heed::Database<FacetLevelValueF64Codec, CboRoaringBitmapCodec>,
|
db: heed::Database<FacetKeyCodec<OrderedF64Codec>, CboRoaringBitmapCodec>,
|
||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
level: u8,
|
level: u8,
|
||||||
left: Bound<f64>,
|
left: Bound<f64>,
|
||||||
right: Bound<f64>,
|
right: Bound<f64>,
|
||||||
output: &mut RoaringBitmap,
|
output: &mut RoaringBitmap,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
|
// level must be > 0, I'll create a separate function for level 0
|
||||||
|
// if level == 0 {
|
||||||
|
// call that function
|
||||||
|
//}
|
||||||
match (left, right) {
|
match (left, right) {
|
||||||
// If the request is an exact value we must go directly to the deepest level.
|
// If the request is an exact value we must go directly to the deepest level.
|
||||||
(Included(l), Included(r)) if l == r && level > 0 => {
|
(Included(l), Included(r)) if l == r && level > 0 => {
|
||||||
@ -170,87 +185,121 @@ impl<'a> Filter<'a> {
|
|||||||
(Excluded(l), Included(r)) if l >= r => return Ok(()),
|
(Excluded(l), Included(r)) if l >= r => return Ok(()),
|
||||||
(_, _) => (),
|
(_, _) => (),
|
||||||
}
|
}
|
||||||
|
let range_start_key = FacetKey {
|
||||||
let mut left_found = None;
|
field_id,
|
||||||
let mut right_found = None;
|
level,
|
||||||
|
left_bound: match left {
|
||||||
// We must create a custom iterator to be able to iterate over the
|
Included(l) => l,
|
||||||
// requested range as the range iterator cannot express some conditions.
|
Excluded(l) => l,
|
||||||
let iter = FacetNumberRange::new(rtxn, db, field_id, level, left, right)?;
|
Bound::Unbounded => f64::MIN,
|
||||||
|
},
|
||||||
debug!("Iterating between {:?} and {:?} (level {})", left, right, level);
|
|
||||||
|
|
||||||
for (i, result) in iter.enumerate() {
|
|
||||||
let ((_fid, level, l, r), docids) = result?;
|
|
||||||
debug!("{:?} to {:?} (level {}) found {} documents", l, r, level, docids.len());
|
|
||||||
*output |= docids;
|
|
||||||
// We save the leftest and rightest bounds we actually found at this level.
|
|
||||||
if i == 0 {
|
|
||||||
left_found = Some(l);
|
|
||||||
}
|
|
||||||
right_found = Some(r);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Can we go deeper?
|
|
||||||
let deeper_level = match level.checked_sub(1) {
|
|
||||||
Some(level) => level,
|
|
||||||
None => return Ok(()),
|
|
||||||
};
|
};
|
||||||
|
let mut range_iter = db
|
||||||
|
.remap_data_type::<LazyDecode<FacetGroupValueCodec>>()
|
||||||
|
.range(rtxn, &(range_start_key..))?;
|
||||||
|
|
||||||
// We must refine the left and right bounds of this range by retrieving the
|
let (mut previous_facet_key, mut previous_value) = range_iter.next().unwrap()?;
|
||||||
// missing part in a deeper level.
|
while let Some(el) = range_iter.next() {
|
||||||
match left_found.zip(right_found) {
|
let (facet_key, value) = el?;
|
||||||
Some((left_found, right_found)) => {
|
let range = (Included(previous_facet_key.left_bound), Excluded(facet_key.left_bound));
|
||||||
// If the bound is satisfied we avoid calling this function again.
|
// if the current range intersects with the query range, then go deeper
|
||||||
if !matches!(left, Included(l) if l == left_found) {
|
// what does it mean for two ranges to intersect?
|
||||||
let sub_right = Excluded(left_found);
|
let gte_left = match left {
|
||||||
debug!(
|
Included(l) => previous_facet_key.left_bound >= l,
|
||||||
"calling left with {:?} to {:?} (level {})",
|
Excluded(l) => previous_facet_key.left_bound > l, // TODO: not true?
|
||||||
left, sub_right, deeper_level
|
Bound::Unbounded => true,
|
||||||
);
|
};
|
||||||
Self::explore_facet_number_levels(
|
let lte_right = match right {
|
||||||
rtxn,
|
Included(r) => facet_key.left_bound <= r,
|
||||||
db,
|
Excluded(r) => facet_key.left_bound < r,
|
||||||
field_id,
|
Bound::Unbounded => true,
|
||||||
deeper_level,
|
};
|
||||||
left,
|
|
||||||
sub_right,
|
|
||||||
output,
|
|
||||||
)?;
|
|
||||||
}
|
|
||||||
if !matches!(right, Included(r) if r == right_found) {
|
|
||||||
let sub_left = Excluded(right_found);
|
|
||||||
debug!(
|
|
||||||
"calling right with {:?} to {:?} (level {})",
|
|
||||||
sub_left, right, deeper_level
|
|
||||||
);
|
|
||||||
Self::explore_facet_number_levels(
|
|
||||||
rtxn,
|
|
||||||
db,
|
|
||||||
field_id,
|
|
||||||
deeper_level,
|
|
||||||
sub_left,
|
|
||||||
right,
|
|
||||||
output,
|
|
||||||
)?;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
None => {
|
|
||||||
// If we found nothing at this level it means that we must find
|
|
||||||
// the same bounds but at a deeper, more precise level.
|
|
||||||
Self::explore_facet_number_levels(
|
|
||||||
rtxn,
|
|
||||||
db,
|
|
||||||
field_id,
|
|
||||||
deeper_level,
|
|
||||||
left,
|
|
||||||
right,
|
|
||||||
output,
|
|
||||||
)?;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
// at this point, previous_facet_key and previous_value are the last groups in the level
|
||||||
|
// we must also check whether we should visit this group
|
||||||
|
|
||||||
Ok(())
|
todo!();
|
||||||
|
|
||||||
|
// let mut left_found = None;
|
||||||
|
// let mut right_found = None;
|
||||||
|
|
||||||
|
// // We must create a custom iterator to be able to iterate over the
|
||||||
|
// // requested range as the range iterator cannot express some conditions.
|
||||||
|
// let iter = FacetNumberRange::new(rtxn, db, field_id, level, left, right)?;
|
||||||
|
|
||||||
|
// debug!("Iterating between {:?} and {:?} (level {})", left, right, level);
|
||||||
|
|
||||||
|
// for (i, result) in iter.enumerate() {
|
||||||
|
// let ((_fid, level, l, r), docids) = result?;
|
||||||
|
// debug!("{:?} to {:?} (level {}) found {} documents", l, r, level, docids.len());
|
||||||
|
// *output |= docids;
|
||||||
|
// // We save the leftest and rightest bounds we actually found at this level.
|
||||||
|
// if i == 0 {
|
||||||
|
// left_found = Some(l);
|
||||||
|
// }
|
||||||
|
// right_found = Some(r);
|
||||||
|
// }
|
||||||
|
|
||||||
|
// // Can we go deeper?
|
||||||
|
// let deeper_level = match level.checked_sub(1) {
|
||||||
|
// Some(level) => level,
|
||||||
|
// None => return Ok(()),
|
||||||
|
// };
|
||||||
|
|
||||||
|
// // We must refine the left and right bounds of this range by retrieving the
|
||||||
|
// // missing part in a deeper level.
|
||||||
|
// match left_found.zip(right_found) {
|
||||||
|
// Some((left_found, right_found)) => {
|
||||||
|
// // If the bound is satisfied we avoid calling this function again.
|
||||||
|
// if !matches!(left, Included(l) if l == left_found) {
|
||||||
|
// let sub_right = Excluded(left_found);
|
||||||
|
// debug!(
|
||||||
|
// "calling left with {:?} to {:?} (level {})",
|
||||||
|
// left, sub_right, deeper_level
|
||||||
|
// );
|
||||||
|
// Self::explore_facet_number_levels(
|
||||||
|
// rtxn,
|
||||||
|
// db,
|
||||||
|
// field_id,
|
||||||
|
// deeper_level,
|
||||||
|
// left,
|
||||||
|
// sub_right,
|
||||||
|
// output,
|
||||||
|
// )?;
|
||||||
|
// }
|
||||||
|
// if !matches!(right, Included(r) if r == right_found) {
|
||||||
|
// let sub_left = Excluded(right_found);
|
||||||
|
// debug!(
|
||||||
|
// "calling right with {:?} to {:?} (level {})",
|
||||||
|
// sub_left, right, deeper_level
|
||||||
|
// );
|
||||||
|
// Self::explore_facet_number_levels(
|
||||||
|
// rtxn,
|
||||||
|
// db,
|
||||||
|
// field_id,
|
||||||
|
// deeper_level,
|
||||||
|
// sub_left,
|
||||||
|
// right,
|
||||||
|
// output,
|
||||||
|
// )?;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// None => {
|
||||||
|
// // If we found nothing at this level it means that we must find
|
||||||
|
// // the same bounds but at a deeper, more precise level.
|
||||||
|
// Self::explore_facet_number_levels(
|
||||||
|
// rtxn,
|
||||||
|
// db,
|
||||||
|
// field_id,
|
||||||
|
// deeper_level,
|
||||||
|
// left,
|
||||||
|
// right,
|
||||||
|
// output,
|
||||||
|
// )?;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
// Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn evaluate_operator(
|
fn evaluate_operator(
|
||||||
@ -277,23 +326,27 @@ impl<'a> Filter<'a> {
|
|||||||
return Ok(exist);
|
return Ok(exist);
|
||||||
}
|
}
|
||||||
Condition::Equal(val) => {
|
Condition::Equal(val) => {
|
||||||
let (_original_value, string_docids) = strings_db
|
let string_docids = strings_db
|
||||||
.get(rtxn, &(field_id, &val.value().to_lowercase()))?
|
.get(
|
||||||
|
rtxn,
|
||||||
|
&FacetKey { field_id, level: 0, left_bound: &val.value().to_lowercase() },
|
||||||
|
)?
|
||||||
|
.map(|v| v.bitmap)
|
||||||
.unwrap_or_default();
|
.unwrap_or_default();
|
||||||
let number = val.parse::<f64>().ok();
|
let number = val.parse::<f64>().ok();
|
||||||
let number_docids = match number {
|
let number_docids = match number {
|
||||||
Some(n) => {
|
Some(n) => {
|
||||||
let n = Included(n);
|
let n = Included(n);
|
||||||
let mut output = RoaringBitmap::new();
|
let mut output = RoaringBitmap::new();
|
||||||
Self::explore_facet_number_levels(
|
// Self::explore_facet_number_levels(
|
||||||
rtxn,
|
// rtxn,
|
||||||
numbers_db,
|
// numbers_db,
|
||||||
field_id,
|
// field_id,
|
||||||
0,
|
// 0,
|
||||||
n,
|
// n,
|
||||||
n,
|
// n,
|
||||||
&mut output,
|
// &mut output,
|
||||||
)?;
|
// )?;
|
||||||
output
|
output
|
||||||
}
|
}
|
||||||
None => RoaringBitmap::new(),
|
None => RoaringBitmap::new(),
|
||||||
@ -312,21 +365,32 @@ impl<'a> Filter<'a> {
|
|||||||
// that's fine if it don't, the value just before will be returned instead.
|
// that's fine if it don't, the value just before will be returned instead.
|
||||||
let biggest_level = numbers_db
|
let biggest_level = numbers_db
|
||||||
.remap_data_type::<DecodeIgnore>()
|
.remap_data_type::<DecodeIgnore>()
|
||||||
.get_lower_than_or_equal_to(rtxn, &(field_id, u8::MAX, f64::MAX, f64::MAX))?
|
.get_lower_than_or_equal_to(
|
||||||
.and_then(|((id, level, _, _), _)| if id == field_id { Some(level) } else { None });
|
rtxn,
|
||||||
|
&FacetKey { field_id, level: u8::MAX, left_bound: f64::MAX },
|
||||||
|
)?
|
||||||
|
.and_then(
|
||||||
|
|(FacetKey { field_id: id, level, .. }, _)| {
|
||||||
|
if id == field_id {
|
||||||
|
Some(level)
|
||||||
|
} else {
|
||||||
|
None
|
||||||
|
}
|
||||||
|
},
|
||||||
|
);
|
||||||
|
|
||||||
match biggest_level {
|
match biggest_level {
|
||||||
Some(level) => {
|
Some(level) => {
|
||||||
let mut output = RoaringBitmap::new();
|
let mut output = RoaringBitmap::new();
|
||||||
Self::explore_facet_number_levels(
|
// Self::explore_facet_number_levels(
|
||||||
rtxn,
|
// rtxn,
|
||||||
numbers_db,
|
// numbers_db,
|
||||||
field_id,
|
// field_id,
|
||||||
level,
|
// level,
|
||||||
left,
|
// left,
|
||||||
right,
|
// right,
|
||||||
&mut output,
|
// &mut output,
|
||||||
)?;
|
// )?;
|
||||||
Ok(output)
|
Ok(output)
|
||||||
}
|
}
|
||||||
None => Ok(RoaringBitmap::new()),
|
None => Ok(RoaringBitmap::new()),
|
||||||
|
@ -1,6 +1,6 @@
|
|||||||
pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET};
|
pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET};
|
||||||
pub use self::facet_number::{FacetNumberIter, FacetNumberRange, FacetNumberRevRange};
|
// pub use self::facet_number::{FacetNumberIter, FacetNumberRange, FacetNumberRevRange};
|
||||||
pub use self::facet_string::FacetStringIter;
|
// pub use self::facet_string::FacetStringIter;
|
||||||
pub use self::filter::Filter;
|
pub use self::filter::Filter;
|
||||||
|
|
||||||
mod facet_distribution;
|
mod facet_distribution;
|
||||||
|
@ -15,7 +15,7 @@ use log::debug;
|
|||||||
use once_cell::sync::Lazy;
|
use once_cell::sync::Lazy;
|
||||||
use roaring::bitmap::RoaringBitmap;
|
use roaring::bitmap::RoaringBitmap;
|
||||||
|
|
||||||
pub use self::facet::{FacetDistribution, FacetNumberIter, Filter, DEFAULT_VALUES_PER_FACET};
|
pub use self::facet::{FacetDistribution, /* FacetNumberIter,*/ Filter, DEFAULT_VALUES_PER_FACET,};
|
||||||
use self::fst_utils::{Complement, Intersection, StartsWith, Union};
|
use self::fst_utils::{Complement, Intersection, StartsWith, Union};
|
||||||
pub use self::matches::{
|
pub use self::matches::{
|
||||||
FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWord, MatchingWords,
|
FormatOptions, MatchBounds, Matcher, MatcherBuilder, MatchingWord, MatchingWords,
|
||||||
|
@ -6,10 +6,7 @@ use heed::types::ByteSlice;
|
|||||||
use heed::BytesDecode;
|
use heed::BytesDecode;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::heed_codec::facet::{
|
use crate::heed_codec::facet::FacetStringZeroBoundsValueCodec;
|
||||||
FacetLevelValueU32Codec, FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec,
|
|
||||||
FacetStringZeroBoundsValueCodec,
|
|
||||||
};
|
|
||||||
use crate::{make_db_snap_from_iter, CboRoaringBitmapCodec, ExternalDocumentsIds, Index};
|
use crate::{make_db_snap_from_iter, CboRoaringBitmapCodec, ExternalDocumentsIds, Index};
|
||||||
|
|
||||||
#[track_caller]
|
#[track_caller]
|
||||||
@ -232,46 +229,48 @@ pub fn snap_word_prefix_position_docids(index: &Index) -> String {
|
|||||||
snap
|
snap
|
||||||
}
|
}
|
||||||
pub fn snap_facet_id_f64_docids(index: &Index) -> String {
|
pub fn snap_facet_id_f64_docids(index: &Index) -> String {
|
||||||
let snap = make_db_snap_from_iter!(index, facet_id_f64_docids, |(
|
todo!()
|
||||||
(facet_id, level, left, right),
|
// let snap = make_db_snap_from_iter!(index, facet_id_f64_docids, |(
|
||||||
b,
|
// (facet_id, level, left, right),
|
||||||
)| {
|
// b,
|
||||||
&format!("{facet_id:<3} {level:<2} {left:<6} {right:<6} {}", display_bitmap(&b))
|
// )| {
|
||||||
});
|
// &format!("{facet_id:<3} {level:<2} {left:<6} {right:<6} {}", display_bitmap(&b))
|
||||||
snap
|
// });
|
||||||
|
// snap
|
||||||
}
|
}
|
||||||
pub fn snap_facet_id_string_docids(index: &Index) -> String {
|
pub fn snap_facet_id_string_docids(index: &Index) -> String {
|
||||||
let rtxn = index.read_txn().unwrap();
|
todo!()
|
||||||
let bytes_db = index.facet_id_string_docids.remap_types::<ByteSlice, ByteSlice>();
|
// let rtxn = index.read_txn().unwrap();
|
||||||
let iter = bytes_db.iter(&rtxn).unwrap();
|
// let bytes_db = index.facet_id_string_docids.remap_types::<ByteSlice, ByteSlice>();
|
||||||
let mut snap = String::new();
|
// let iter = bytes_db.iter(&rtxn).unwrap();
|
||||||
|
// let mut snap = String::new();
|
||||||
|
|
||||||
for x in iter {
|
// for x in iter {
|
||||||
let (key, value) = x.unwrap();
|
// let (key, value) = x.unwrap();
|
||||||
if let Some((field_id, normalized_str)) = FacetStringLevelZeroCodec::bytes_decode(key) {
|
// if let Some((field_id, normalized_str)) = FacetStringLevelZeroCodec::bytes_decode(key) {
|
||||||
let (orig_string, docids) =
|
// let (orig_string, docids) =
|
||||||
FacetStringLevelZeroValueCodec::bytes_decode(value).unwrap();
|
// FacetStringLevelZeroValueCodec::bytes_decode(value).unwrap();
|
||||||
snap.push_str(&format!(
|
// snap.push_str(&format!(
|
||||||
"{field_id:<3} {normalized_str:<8} {orig_string:<8} {}\n",
|
// "{field_id:<3} {normalized_str:<8} {orig_string:<8} {}\n",
|
||||||
display_bitmap(&docids)
|
// display_bitmap(&docids)
|
||||||
));
|
// ));
|
||||||
} else if let Some((field_id, level, left, right)) =
|
// } else if let Some((field_id, level, left, right)) =
|
||||||
FacetLevelValueU32Codec::bytes_decode(key)
|
// FacetLevelValueU32Codec::bytes_decode(key)
|
||||||
{
|
// {
|
||||||
snap.push_str(&format!("{field_id:<3} {level:<2} {left:<6} {right:<6} "));
|
// snap.push_str(&format!("{field_id:<3} {level:<2} {left:<6} {right:<6} "));
|
||||||
let (bounds, docids) =
|
// let (bounds, docids) =
|
||||||
FacetStringZeroBoundsValueCodec::<CboRoaringBitmapCodec>::bytes_decode(value)
|
// FacetStringZeroBoundsValueCodec::<CboRoaringBitmapCodec>::bytes_decode(value)
|
||||||
.unwrap();
|
// .unwrap();
|
||||||
if let Some((left, right)) = bounds {
|
// if let Some((left, right)) = bounds {
|
||||||
snap.push_str(&format!("{left:<8} {right:<8} "));
|
// snap.push_str(&format!("{left:<8} {right:<8} "));
|
||||||
}
|
// }
|
||||||
snap.push_str(&display_bitmap(&docids));
|
// snap.push_str(&display_bitmap(&docids));
|
||||||
snap.push('\n');
|
// snap.push('\n');
|
||||||
} else {
|
// } else {
|
||||||
panic!();
|
// panic!();
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
snap
|
// snap
|
||||||
}
|
}
|
||||||
pub fn snap_documents_ids(index: &Index) -> String {
|
pub fn snap_documents_ids(index: &Index) -> String {
|
||||||
let rtxn = index.read_txn().unwrap();
|
let rtxn = index.read_txn().unwrap();
|
||||||
|
@ -10,9 +10,7 @@ use time::OffsetDateTime;
|
|||||||
|
|
||||||
use super::ClearDocuments;
|
use super::ClearDocuments;
|
||||||
use crate::error::{InternalError, SerializationError, UserError};
|
use crate::error::{InternalError, SerializationError, UserError};
|
||||||
use crate::heed_codec::facet::{
|
use crate::heed_codec::facet::FacetStringZeroBoundsValueCodec;
|
||||||
FacetLevelValueU32Codec, FacetStringLevelZeroValueCodec, FacetStringZeroBoundsValueCodec,
|
|
||||||
};
|
|
||||||
use crate::heed_codec::CboRoaringBitmapCodec;
|
use crate::heed_codec::CboRoaringBitmapCodec;
|
||||||
use crate::index::{db_name, main_key};
|
use crate::index::{db_name, main_key};
|
||||||
use crate::{
|
use crate::{
|
||||||
@ -442,11 +440,11 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// We delete the documents ids that are under the facet field id values.
|
// We delete the documents ids that are under the facet field id values.
|
||||||
remove_docids_from_facet_field_id_docids(
|
// TODO: remove_docids_from_facet_field_id_docids(
|
||||||
self.wtxn,
|
// self.wtxn,
|
||||||
facet_id_f64_docids,
|
// facet_id_f64_docids,
|
||||||
&self.to_delete_docids,
|
// &self.to_delete_docids,
|
||||||
)?;
|
// )?;
|
||||||
// We delete the documents ids that are under the facet field id values.
|
// We delete the documents ids that are under the facet field id values.
|
||||||
remove_docids_from_facet_field_id_docids(
|
remove_docids_from_facet_field_id_docids(
|
||||||
self.wtxn,
|
self.wtxn,
|
||||||
@ -587,57 +585,57 @@ fn remove_docids_from_facet_field_id_string_docids<'a, C, D>(
|
|||||||
db: &heed::Database<C, D>,
|
db: &heed::Database<C, D>,
|
||||||
to_remove: &RoaringBitmap,
|
to_remove: &RoaringBitmap,
|
||||||
) -> crate::Result<()> {
|
) -> crate::Result<()> {
|
||||||
let db_name = Some(crate::index::db_name::FACET_ID_STRING_DOCIDS);
|
// let db_name = Some(crate::index::db_name::FACET_ID_STRING_DOCIDS);
|
||||||
let mut iter = db.remap_types::<ByteSlice, ByteSlice>().iter_mut(wtxn)?;
|
// let mut iter = db.remap_types::<ByteSlice, ByteSlice>().iter_mut(wtxn)?;
|
||||||
while let Some(result) = iter.next() {
|
// while let Some(result) = iter.next() {
|
||||||
let (key, val) = result?;
|
// let (key, val) = result?;
|
||||||
match FacetLevelValueU32Codec::bytes_decode(key) {
|
// match FacetLevelValueU32Codec::bytes_decode(key) {
|
||||||
Some(_) => {
|
// Some(_) => {
|
||||||
// If we are able to parse this key it means it is a facet string group
|
// // If we are able to parse this key it means it is a facet string group
|
||||||
// level key. We must then parse the value using the appropriate codec.
|
// // level key. We must then parse the value using the appropriate codec.
|
||||||
let (group, mut docids) =
|
// let (group, mut docids) =
|
||||||
FacetStringZeroBoundsValueCodec::<CboRoaringBitmapCodec>::bytes_decode(val)
|
// FacetStringZeroBoundsValueCodec::<CboRoaringBitmapCodec>::bytes_decode(val)
|
||||||
.ok_or_else(|| SerializationError::Decoding { db_name })?;
|
// .ok_or_else(|| SerializationError::Decoding { db_name })?;
|
||||||
|
|
||||||
let previous_len = docids.len();
|
// let previous_len = docids.len();
|
||||||
docids -= to_remove;
|
// docids -= to_remove;
|
||||||
if docids.is_empty() {
|
// if docids.is_empty() {
|
||||||
// safety: we don't keep references from inside the LMDB database.
|
// // safety: we don't keep references from inside the LMDB database.
|
||||||
unsafe { iter.del_current()? };
|
// unsafe { iter.del_current()? };
|
||||||
} else if docids.len() != previous_len {
|
// } else if docids.len() != previous_len {
|
||||||
let key = key.to_owned();
|
// let key = key.to_owned();
|
||||||
let val = &(group, docids);
|
// let val = &(group, docids);
|
||||||
let value_bytes =
|
// let value_bytes =
|
||||||
FacetStringZeroBoundsValueCodec::<CboRoaringBitmapCodec>::bytes_encode(val)
|
// FacetStringZeroBoundsValueCodec::<CboRoaringBitmapCodec>::bytes_encode(val)
|
||||||
.ok_or_else(|| SerializationError::Encoding { db_name })?;
|
// .ok_or_else(|| SerializationError::Encoding { db_name })?;
|
||||||
|
|
||||||
// safety: we don't keep references from inside the LMDB database.
|
// // safety: we don't keep references from inside the LMDB database.
|
||||||
unsafe { iter.put_current(&key, &value_bytes)? };
|
// unsafe { iter.put_current(&key, &value_bytes)? };
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
None => {
|
// None => {
|
||||||
// The key corresponds to a level zero facet string.
|
// // The key corresponds to a level zero facet string.
|
||||||
let (original_value, mut docids) =
|
// let (original_value, mut docids) =
|
||||||
FacetStringLevelZeroValueCodec::bytes_decode(val)
|
// FacetStringLevelZeroValueCodec::bytes_decode(val)
|
||||||
.ok_or_else(|| SerializationError::Decoding { db_name })?;
|
// .ok_or_else(|| SerializationError::Decoding { db_name })?;
|
||||||
|
|
||||||
let previous_len = docids.len();
|
// let previous_len = docids.len();
|
||||||
docids -= to_remove;
|
// docids -= to_remove;
|
||||||
if docids.is_empty() {
|
// if docids.is_empty() {
|
||||||
// safety: we don't keep references from inside the LMDB database.
|
// // safety: we don't keep references from inside the LMDB database.
|
||||||
unsafe { iter.del_current()? };
|
// unsafe { iter.del_current()? };
|
||||||
} else if docids.len() != previous_len {
|
// } else if docids.len() != previous_len {
|
||||||
let key = key.to_owned();
|
// let key = key.to_owned();
|
||||||
let val = &(original_value, docids);
|
// let val = &(original_value, docids);
|
||||||
let value_bytes = FacetStringLevelZeroValueCodec::bytes_encode(val)
|
// let value_bytes = FacetStringLevelZeroValueCodec::bytes_encode(val)
|
||||||
.ok_or_else(|| SerializationError::Encoding { db_name })?;
|
// .ok_or_else(|| SerializationError::Encoding { db_name })?;
|
||||||
|
|
||||||
// safety: we don't keep references from inside the LMDB database.
|
// // safety: we don't keep references from inside the LMDB database.
|
||||||
unsafe { iter.put_current(&key, &value_bytes)? };
|
// unsafe { iter.put_current(&key, &value_bytes)? };
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
@ -136,11 +136,12 @@ use roaring::RoaringBitmap;
|
|||||||
use time::OffsetDateTime;
|
use time::OffsetDateTime;
|
||||||
|
|
||||||
use crate::error::InternalError;
|
use crate::error::InternalError;
|
||||||
use crate::heed_codec::facet::{
|
use crate::heed_codec::facet::new::ordered_f64_codec::OrderedF64Codec;
|
||||||
FacetLevelValueF64Codec, FacetLevelValueU32Codec, FacetStringLevelZeroCodec,
|
use crate::heed_codec::facet::new::str_ref::StrRefCodec;
|
||||||
FacetStringLevelZeroValueCodec, FacetStringZeroBoundsValueCodec,
|
use crate::heed_codec::facet::new::{
|
||||||
|
FacetGroupValue, FacetGroupValueCodec, FacetKey, FacetKeyCodec,
|
||||||
};
|
};
|
||||||
use crate::heed_codec::CboRoaringBitmapCodec;
|
// use crate::heed_codec::CboRoaringBitmapCodec;
|
||||||
use crate::update::index_documents::{create_writer, write_into_lmdb_database, writer_into_reader};
|
use crate::update::index_documents::{create_writer, write_into_lmdb_database, writer_into_reader};
|
||||||
use crate::{FieldId, Index, Result};
|
use crate::{FieldId, Index, Result};
|
||||||
|
|
||||||
@ -187,16 +188,18 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
|
|||||||
|
|
||||||
debug!("Computing and writing the facet values levels docids into LMDB on disk...");
|
debug!("Computing and writing the facet values levels docids into LMDB on disk...");
|
||||||
|
|
||||||
|
let mut nested_wtxn = self.index.env.nested_write_txn(self.wtxn).unwrap();
|
||||||
|
|
||||||
for field_id in faceted_fields {
|
for field_id in faceted_fields {
|
||||||
// Clear the facet string levels.
|
// Clear the facet string levels.
|
||||||
clear_field_string_levels(
|
// clear_field_string_levels(
|
||||||
self.wtxn,
|
// &mut nested_wtxn,
|
||||||
self.index.facet_id_string_docids.remap_types::<ByteSlice, DecodeIgnore>(),
|
// self.index.facet_id_string_docids.remap_types::<ByteSlice, DecodeIgnore>(),
|
||||||
field_id,
|
// field_id,
|
||||||
)?;
|
// )?;
|
||||||
|
|
||||||
let (facet_string_levels, string_documents_ids) = compute_facet_strings_levels(
|
let (facet_string_levels, string_documents_ids) = compute_facet_strings_levels(
|
||||||
self.wtxn,
|
&mut nested_wtxn,
|
||||||
self.index.facet_id_string_docids,
|
self.index.facet_id_string_docids,
|
||||||
self.chunk_compression_type,
|
self.chunk_compression_type,
|
||||||
self.chunk_compression_level,
|
self.chunk_compression_level,
|
||||||
@ -206,13 +209,13 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
|
|||||||
)?;
|
)?;
|
||||||
|
|
||||||
self.index.put_string_faceted_documents_ids(
|
self.index.put_string_faceted_documents_ids(
|
||||||
self.wtxn,
|
&mut nested_wtxn,
|
||||||
field_id,
|
field_id,
|
||||||
&string_documents_ids,
|
&string_documents_ids,
|
||||||
)?;
|
)?;
|
||||||
for facet_strings_level in facet_string_levels {
|
for facet_strings_level in facet_string_levels {
|
||||||
write_into_lmdb_database(
|
write_into_lmdb_database(
|
||||||
self.wtxn,
|
&mut nested_wtxn,
|
||||||
*self.index.facet_id_string_docids.as_polymorph(),
|
*self.index.facet_id_string_docids.as_polymorph(),
|
||||||
facet_strings_level,
|
facet_strings_level,
|
||||||
|_, _| {
|
|_, _| {
|
||||||
@ -221,11 +224,11 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
|
|||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Clear the facet number levels.
|
// // Clear the facet number levels.
|
||||||
clear_field_number_levels(self.wtxn, self.index.facet_id_f64_docids, field_id)?;
|
// clear_field_number_levels(&mut nested_wtxn, self.index.facet_id_f64_docids, field_id)?;
|
||||||
|
|
||||||
let (facet_number_levels, number_documents_ids) = compute_facet_number_levels(
|
let (facet_number_levels, number_documents_ids) = compute_facet_number_levels(
|
||||||
self.wtxn,
|
&mut nested_wtxn,
|
||||||
self.index.facet_id_f64_docids,
|
self.index.facet_id_f64_docids,
|
||||||
self.chunk_compression_type,
|
self.chunk_compression_type,
|
||||||
self.chunk_compression_level,
|
self.chunk_compression_level,
|
||||||
@ -235,14 +238,14 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
|
|||||||
)?;
|
)?;
|
||||||
|
|
||||||
self.index.put_number_faceted_documents_ids(
|
self.index.put_number_faceted_documents_ids(
|
||||||
self.wtxn,
|
&mut nested_wtxn,
|
||||||
field_id,
|
field_id,
|
||||||
&number_documents_ids,
|
&number_documents_ids,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
for facet_number_level in facet_number_levels {
|
for facet_number_level in facet_number_levels {
|
||||||
write_into_lmdb_database(
|
write_into_lmdb_database(
|
||||||
self.wtxn,
|
&mut nested_wtxn,
|
||||||
*self.index.facet_id_f64_docids.as_polymorph(),
|
*self.index.facet_id_f64_docids.as_polymorph(),
|
||||||
facet_number_level,
|
facet_number_level,
|
||||||
|_, _| {
|
|_, _| {
|
||||||
@ -263,8 +266,8 @@ impl<'t, 'u, 'i> Facets<'t, 'u, 'i> {
|
|||||||
/// that must be inserted into the database.
|
/// that must be inserted into the database.
|
||||||
/// 2. a roaring bitmap of all the document ids present in the database
|
/// 2. a roaring bitmap of all the document ids present in the database
|
||||||
fn compute_facet_number_levels<'t>(
|
fn compute_facet_number_levels<'t>(
|
||||||
rtxn: &'t heed::RoTxn,
|
rtxn: &'t mut heed::RwTxn,
|
||||||
db: heed::Database<FacetLevelValueF64Codec, CboRoaringBitmapCodec>,
|
db: heed::Database<FacetKeyCodec<OrderedF64Codec>, FacetGroupValueCodec>,
|
||||||
compression_type: CompressionType,
|
compression_type: CompressionType,
|
||||||
compression_level: Option<u32>,
|
compression_level: Option<u32>,
|
||||||
level_group_size: NonZeroUsize,
|
level_group_size: NonZeroUsize,
|
||||||
@ -277,7 +280,7 @@ fn compute_facet_number_levels<'t>(
|
|||||||
.remap_types::<DecodeIgnore, DecodeIgnore>()
|
.remap_types::<DecodeIgnore, DecodeIgnore>()
|
||||||
.fold(Ok(0usize), |count, result| result.and(count).map(|c| c + 1))?;
|
.fold(Ok(0usize), |count, result| result.and(count).map(|c| c + 1))?;
|
||||||
|
|
||||||
let level_0_start = (field_id, 0, f64::MIN, f64::MIN);
|
let level_0_start = FacetKey { field_id, level: 0, left_bound: f64::MIN };
|
||||||
|
|
||||||
// Groups sizes are always a power of the original level_group_size and therefore a group
|
// Groups sizes are always a power of the original level_group_size and therefore a group
|
||||||
// always maps groups of the previous level and never splits previous levels groups in half.
|
// always maps groups of the previous level and never splits previous levels groups in half.
|
||||||
@ -289,37 +292,31 @@ fn compute_facet_number_levels<'t>(
|
|||||||
let mut number_document_ids = RoaringBitmap::new();
|
let mut number_document_ids = RoaringBitmap::new();
|
||||||
|
|
||||||
if let Some((top_level, _)) = group_size_iter.last() {
|
if let Some((top_level, _)) = group_size_iter.last() {
|
||||||
let subwriters =
|
let subwriters = recursive_compute_levels::<OrderedF64Codec>(
|
||||||
recursive_compute_levels::<FacetLevelValueF64Codec, CboRoaringBitmapCodec, f64>(
|
rtxn,
|
||||||
rtxn,
|
db,
|
||||||
db,
|
compression_type,
|
||||||
compression_type,
|
compression_level,
|
||||||
compression_level,
|
field_id,
|
||||||
*top_level,
|
*top_level,
|
||||||
level_0_start,
|
level_0_start,
|
||||||
&(level_0_start..),
|
&(level_0_start..),
|
||||||
first_level_size,
|
first_level_size,
|
||||||
level_group_size,
|
level_group_size,
|
||||||
&mut |bitmaps, _, _| {
|
&mut |bitmaps, _| {
|
||||||
for bitmap in bitmaps {
|
for bitmap in bitmaps {
|
||||||
number_document_ids |= bitmap;
|
number_document_ids |= bitmap;
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
},
|
},
|
||||||
&|_i, (_field_id, _level, left, _right)| *left,
|
)?;
|
||||||
&|bitmap| bitmap,
|
|
||||||
&|writer, level, left, right, docids| {
|
|
||||||
write_number_entry(writer, field_id, level.get(), left, right, &docids)?;
|
|
||||||
Ok(())
|
|
||||||
},
|
|
||||||
)?;
|
|
||||||
|
|
||||||
Ok((subwriters, number_document_ids))
|
Ok((subwriters, number_document_ids))
|
||||||
} else {
|
} else {
|
||||||
let mut documents_ids = RoaringBitmap::new();
|
let mut documents_ids = RoaringBitmap::new();
|
||||||
for result in db.range(rtxn, &(level_0_start..))?.take(first_level_size) {
|
for result in db.range(rtxn, &(level_0_start..))?.take(first_level_size) {
|
||||||
let (_key, docids) = result?;
|
let (_key, group_value) = result?;
|
||||||
documents_ids |= docids;
|
documents_ids |= group_value.bitmap;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok((vec![], documents_ids))
|
Ok((vec![], documents_ids))
|
||||||
@ -333,8 +330,8 @@ fn compute_facet_number_levels<'t>(
|
|||||||
/// that must be inserted into the database.
|
/// that must be inserted into the database.
|
||||||
/// 2. a roaring bitmap of all the document ids present in the database
|
/// 2. a roaring bitmap of all the document ids present in the database
|
||||||
fn compute_facet_strings_levels<'t>(
|
fn compute_facet_strings_levels<'t>(
|
||||||
rtxn: &'t heed::RoTxn,
|
rtxn: &'t mut heed::RwTxn,
|
||||||
db: heed::Database<FacetStringLevelZeroCodec, FacetStringLevelZeroValueCodec>,
|
db: heed::Database<FacetKeyCodec<StrRefCodec>, FacetGroupValueCodec>,
|
||||||
compression_type: CompressionType,
|
compression_type: CompressionType,
|
||||||
compression_level: Option<u32>,
|
compression_level: Option<u32>,
|
||||||
level_group_size: NonZeroUsize,
|
level_group_size: NonZeroUsize,
|
||||||
@ -347,7 +344,7 @@ fn compute_facet_strings_levels<'t>(
|
|||||||
.remap_types::<DecodeIgnore, DecodeIgnore>()
|
.remap_types::<DecodeIgnore, DecodeIgnore>()
|
||||||
.fold(Ok(0usize), |count, result| result.and(count).map(|c| c + 1))?;
|
.fold(Ok(0usize), |count, result| result.and(count).map(|c| c + 1))?;
|
||||||
|
|
||||||
let level_0_start = (field_id, "");
|
let level_0_start = FacetKey { field_id, level: 0, left_bound: "" };
|
||||||
|
|
||||||
// Groups sizes are always a power of the original level_group_size and therefore a group
|
// Groups sizes are always a power of the original level_group_size and therefore a group
|
||||||
// always maps groups of the previous level and never splits previous levels groups in half.
|
// always maps groups of the previous level and never splits previous levels groups in half.
|
||||||
@ -359,40 +356,31 @@ fn compute_facet_strings_levels<'t>(
|
|||||||
let mut strings_document_ids = RoaringBitmap::new();
|
let mut strings_document_ids = RoaringBitmap::new();
|
||||||
|
|
||||||
if let Some((top_level, _)) = group_size_iter.last() {
|
if let Some((top_level, _)) = group_size_iter.last() {
|
||||||
let subwriters = recursive_compute_levels::<
|
let subwriters = recursive_compute_levels::<StrRefCodec>(
|
||||||
FacetStringLevelZeroCodec,
|
|
||||||
FacetStringLevelZeroValueCodec,
|
|
||||||
(u32, &str),
|
|
||||||
>(
|
|
||||||
rtxn,
|
rtxn,
|
||||||
db,
|
db,
|
||||||
compression_type,
|
compression_type,
|
||||||
compression_level,
|
compression_level,
|
||||||
|
field_id,
|
||||||
*top_level,
|
*top_level,
|
||||||
level_0_start,
|
level_0_start,
|
||||||
&(level_0_start..),
|
&(level_0_start..),
|
||||||
first_level_size,
|
first_level_size,
|
||||||
level_group_size,
|
level_group_size,
|
||||||
&mut |bitmaps, _, _| {
|
&mut |bitmaps, _| {
|
||||||
for bitmap in bitmaps {
|
for bitmap in bitmaps {
|
||||||
strings_document_ids |= bitmap;
|
strings_document_ids |= bitmap;
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
},
|
},
|
||||||
&|i, (_field_id, value)| (i as u32, *value),
|
|
||||||
&|value| value.1,
|
|
||||||
&|writer, level, start_bound, end_bound, docids| {
|
|
||||||
write_string_entry(writer, field_id, level, start_bound, end_bound, docids)?;
|
|
||||||
Ok(())
|
|
||||||
},
|
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
Ok((subwriters, strings_document_ids))
|
Ok((subwriters, strings_document_ids))
|
||||||
} else {
|
} else {
|
||||||
let mut documents_ids = RoaringBitmap::new();
|
let mut documents_ids = RoaringBitmap::new();
|
||||||
for result in db.range(rtxn, &(level_0_start..))?.take(first_level_size) {
|
for result in db.range(rtxn, &(level_0_start..))?.take(first_level_size) {
|
||||||
let (_key, (_original_value, docids)) = result?;
|
let (_key, group_value) = result?;
|
||||||
documents_ids |= docids;
|
documents_ids |= group_value.bitmap;
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok((vec![], documents_ids))
|
Ok((vec![], documents_ids))
|
||||||
@ -436,29 +424,26 @@ from the level below were read/created. Its arguments are:
|
|||||||
A vector of grenad::Reader. The reader at index `i` corresponds to the elements of level `i + 1`
|
A vector of grenad::Reader. The reader at index `i` corresponds to the elements of level `i + 1`
|
||||||
that must be inserted into the database.
|
that must be inserted into the database.
|
||||||
*/
|
*/
|
||||||
fn recursive_compute_levels<'t, KeyCodec, ValueCodec, Bound>(
|
fn recursive_compute_levels<'t, BoundCodec>(
|
||||||
rtxn: &'t heed::RoTxn,
|
rtxn: &'t mut heed::RwTxn,
|
||||||
db: heed::Database<KeyCodec, ValueCodec>,
|
db: heed::Database<FacetKeyCodec<BoundCodec>, FacetGroupValueCodec>,
|
||||||
compression_type: CompressionType,
|
compression_type: CompressionType,
|
||||||
compression_level: Option<u32>,
|
compression_level: Option<u32>,
|
||||||
|
field_id: FieldId,
|
||||||
level: u8,
|
level: u8,
|
||||||
level_0_start: <KeyCodec as BytesDecode<'t>>::DItem,
|
level_0_start: FacetKey<<BoundCodec as BytesEncode<'t>>::EItem>,
|
||||||
level_0_range: &'t RangeFrom<<KeyCodec as BytesDecode<'t>>::DItem>,
|
level_0_range: &'t RangeFrom<FacetKey<<BoundCodec as BytesEncode<'t>>::EItem>>,
|
||||||
level_0_size: usize,
|
level_0_size: usize,
|
||||||
level_group_size: NonZeroUsize,
|
level_group_size: NonZeroUsize,
|
||||||
computed_group_bitmap: &mut dyn FnMut(&[RoaringBitmap], Bound, Bound) -> Result<()>,
|
computed_group_bitmap: &mut dyn FnMut(
|
||||||
bound_from_db_key: &dyn for<'a> Fn(usize, &'a <KeyCodec as BytesDecode<'t>>::DItem) -> Bound,
|
&[RoaringBitmap],
|
||||||
bitmap_from_db_value: &dyn Fn(<ValueCodec as BytesDecode<'t>>::DItem) -> RoaringBitmap,
|
<BoundCodec as BytesEncode<'t>>::EItem,
|
||||||
write_entry: &dyn Fn(&mut Writer<File>, NonZeroU8, Bound, Bound, RoaringBitmap) -> Result<()>,
|
) -> Result<()>,
|
||||||
) -> Result<Vec<Reader<File>>>
|
) -> Result<Vec<Reader<File>>>
|
||||||
where
|
where
|
||||||
KeyCodec: for<'a> BytesEncode<'a>
|
for<'a> BoundCodec:
|
||||||
+ for<'a> BytesDecode<'a, DItem = <KeyCodec as BytesEncode<'a>>::EItem>,
|
BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>,
|
||||||
for<'a> <KeyCodec as BytesEncode<'a>>::EItem: Sized,
|
for<'a> <BoundCodec as BytesEncode<'a>>::EItem: Copy + Sized,
|
||||||
ValueCodec: for<'a> BytesEncode<'a>
|
|
||||||
+ for<'a> BytesDecode<'a, DItem = <ValueCodec as BytesEncode<'a>>::EItem>,
|
|
||||||
for<'a> <ValueCodec as BytesEncode<'a>>::EItem: Sized,
|
|
||||||
Bound: Copy,
|
|
||||||
{
|
{
|
||||||
if level == 0 {
|
if level == 0 {
|
||||||
// base case for the recursion
|
// base case for the recursion
|
||||||
@ -468,31 +453,32 @@ where
|
|||||||
// 2. fill the `bitmaps` vector to give it to level 1 once `level_group_size` elements were read
|
// 2. fill the `bitmaps` vector to give it to level 1 once `level_group_size` elements were read
|
||||||
let mut bitmaps = vec![];
|
let mut bitmaps = vec![];
|
||||||
|
|
||||||
let mut start_bound = bound_from_db_key(0, &level_0_start);
|
let mut start_bound = level_0_start.left_bound;
|
||||||
let mut end_bound = bound_from_db_key(0, &level_0_start);
|
// let mut end_bound = level_0_start.bound;
|
||||||
|
|
||||||
let mut first_iteration_for_new_group = true;
|
let mut first_iteration_for_new_group = true;
|
||||||
for (i, db_result_item) in db.range(rtxn, level_0_range)?.take(level_0_size).enumerate() {
|
for (i, db_result_item) in db.range(rtxn, level_0_range)?.take(level_0_size).enumerate() {
|
||||||
let (key, value) = db_result_item?;
|
let (key, value) = db_result_item?;
|
||||||
|
|
||||||
let bound = bound_from_db_key(i, &key);
|
let bound = key.left_bound;
|
||||||
let docids = bitmap_from_db_value(value);
|
let docids = value.bitmap;
|
||||||
|
|
||||||
if first_iteration_for_new_group {
|
if first_iteration_for_new_group {
|
||||||
start_bound = bound;
|
start_bound = bound;
|
||||||
first_iteration_for_new_group = false;
|
first_iteration_for_new_group = false;
|
||||||
}
|
}
|
||||||
end_bound = bound;
|
// end_bound = bound;
|
||||||
bitmaps.push(docids);
|
bitmaps.push(docids);
|
||||||
|
|
||||||
if bitmaps.len() == level_group_size.get() {
|
if bitmaps.len() == level_group_size.get() {
|
||||||
computed_group_bitmap(&bitmaps, start_bound, end_bound)?;
|
computed_group_bitmap(&bitmaps, start_bound)?;
|
||||||
first_iteration_for_new_group = true;
|
first_iteration_for_new_group = true;
|
||||||
bitmaps.clear();
|
bitmaps.clear();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// don't forget to give the leftover bitmaps as well
|
// don't forget to give the leftover bitmaps as well
|
||||||
if !bitmaps.is_empty() {
|
if !bitmaps.is_empty() {
|
||||||
computed_group_bitmap(&bitmaps, start_bound, end_bound)?;
|
computed_group_bitmap(&bitmaps, start_bound)?;
|
||||||
bitmaps.clear();
|
bitmaps.clear();
|
||||||
}
|
}
|
||||||
// level 0 is already stored in the DB
|
// level 0 is already stored in the DB
|
||||||
@ -516,48 +502,52 @@ where
|
|||||||
db,
|
db,
|
||||||
compression_type,
|
compression_type,
|
||||||
compression_level,
|
compression_level,
|
||||||
|
field_id,
|
||||||
level - 1,
|
level - 1,
|
||||||
level_0_start,
|
level_0_start,
|
||||||
level_0_range,
|
level_0_range,
|
||||||
level_0_size,
|
level_0_size,
|
||||||
level_group_size,
|
level_group_size,
|
||||||
&mut |sub_bitmaps: &[RoaringBitmap], start_range, end_range| {
|
&mut |sub_bitmaps: &[RoaringBitmap],
|
||||||
|
start_range: <BoundCodec as BytesEncode<'t>>::EItem| {
|
||||||
let mut combined_bitmap = RoaringBitmap::default();
|
let mut combined_bitmap = RoaringBitmap::default();
|
||||||
for bitmap in sub_bitmaps {
|
for bitmap in sub_bitmaps {
|
||||||
combined_bitmap |= bitmap;
|
combined_bitmap |= bitmap;
|
||||||
}
|
}
|
||||||
range_for_bitmaps.push((start_range, end_range));
|
range_for_bitmaps.push(start_range);
|
||||||
|
|
||||||
bitmaps.push(combined_bitmap);
|
bitmaps.push(combined_bitmap);
|
||||||
if bitmaps.len() == level_group_size.get() {
|
if bitmaps.len() == level_group_size.get() {
|
||||||
let start_bound = range_for_bitmaps.first().unwrap().0;
|
let start_bound = range_for_bitmaps.first().unwrap();
|
||||||
let end_bound = range_for_bitmaps.last().unwrap().1;
|
computed_group_bitmap(&bitmaps, *start_bound)?;
|
||||||
computed_group_bitmap(&bitmaps, start_bound, end_bound)?;
|
for (bitmap, start_bound) in bitmaps.drain(..).zip(range_for_bitmaps.drain(..))
|
||||||
for (bitmap, (start_bound, end_bound)) in
|
|
||||||
bitmaps.drain(..).zip(range_for_bitmaps.drain(..))
|
|
||||||
{
|
{
|
||||||
write_entry(
|
write_entry::<BoundCodec>(
|
||||||
&mut cur_writer,
|
&mut cur_writer,
|
||||||
|
field_id,
|
||||||
NonZeroU8::new(level).unwrap(),
|
NonZeroU8::new(level).unwrap(),
|
||||||
start_bound,
|
start_bound,
|
||||||
end_bound,
|
|
||||||
bitmap,
|
bitmap,
|
||||||
)?;
|
)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
},
|
},
|
||||||
bound_from_db_key,
|
|
||||||
bitmap_from_db_value,
|
|
||||||
write_entry,
|
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
// don't forget to insert the leftover elements into the writer as well
|
// don't forget to insert the leftover elements into the writer as well
|
||||||
if !bitmaps.is_empty() {
|
if !bitmaps.is_empty() {
|
||||||
let start_range = range_for_bitmaps.first().unwrap().0;
|
let start_range = range_for_bitmaps.first().unwrap();
|
||||||
let end_range = range_for_bitmaps.last().unwrap().1;
|
let end_range = range_for_bitmaps.last().unwrap();
|
||||||
computed_group_bitmap(&bitmaps, start_range, end_range)?;
|
computed_group_bitmap(&bitmaps, *start_range)?;
|
||||||
for (bitmap, (left, right)) in bitmaps.drain(..).zip(range_for_bitmaps.drain(..)) {
|
for (bitmap, bound) in bitmaps.drain(..).zip(range_for_bitmaps.drain(..)) {
|
||||||
write_entry(&mut cur_writer, NonZeroU8::new(level).unwrap(), left, right, bitmap)?;
|
write_entry(
|
||||||
|
&mut cur_writer,
|
||||||
|
field_id,
|
||||||
|
NonZeroU8::new(level).unwrap(),
|
||||||
|
bound,
|
||||||
|
bitmap,
|
||||||
|
)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -566,60 +556,25 @@ where
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn clear_field_number_levels<'t>(
|
fn write_entry<BoundCodec>(
|
||||||
wtxn: &'t mut heed::RwTxn,
|
|
||||||
db: heed::Database<FacetLevelValueF64Codec, CboRoaringBitmapCodec>,
|
|
||||||
field_id: FieldId,
|
|
||||||
) -> heed::Result<()> {
|
|
||||||
let left = (field_id, 1, f64::MIN, f64::MIN);
|
|
||||||
let right = (field_id, u8::MAX, f64::MAX, f64::MAX);
|
|
||||||
let range = left..=right;
|
|
||||||
db.delete_range(wtxn, &range).map(drop)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn clear_field_string_levels<'t>(
|
|
||||||
wtxn: &'t mut heed::RwTxn,
|
|
||||||
db: heed::Database<ByteSlice, DecodeIgnore>,
|
|
||||||
field_id: FieldId,
|
|
||||||
) -> heed::Result<()> {
|
|
||||||
let left = (field_id, NonZeroU8::new(1).unwrap(), u32::MIN, u32::MIN);
|
|
||||||
let right = (field_id, NonZeroU8::new(u8::MAX).unwrap(), u32::MAX, u32::MAX);
|
|
||||||
let range = left..=right;
|
|
||||||
db.remap_key_type::<FacetLevelValueU32Codec>().delete_range(wtxn, &range).map(drop)
|
|
||||||
}
|
|
||||||
|
|
||||||
fn write_number_entry(
|
|
||||||
writer: &mut Writer<File>,
|
|
||||||
field_id: FieldId,
|
|
||||||
level: u8,
|
|
||||||
left: f64,
|
|
||||||
right: f64,
|
|
||||||
ids: &RoaringBitmap,
|
|
||||||
) -> Result<()> {
|
|
||||||
let key = (field_id, level, left, right);
|
|
||||||
let key = FacetLevelValueF64Codec::bytes_encode(&key).ok_or(Error::Encoding)?;
|
|
||||||
let data = CboRoaringBitmapCodec::bytes_encode(&ids).ok_or(Error::Encoding)?;
|
|
||||||
writer.insert(&key, &data)?;
|
|
||||||
Ok(())
|
|
||||||
}
|
|
||||||
fn write_string_entry(
|
|
||||||
writer: &mut Writer<File>,
|
writer: &mut Writer<File>,
|
||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
level: NonZeroU8,
|
level: NonZeroU8,
|
||||||
(left_id, left_value): (u32, &str),
|
bound: <BoundCodec as BytesEncode<'_>>::EItem,
|
||||||
(right_id, right_value): (u32, &str),
|
|
||||||
docids: RoaringBitmap,
|
docids: RoaringBitmap,
|
||||||
) -> Result<()> {
|
) -> Result<()>
|
||||||
let key = (field_id, level, left_id, right_id);
|
where
|
||||||
let key = FacetLevelValueU32Codec::bytes_encode(&key).ok_or(Error::Encoding)?;
|
for<'a> BoundCodec: BytesEncode<'a>,
|
||||||
let data = match level.get() {
|
for<'a> <BoundCodec as BytesEncode<'a>>::EItem: Copy + Sized,
|
||||||
1 => (Some((left_value, right_value)), docids),
|
{
|
||||||
_ => (None, docids),
|
todo!()
|
||||||
};
|
// let key = FacetKey { field_id, level: level.get(), left_bound: bound };
|
||||||
let data = FacetStringZeroBoundsValueCodec::<CboRoaringBitmapCodec>::bytes_encode(&data)
|
// let key_bytes = FacetKeyCodec::<BoundCodec>::bytes_encode(&key).ok_or(Error::Encoding)?;
|
||||||
.ok_or(Error::Encoding)?;
|
// let value_bytes =
|
||||||
writer.insert(&key, &data)?;
|
// FacetGroupValueCodec::bytes_encode(&FacetGroupValue { size: 4, bitmap: docids })
|
||||||
Ok(())
|
// .ok_or(Error::Encoding)?;
|
||||||
|
// writer.insert(&key_bytes, &value_bytes)?;
|
||||||
|
// Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
|
@ -6,7 +6,7 @@ use heed::{BytesDecode, BytesEncode};
|
|||||||
use super::helpers::{
|
use super::helpers::{
|
||||||
create_sorter, merge_cbo_roaring_bitmaps, sorter_into_reader, GrenadParameters,
|
create_sorter, merge_cbo_roaring_bitmaps, sorter_into_reader, GrenadParameters,
|
||||||
};
|
};
|
||||||
use crate::heed_codec::facet::{FacetLevelValueF64Codec, FieldDocIdFacetF64Codec};
|
use crate::heed_codec::facet::FieldDocIdFacetF64Codec;
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
|
||||||
/// Extracts the facet number and the documents ids where this facet number appear.
|
/// Extracts the facet number and the documents ids where this facet number appear.
|
||||||
@ -31,13 +31,14 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
|
|||||||
|
|
||||||
let mut cursor = docid_fid_facet_number.into_cursor()?;
|
let mut cursor = docid_fid_facet_number.into_cursor()?;
|
||||||
while let Some((key_bytes, _)) = cursor.move_on_next()? {
|
while let Some((key_bytes, _)) = cursor.move_on_next()? {
|
||||||
let (field_id, document_id, number) =
|
todo!()
|
||||||
FieldDocIdFacetF64Codec::bytes_decode(key_bytes).unwrap();
|
// let (field_id, document_id, number) =
|
||||||
|
// FieldDocIdFacetF64Codec::bytes_decode(key_bytes).unwrap();
|
||||||
|
|
||||||
let key = (field_id, 0, number, number);
|
// let key = (field_id, 0, number, number);
|
||||||
let key_bytes = FacetLevelValueF64Codec::bytes_encode(&key).unwrap();
|
// // let key_bytes = FacetLevelValueF64Codec::bytes_encode(&key).unwrap();
|
||||||
|
|
||||||
facet_number_docids_sorter.insert(key_bytes, document_id.to_ne_bytes())?;
|
// facet_number_docids_sorter.insert(key_bytes, document_id.to_ne_bytes())?;
|
||||||
}
|
}
|
||||||
|
|
||||||
sorter_into_reader(facet_number_docids_sorter, indexer)
|
sorter_into_reader(facet_number_docids_sorter, indexer)
|
||||||
|
@ -4,11 +4,9 @@ use std::{io, str};
|
|||||||
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::helpers::{
|
use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters};
|
||||||
create_sorter, keep_first_prefix_value_merge_roaring_bitmaps, sorter_into_reader,
|
use crate::update::index_documents::merge_cbo_roaring_bitmaps;
|
||||||
try_split_array_at, GrenadParameters,
|
// use crate::heed_codec::facet::{encode_prefix_string, FacetStringLevelZeroCodec};
|
||||||
};
|
|
||||||
use crate::heed_codec::facet::{encode_prefix_string, FacetStringLevelZeroCodec};
|
|
||||||
use crate::{FieldId, Result};
|
use crate::{FieldId, Result};
|
||||||
|
|
||||||
/// Extracts the facet string and the documents ids where this facet string appear.
|
/// Extracts the facet string and the documents ids where this facet string appear.
|
||||||
@ -24,7 +22,7 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
|
|||||||
|
|
||||||
let mut facet_string_docids_sorter = create_sorter(
|
let mut facet_string_docids_sorter = create_sorter(
|
||||||
grenad::SortAlgorithm::Stable,
|
grenad::SortAlgorithm::Stable,
|
||||||
keep_first_prefix_value_merge_roaring_bitmaps,
|
merge_cbo_roaring_bitmaps, // TODO: check
|
||||||
indexer.chunk_compression_type,
|
indexer.chunk_compression_type,
|
||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
indexer.max_nb_chunks,
|
indexer.max_nb_chunks,
|
||||||
@ -42,14 +40,16 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
|
|||||||
let original_value = str::from_utf8(original_value_bytes)?;
|
let original_value = str::from_utf8(original_value_bytes)?;
|
||||||
|
|
||||||
key_buffer.clear();
|
key_buffer.clear();
|
||||||
FacetStringLevelZeroCodec::serialize_into(
|
// TODO
|
||||||
field_id,
|
// FacetStringLevelZeroCodec::serialize_into(
|
||||||
str::from_utf8(normalized_value_bytes)?,
|
// field_id,
|
||||||
&mut key_buffer,
|
// str::from_utf8(normalized_value_bytes)?,
|
||||||
);
|
// &mut key_buffer,
|
||||||
|
// );
|
||||||
|
|
||||||
value_buffer.clear();
|
value_buffer.clear();
|
||||||
encode_prefix_string(original_value, &mut value_buffer)?;
|
// TODO
|
||||||
|
// encode_prefix_string(original_value, &mut value_buffer)?;
|
||||||
let bitmap = RoaringBitmap::from_iter(Some(document_id));
|
let bitmap = RoaringBitmap::from_iter(Some(document_id));
|
||||||
bitmap.serialize_into(&mut value_buffer)?;
|
bitmap.serialize_into(&mut value_buffer)?;
|
||||||
|
|
||||||
|
@ -25,8 +25,8 @@ use self::extract_word_docids::extract_word_docids;
|
|||||||
use self::extract_word_pair_proximity_docids::extract_word_pair_proximity_docids;
|
use self::extract_word_pair_proximity_docids::extract_word_pair_proximity_docids;
|
||||||
use self::extract_word_position_docids::extract_word_position_docids;
|
use self::extract_word_position_docids::extract_word_position_docids;
|
||||||
use super::helpers::{
|
use super::helpers::{
|
||||||
as_cloneable_grenad, keep_first_prefix_value_merge_roaring_bitmaps, merge_cbo_roaring_bitmaps,
|
as_cloneable_grenad, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps, CursorClonableMmap,
|
||||||
merge_roaring_bitmaps, CursorClonableMmap, GrenadParameters, MergeFn, MergeableReader,
|
GrenadParameters, MergeFn, MergeableReader,
|
||||||
};
|
};
|
||||||
use super::{helpers, TypedChunk};
|
use super::{helpers, TypedChunk};
|
||||||
use crate::{FieldId, Result};
|
use crate::{FieldId, Result};
|
||||||
@ -142,7 +142,7 @@ pub(crate) fn data_from_obkv_documents(
|
|||||||
indexer,
|
indexer,
|
||||||
lmdb_writer_sx.clone(),
|
lmdb_writer_sx.clone(),
|
||||||
extract_facet_string_docids,
|
extract_facet_string_docids,
|
||||||
keep_first_prefix_value_merge_roaring_bitmaps,
|
merge_roaring_bitmaps, // TODO: check (cbo?)
|
||||||
TypedChunk::FieldIdFacetStringDocids,
|
TypedChunk::FieldIdFacetStringDocids,
|
||||||
"field-id-facet-string-docids",
|
"field-id-facet-string-docids",
|
||||||
);
|
);
|
||||||
|
@ -5,7 +5,7 @@ use std::result::Result as StdResult;
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::read_u32_ne_bytes;
|
use super::read_u32_ne_bytes;
|
||||||
use crate::heed_codec::facet::{decode_prefix_string, encode_prefix_string};
|
// use crate::heed_codec::facet::{decode_prefix_string, encode_prefix_string};
|
||||||
use crate::heed_codec::CboRoaringBitmapCodec;
|
use crate::heed_codec::CboRoaringBitmapCodec;
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
|
||||||
@ -49,32 +49,32 @@ pub fn merge_roaring_bitmaps<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Resul
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn keep_first_prefix_value_merge_roaring_bitmaps<'a>(
|
// pub fn keep_first_prefix_value_merge_roaring_bitmaps<'a>(
|
||||||
_key: &[u8],
|
// _key: &[u8],
|
||||||
values: &[Cow<'a, [u8]>],
|
// values: &[Cow<'a, [u8]>],
|
||||||
) -> Result<Cow<'a, [u8]>> {
|
// ) -> Result<Cow<'a, [u8]>> {
|
||||||
if values.len() == 1 {
|
// if values.len() == 1 {
|
||||||
Ok(values[0].clone())
|
// Ok(values[0].clone())
|
||||||
} else {
|
// } else {
|
||||||
let original = decode_prefix_string(&values[0]).unwrap().0;
|
// let original = decode_prefix_string(&values[0]).unwrap().0;
|
||||||
let merged_bitmaps = values
|
// let merged_bitmaps = values
|
||||||
.iter()
|
// .iter()
|
||||||
.map(AsRef::as_ref)
|
// .map(AsRef::as_ref)
|
||||||
.map(decode_prefix_string)
|
// .map(decode_prefix_string)
|
||||||
.map(Option::unwrap)
|
// .map(Option::unwrap)
|
||||||
.map(|(_, bitmap_bytes)| bitmap_bytes)
|
// .map(|(_, bitmap_bytes)| bitmap_bytes)
|
||||||
.map(RoaringBitmap::deserialize_from)
|
// .map(RoaringBitmap::deserialize_from)
|
||||||
.map(StdResult::unwrap)
|
// .map(StdResult::unwrap)
|
||||||
.reduce(|a, b| a | b)
|
// .reduce(|a, b| a | b)
|
||||||
.unwrap();
|
// .unwrap();
|
||||||
|
|
||||||
let cap = std::mem::size_of::<u16>() + original.len() + merged_bitmaps.serialized_size();
|
// let cap = std::mem::size_of::<u16>() + original.len() + merged_bitmaps.serialized_size();
|
||||||
let mut buffer = Vec::with_capacity(cap);
|
// let mut buffer = Vec::with_capacity(cap);
|
||||||
encode_prefix_string(original, &mut buffer)?;
|
// encode_prefix_string(original, &mut buffer)?;
|
||||||
merged_bitmaps.serialize_into(&mut buffer)?;
|
// merged_bitmaps.serialize_into(&mut buffer)?;
|
||||||
Ok(Cow::Owned(buffer))
|
// Ok(Cow::Owned(buffer))
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
|
||||||
pub fn keep_first<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
|
pub fn keep_first<'a>(_key: &[u8], values: &[Cow<'a, [u8]>]) -> Result<Cow<'a, [u8]>> {
|
||||||
Ok(values[0].clone())
|
Ok(values[0].clone())
|
||||||
|
@ -13,9 +13,9 @@ pub use grenad_helpers::{
|
|||||||
writer_into_reader, GrenadParameters, MergeableReader,
|
writer_into_reader, GrenadParameters, MergeableReader,
|
||||||
};
|
};
|
||||||
pub use merge_functions::{
|
pub use merge_functions::{
|
||||||
concat_u32s_array, keep_first, keep_first_prefix_value_merge_roaring_bitmaps, keep_latest_obkv,
|
concat_u32s_array, keep_first, keep_latest_obkv, merge_cbo_roaring_bitmaps, merge_obkvs,
|
||||||
merge_cbo_roaring_bitmaps, merge_obkvs, merge_roaring_bitmaps, merge_two_obkvs,
|
merge_roaring_bitmaps, merge_two_obkvs, roaring_bitmap_from_u32s_array,
|
||||||
roaring_bitmap_from_u32s_array, serialize_roaring_bitmap, MergeFn,
|
serialize_roaring_bitmap, MergeFn,
|
||||||
};
|
};
|
||||||
|
|
||||||
/// The maximum length a word can be
|
/// The maximum length a word can be
|
||||||
|
@ -13,7 +13,6 @@ use super::helpers::{
|
|||||||
valid_lmdb_key, CursorClonableMmap,
|
valid_lmdb_key, CursorClonableMmap,
|
||||||
};
|
};
|
||||||
use super::{ClonableMmap, MergeFn};
|
use super::{ClonableMmap, MergeFn};
|
||||||
use crate::heed_codec::facet::{decode_prefix_string, encode_prefix_string};
|
|
||||||
use crate::update::index_documents::helpers::as_cloneable_grenad;
|
use crate::update::index_documents::helpers::as_cloneable_grenad;
|
||||||
use crate::{
|
use crate::{
|
||||||
lat_lng_to_xyz, BoRoaringBitmapCodec, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index,
|
lat_lng_to_xyz, BoRoaringBitmapCodec, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index,
|
||||||
@ -197,13 +196,14 @@ pub(crate) fn write_typed_chunk_into_index(
|
|||||||
index_is_empty,
|
index_is_empty,
|
||||||
|value, _buffer| Ok(value),
|
|value, _buffer| Ok(value),
|
||||||
|new_values, db_values, buffer| {
|
|new_values, db_values, buffer| {
|
||||||
let (_, new_values) = decode_prefix_string(new_values).unwrap();
|
todo!()
|
||||||
let new_values = RoaringBitmap::deserialize_from(new_values)?;
|
// let (_, new_values) = decode_prefix_string(new_values).unwrap();
|
||||||
let (db_original, db_values) = decode_prefix_string(db_values).unwrap();
|
// let new_values = RoaringBitmap::deserialize_from(new_values)?;
|
||||||
let db_values = RoaringBitmap::deserialize_from(db_values)?;
|
// let (db_original, db_values) = decode_prefix_string(db_values).unwrap();
|
||||||
let values = new_values | db_values;
|
// let db_values = RoaringBitmap::deserialize_from(db_values)?;
|
||||||
encode_prefix_string(db_original, buffer)?;
|
// let values = new_values | db_values;
|
||||||
Ok(values.serialize_into(buffer)?)
|
// encode_prefix_string(db_original, buffer)?;
|
||||||
|
// Ok(values.serialize_into(buffer)?)
|
||||||
},
|
},
|
||||||
)?;
|
)?;
|
||||||
is_merged_database = true;
|
is_merged_database = true;
|
||||||
|
Loading…
x
Reference in New Issue
Block a user