Move StrRefCodec and ByteSliceRefCodec to their own files

This commit is contained in:
Loïc Lecrenier 2022-10-12 09:42:55 +02:00
parent 1165ba2171
commit a034a1e628
18 changed files with 140 additions and 107 deletions

View File

@ -57,7 +57,7 @@ md5 = "0.7.0"
rand = {version = "0.8.5", features = ["small_rng"] }
[target.'cfg(fuzzing)'.dev-dependencies]
fuzzcheck = { git = "https://github.com/loiclec/fuzzcheck-rs", branch = "main" }
fuzzcheck = { git = "https://github.com/loiclec/fuzzcheck-rs", branch = "main" } # TODO: use released version
[features]
default = [ "charabia/default" ]

View File

@ -0,0 +1,23 @@
use std::borrow::Cow;
use heed::{BytesDecode, BytesEncode};
/// A codec for values of type `&[u8]`.
///
/// Unlike `ByteSlice`, its `EItem` and `DItem` associated types are equivalent
/// (both are `&'a [u8]`), so these values can reside within another structure.
///
/// Encoding wraps the given slice in a borrowed `Cow` and decoding returns the
/// input slice unchanged; neither direction copies the bytes.
pub struct ByteSliceRefCodec;
impl<'a> BytesEncode<'a> for ByteSliceRefCodec {
    /// The item to encode is already a byte slice.
    type EItem = &'a [u8];

    /// Encodes `item` by borrowing it as-is.
    ///
    /// Always returns `Some` holding a `Cow::Borrowed` of the slice that
    /// `item` points to; no bytes are copied.
    fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
        // `item` is a reference to a slice reference: peel off the outer layer.
        let bytes: &'a [u8] = *item;
        Some(Cow::Borrowed(bytes))
    }
}
impl<'a> BytesDecode<'a> for ByteSliceRefCodec {
    /// The decoded item is the raw byte slice itself.
    type DItem = &'a [u8];

    /// Decodes `bytes` by handing the very same slice back.
    ///
    /// Always returns `Some`; no validation or copy is performed.
    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        let decoded: Self::DItem = bytes;
        Some(decoded)
    }
}

View File

@ -1,6 +1,5 @@
mod field_doc_id_facet_codec;
mod ordered_f64_codec;
mod str_ref;
use std::borrow::Cow;
use std::convert::TryFrom;
@ -12,9 +11,10 @@ use roaring::RoaringBitmap;
pub use self::field_doc_id_facet_codec::FieldDocIdFacetCodec;
pub use self::ordered_f64_codec::OrderedF64Codec;
pub use self::str_ref::StrRefCodec;
use crate::{CboRoaringBitmapCodec, BEU16};
use super::StrRefCodec;
pub type FieldDocIdFacetF64Codec = FieldDocIdFacetCodec<OrderedF64Codec>;
pub type FieldDocIdFacetStringCodec = FieldDocIdFacetCodec<StrRefCodec>;
pub type FieldDocIdFacetIgnoreCodec = FieldDocIdFacetCodec<DecodeIgnore>;
@ -33,7 +33,7 @@ pub fn try_split_at(slice: &[u8], mid: usize) -> Option<(&[u8], &[u8])> {
/// The key in the [`facet_id_string_docids` and `facet_id_f64_docids`][`Index::facet_id_string_docids`]
/// databases.
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] // TODO: try removing PartialOrd and Ord
pub struct FacetGroupKey<T> {
pub field_id: u16,
pub level: u8,
@ -103,23 +103,3 @@ impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
Some(FacetGroupValue { size, bitmap })
}
}
/// A codec for values of type `&[u8]`.
///
/// Unlike `ByteSlice`, its `EItem` and `DItem` associated types are equivalent
/// (both are `&'a [u8]`), so these values can reside within another structure.
///
/// Encoding wraps the given slice in a borrowed `Cow` and decoding returns the
/// input slice unchanged; neither direction copies the bytes.
pub struct ByteSliceRef;
impl<'a> BytesEncode<'a> for ByteSliceRef {
    /// The item to encode is already a byte slice.
    type EItem = &'a [u8];

    /// Encodes `item` by borrowing it as-is.
    ///
    /// Always returns `Some` holding a `Cow::Borrowed` of the slice that
    /// `item` points to; no bytes are copied.
    fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
        // `item` is a reference to a slice reference: peel off the outer layer.
        let bytes: &'a [u8] = *item;
        Some(Cow::Borrowed(bytes))
    }
}
impl<'a> BytesDecode<'a> for ByteSliceRef {
    /// The decoded item is the raw byte slice itself.
    type DItem = &'a [u8];

    /// Decodes `bytes` by handing the very same slice back.
    ///
    /// Always returns `Some`; no validation or copy is performed.
    fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
        let decoded: Self::DItem = bytes;
        Some(decoded)
    }
}

View File

@ -1,10 +1,12 @@
mod beu32_str_codec;
mod byte_slice_ref;
pub mod facet;
mod field_id_word_count_codec;
mod obkv_codec;
mod roaring_bitmap;
mod roaring_bitmap_length;
mod str_beu32_codec;
mod str_ref;
mod str_str_u8_codec;
pub use self::beu32_str_codec::BEU32StrCodec;
@ -16,3 +18,5 @@ pub use self::roaring_bitmap_length::{
};
pub use self::str_beu32_codec::StrBEU32Codec;
pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec};
pub use byte_slice_ref::ByteSliceRefCodec;
pub use str_ref::StrRefCodec;

View File

@ -16,8 +16,9 @@ use crate::facet::FacetType;
use crate::fields_ids_map::FieldsIdsMap;
use crate::heed_codec::facet::{
FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
FieldIdCodec, OrderedF64Codec, StrRefCodec,
FieldIdCodec, OrderedF64Codec,
};
use crate::heed_codec::StrRefCodec;
use crate::{
default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId,

View File

@ -7,7 +7,8 @@ use roaring::RoaringBitmap;
use super::{Criterion, CriterionParameters, CriterionResult};
use crate::facet::FacetType;
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec};
use crate::heed_codec::facet::FacetGroupKeyCodec;
use crate::heed_codec::ByteSliceRefCodec;
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
use crate::search::query_tree::Operation;
@ -194,14 +195,14 @@ fn facet_ordered<'t>(
let number_iter = make_iter(
rtxn,
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
field_id,
candidates.clone(),
)?;
let string_iter = make_iter(
rtxn,
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
field_id,
candidates,
)?;

View File

@ -9,9 +9,10 @@ use roaring::RoaringBitmap;
use crate::error::UserError;
use crate::facet::FacetType;
use crate::heed_codec::facet::{
ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec,
FieldDocIdFacetStringCodec, OrderedF64Codec, StrRefCodec,
FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
OrderedF64Codec,
};
use crate::heed_codec::{ByteSliceRefCodec, StrRefCodec};
use crate::search::facet::facet_distribution_iter;
use crate::{FieldId, Index, Result};
@ -137,7 +138,9 @@ impl<'a> FacetDistribution<'a> {
) -> heed::Result<()> {
facet_distribution_iter::iterate_over_facet_distribution(
self.rtxn,
self.index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
self.index
.facet_id_f64_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
field_id,
candidates,
|facet_key, nbr_docids, _| {
@ -160,7 +163,9 @@ impl<'a> FacetDistribution<'a> {
) -> heed::Result<()> {
facet_distribution_iter::iterate_over_facet_distribution(
self.rtxn,
self.index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
self.index
.facet_id_string_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
field_id,
candidates,
|facet_key, nbr_docids, any_docid| {

View File

@ -4,9 +4,8 @@ use heed::Result;
use roaring::RoaringBitmap;
use super::{get_first_facet_value, get_highest_level};
use crate::heed_codec::facet::{
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec,
};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::ByteSliceRefCodec;
use crate::DocumentId;
/// Call the given closure on the facet distribution of the candidate documents.
@ -22,7 +21,7 @@ use crate::DocumentId;
/// keep iterating over the different facet values or stop.
pub fn iterate_over_facet_distribution<'t, CB>(
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
field_id: u16,
candidates: &RoaringBitmap,
callback: CB,
@ -31,10 +30,13 @@ where
CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
{
let mut fd = FacetDistribution { rtxn, db, field_id, callback };
let highest_level =
get_highest_level(rtxn, db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(), field_id)?;
let highest_level = get_highest_level(
rtxn,
db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
field_id,
)?;
if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? {
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
fd.iterate(candidates, highest_level, first_bound, usize::MAX)?;
return Ok(());
} else {
@ -47,7 +49,7 @@ where
CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
{
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
field_id: u16,
callback: CB,
}
@ -72,11 +74,13 @@ where
if key.field_id != self.field_id {
return Ok(ControlFlow::Break(()));
}
// TODO: use real intersection and then take min()?
let docids_in_common = value.bitmap.intersection_len(candidates);
if docids_in_common > 0 {
// TODO: use min()
let any_docid = value.bitmap.iter().next().unwrap();
match (self.callback)(key.left_bound, docids_in_common, any_docid)? {
ControlFlow::Continue(_) => {}
ControlFlow::Continue(_) => (), // TODO use unit instead of empty scope
ControlFlow::Break(_) => return Ok(ControlFlow::Break(())),
}
}

View File

@ -4,9 +4,8 @@ use heed::BytesEncode;
use roaring::RoaringBitmap;
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
use crate::heed_codec::facet::{
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec,
};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::ByteSliceRefCodec;
use crate::Result;
/// Find all the document ids for which the given field contains a value contained within
@ -47,13 +46,16 @@ where
}
Bound::Unbounded => Bound::Unbounded,
};
let db = db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>();
let db = db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids };
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(starting_left_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? {
let rightmost_bound =
Bound::Included(get_last_facet_value::<ByteSliceRef>(rtxn, db, field_id)?.unwrap()); // will not fail because get_first_facet_value succeeded
if let Some(starting_left_bound) =
get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)?
{
let rightmost_bound = Bound::Included(
get_last_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)?.unwrap(),
); // will not fail because get_first_facet_value succeeded
let group_size = usize::MAX;
f.run(highest_level, starting_left_bound, rightmost_bound, group_size)?;
Ok(())
@ -65,7 +67,7 @@ where
/// Fetch the document ids that have a facet with a value between the two given bounds
struct FacetRangeSearch<'t, 'b, 'bitmap> {
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
field_id: u16,
left: Bound<&'b [u8]>,
right: Bound<&'b [u8]>,

View File

@ -3,8 +3,9 @@ use roaring::RoaringBitmap;
use super::{get_first_facet_value, get_highest_level};
use crate::heed_codec::facet::{
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
};
use crate::heed_codec::ByteSliceRefCodec;
/// Return an iterator which iterates over the given candidate documents in
/// ascending order of their facet value for the given field id.
@ -30,12 +31,12 @@ use crate::heed_codec::facet::{
/// Note that once a document id is returned by the iterator, it is never returned again.
pub fn ascending_facet_sort<'t>(
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
field_id: u16,
candidates: RoaringBitmap,
) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> {
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? {
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX);
@ -47,11 +48,13 @@ pub fn ascending_facet_sort<'t>(
struct AscendingFacetSort<'t, 'e> {
rtxn: &'t heed::RoTxn<'e>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
field_id: u16,
stack: Vec<(
RoaringBitmap,
std::iter::Take<heed::RoRange<'t, FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>>,
std::iter::Take<
heed::RoRange<'t, FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
>,
)>,
}

View File

@ -5,22 +5,23 @@ use roaring::RoaringBitmap;
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
use crate::heed_codec::facet::{
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
};
use crate::heed_codec::ByteSliceRefCodec;
/// See documentation for [`ascending_facet_sort`](super::ascending_facet_sort).
///
/// This function does the same thing, but in the opposite order.
pub fn descending_facet_sort<'t>(
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
field_id: u16,
candidates: RoaringBitmap,
) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> {
let highest_level = get_highest_level(rtxn, db, field_id)?;
if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? {
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
let last_bound = get_last_facet_value::<ByteSliceRef>(rtxn, db, field_id)?.unwrap();
let last_bound = get_last_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)?.unwrap();
let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound };
let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX);
Ok(Box::new(DescendingFacetSort {
@ -36,12 +37,12 @@ pub fn descending_facet_sort<'t>(
struct DescendingFacetSort<'t> {
rtxn: &'t heed::RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
field_id: u16,
stack: Vec<(
RoaringBitmap,
std::iter::Take<
heed::RoRevRange<'t, FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
heed::RoRevRange<'t, FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
>,
Bound<&'t [u8]>,
)>,
@ -97,7 +98,7 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
*right_bound = Bound::Excluded(left_bound);
let iter = match self
.db
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>()
.rev_range(
&self.rtxn,
&(Bound::Included(starting_key_below), end_key_kelow),
@ -121,7 +122,8 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
mod tests {
use roaring::RoaringBitmap;
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec};
use crate::heed_codec::facet::FacetGroupKeyCodec;
use crate::heed_codec::ByteSliceRefCodec;
use crate::milli_snap;
use crate::search::facet::facet_sort_descending::descending_facet_sort;
use crate::search::facet::tests::{get_random_looking_index, get_simple_index};
@ -134,7 +136,7 @@ mod tests {
let txn = index.env.read_txn().unwrap();
let candidates = (200..=300).into_iter().collect::<RoaringBitmap>();
let mut results = String::new();
let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>();
let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap();
for el in iter {
let docids = el.unwrap();

View File

@ -5,8 +5,8 @@ use heed::{BytesDecode, RoTxn};
pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET};
pub use self::filter::Filter;
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::ByteSliceRefCodec;
mod facet_distribution;
mod facet_distribution_iter;
mod facet_range_search;
@ -17,7 +17,7 @@ mod filter;
/// Get the first facet value in the facet database
pub(crate) fn get_first_facet_value<'t, BoundCodec>(
txn: &'t RoTxn,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
field_id: u16,
) -> heed::Result<Option<BoundCodec::DItem>>
where
@ -42,7 +42,7 @@ where
/// Get the last facet value in the facet database
pub(crate) fn get_last_facet_value<'t, BoundCodec>(
txn: &'t RoTxn,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
field_id: u16,
) -> heed::Result<Option<BoundCodec::DItem>>
where
@ -67,7 +67,7 @@ where
/// Get the height of the highest level in the facet database
pub(crate) fn get_highest_level<'t>(
txn: &'t RoTxn<'t>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
field_id: u16,
) -> heed::Result<u8> {
let field_id_prefix = &field_id.to_be_bytes();
@ -77,7 +77,7 @@ pub(crate) fn get_highest_level<'t>(
.next()
.map(|el| {
let (key, _) = el.unwrap();
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(key).unwrap();
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key).unwrap();
key.level
})
.unwrap_or(0))

View File

@ -11,8 +11,9 @@ use time::OffsetDateTime;
use super::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
use crate::facet::FacetType;
use crate::heed_codec::facet::{
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
};
use crate::heed_codec::ByteSliceRefCodec;
use crate::update::index_documents::{create_writer, writer_into_reader};
use crate::{CboRoaringBitmapCodec, FieldId, Index, Result};
@ -75,11 +76,11 @@ impl<'i> FacetsUpdateBulk<'i> {
let Self { index, field_ids, group_size, min_level_size, facet_type, new_data } = self;
let db = match facet_type {
FacetType::String => {
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
}
FacetType::String => index
.facet_id_string_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
FacetType::Number => {
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>()
}
};
@ -98,7 +99,7 @@ impl<'i> FacetsUpdateBulk<'i> {
/// Implementation of `FacetsUpdateBulk` that is independent of milli's `Index` type
pub(crate) struct FacetsUpdateBulkInner<R: std::io::Read + std::io::Seek> {
pub db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
pub db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
pub new_data: Option<grenad::Reader<R>>,
pub group_size: u8,
pub min_level_size: u8,
@ -216,7 +217,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
.db
.as_polymorph()
.prefix_iter::<_, ByteSlice, ByteSlice>(rtxn, level_0_prefix.as_slice())?
.remap_types::<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>();
.remap_types::<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>();
let mut left_bound: &[u8] = &[];
let mut first_iteration_for_new_group = true;
@ -299,7 +300,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..))
{
let key = FacetGroupKey { field_id, level, left_bound };
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_encode(&key)
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_encode(&key)
.ok_or(Error::Encoding)?;
let value = FacetGroupValue { size: group_size, bitmap };
let value =
@ -328,7 +329,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..))
{
let key = FacetGroupKey { field_id, level, left_bound };
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_encode(&key)
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_encode(&key)
.ok_or(Error::Encoding)?;
let value = FacetGroupValue { size: group_size, bitmap };
let value = FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?;

View File

@ -1,7 +1,8 @@
use super::{FACET_GROUP_SIZE, FACET_MAX_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
use crate::{
facet::FacetType,
heed_codec::facet::{ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec},
heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec},
heed_codec::ByteSliceRefCodec,
update::{FacetsUpdateBulk, FacetsUpdateIncrementalInner},
FieldId, Index, Result,
};
@ -11,7 +12,7 @@ use std::collections::{HashMap, HashSet};
pub struct FacetsDelete<'i, 'b> {
index: &'i Index,
database: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
database: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
facet_type: FacetType,
affected_facet_values: HashMap<FieldId, HashSet<Vec<u8>>>,
docids_to_delete: &'b RoaringBitmap,
@ -27,11 +28,11 @@ impl<'i, 'b> FacetsDelete<'i, 'b> {
docids_to_delete: &'b RoaringBitmap,
) -> Self {
let database = match facet_type {
FacetType::String => {
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
}
FacetType::String => index
.facet_id_string_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
FacetType::Number => {
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>()
}
};
Self {

View File

@ -7,8 +7,9 @@ use roaring::RoaringBitmap;
use crate::facet::FacetType;
use crate::heed_codec::facet::{
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
};
use crate::heed_codec::ByteSliceRefCodec;
use crate::search::facet::get_highest_level;
use crate::{CboRoaringBitmapCodec, FieldId, Index, Result};
@ -50,10 +51,10 @@ impl<'i> FacetsUpdateIncremental<'i> {
db: match facet_type {
FacetType::String => index
.facet_id_string_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
FacetType::Number => index
.facet_id_f64_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
},
group_size,
max_group_size,
@ -69,7 +70,7 @@ impl<'i> FacetsUpdateIncremental<'i> {
let mut cursor = self.new_data.into_cursor()?;
while let Some((key, value)) = cursor.move_on_next()? {
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(key)
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key)
.ok_or(heed::Error::Encoding)?;
let docids = CboRoaringBitmapCodec::bytes_decode(value).ok_or(heed::Error::Encoding)?;
self.inner.insert(wtxn, key.field_id, key.left_bound, &docids)?;
@ -87,7 +88,7 @@ impl<'i> FacetsUpdateIncremental<'i> {
/// Implementation of `FacetsUpdateIncremental` that is independent of milli's `Index` type
pub struct FacetsUpdateIncrementalInner {
pub db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
pub db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
pub group_size: u8,
pub min_level_size: u8,
pub max_group_size: u8,
@ -126,7 +127,7 @@ impl FacetsUpdateIncrementalInner {
if let Some(e) = prefix_iter.next() {
let (key_bytes, value) = e?;
Ok((
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes)
FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(&key_bytes)
.ok_or(Error::Encoding)?
.into_owned(),
value,
@ -149,7 +150,7 @@ impl FacetsUpdateIncrementalInner {
)?;
let (key_bytes, value) = iter.next().unwrap()?;
Ok((
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes)
FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(&key_bytes)
.ok_or(Error::Encoding)?
.into_owned(),
value,
@ -411,7 +412,7 @@ impl FacetsUpdateIncrementalInner {
let mut values = RoaringBitmap::new();
for _ in 0..group_size {
let (key_bytes, value_i) = groups_iter.next().unwrap()?;
let key_i = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes)
let key_i = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(&key_bytes)
.ok_or(Error::Encoding)?;
if first_key.is_none() {
@ -434,7 +435,7 @@ impl FacetsUpdateIncrementalInner {
let mut values = RoaringBitmap::new();
for _ in 0..nbr_leftover_elements {
let (key_bytes, value_i) = groups_iter.next().unwrap()?;
let key_i = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes)
let key_i = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(&key_bytes)
.ok_or(Error::Encoding)?;
if first_key.is_none() {
@ -616,7 +617,7 @@ impl FacetsUpdateIncrementalInner {
while let Some(el) = iter.next() {
let (k, _) = el?;
to_delete.push(
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(k)
FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(k)
.ok_or(Error::Encoding)?
.into_owned(),
);
@ -655,7 +656,8 @@ mod tests {
use rand::{Rng, SeedableRng};
use roaring::RoaringBitmap;
use crate::heed_codec::facet::{OrderedF64Codec, StrRefCodec};
use crate::heed_codec::facet::OrderedF64Codec;
use crate::heed_codec::StrRefCodec;
use crate::milli_snap;
use crate::update::facet::tests::FacetIndex;
@ -1019,6 +1021,7 @@ mod tests {
// fuzz tests
}
#[cfg(all(test, fuzzing))]
mod fuzz {
use std::borrow::Cow;

View File

@ -77,7 +77,8 @@ pub const FACET_MIN_LEVEL_SIZE: u8 = 5;
use self::incremental::FacetsUpdateIncremental;
use super::FacetsUpdateBulk;
use crate::facet::FacetType;
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec};
use crate::heed_codec::ByteSliceRefCodec;
use crate::{Index, Result};
use std::fs::File;
@ -87,7 +88,7 @@ pub mod incremental;
pub struct FacetsUpdate<'i> {
index: &'i Index,
database: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
database: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
facet_type: FacetType,
new_data: grenad::Reader<File>,
group_size: u8,
@ -97,11 +98,11 @@ pub struct FacetsUpdate<'i> {
impl<'i> FacetsUpdate<'i> {
pub fn new(index: &'i Index, facet_type: FacetType, new_data: grenad::Reader<File>) -> Self {
let database = match facet_type {
FacetType::String => {
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
}
FacetType::String => index
.facet_id_string_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
FacetType::Number => {
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>()
}
};
Self {
@ -159,8 +160,9 @@ pub(crate) mod tests {
use super::bulk::FacetsUpdateBulkInner;
use crate::heed_codec::facet::{
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
};
use crate::heed_codec::ByteSliceRefCodec;
use crate::search::facet::get_highest_level;
use crate::snapshot_tests::display_bitmap;
use crate::update::FacetsUpdateIncrementalInner;
@ -173,7 +175,7 @@ pub(crate) mod tests {
BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>,
{
pub env: Env,
pub content: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
pub content: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
pub group_size: Cell<u8>,
pub min_level_size: Cell<u8>,
pub max_group_size: Cell<u8>,
@ -327,7 +329,7 @@ pub(crate) mod tests {
let left_bound_bytes = BoundCodec::bytes_encode(left_bound).unwrap().into_owned();
let key: FacetGroupKey<&[u8]> =
FacetGroupKey { field_id: *field_id, level: 0, left_bound: &left_bound_bytes };
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_encode(&key).unwrap();
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_encode(&key).unwrap();
let value = CboRoaringBitmapCodec::bytes_encode(&docids).unwrap();
writer.insert(&key, &value).unwrap();
}
@ -362,7 +364,7 @@ pub(crate) mod tests {
.unwrap();
while let Some(el) = iter.next() {
let (key, value) = el.unwrap();
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key).unwrap();
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(&key).unwrap();
let mut prefix_start_below = vec![];
prefix_start_below.extend_from_slice(&field_id.to_be_bytes());
@ -379,7 +381,7 @@ pub(crate) mod tests {
)
.unwrap();
let (key_bytes, _) = start_below_iter.next().unwrap().unwrap();
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes).unwrap()
FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(&key_bytes).unwrap()
};
assert!(value.size > 0);

View File

@ -4,7 +4,8 @@ use std::io;
use heed::BytesEncode;
use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, StrRefCodec};
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec};
use crate::heed_codec::StrRefCodec;
use crate::update::index_documents::merge_cbo_roaring_bitmaps;
use crate::{FieldId, Result};