mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 21:04:27 +01:00
Move StrRefCodec and ByteSliceRefCodec to their own files
This commit is contained in:
parent
1165ba2171
commit
a034a1e628
@ -57,7 +57,7 @@ md5 = "0.7.0"
|
||||
rand = {version = "0.8.5", features = ["small_rng"] }
|
||||
|
||||
[target.'cfg(fuzzing)'.dev-dependencies]
|
||||
fuzzcheck = { git = "https://github.com/loiclec/fuzzcheck-rs", branch = "main" }
|
||||
fuzzcheck = { git = "https://github.com/loiclec/fuzzcheck-rs", branch = "main" } # TODO: use released version
|
||||
|
||||
[features]
|
||||
default = [ "charabia/default" ]
|
||||
|
23
milli/src/heed_codec/byte_slice_ref.rs
Normal file
23
milli/src/heed_codec/byte_slice_ref.rs
Normal file
@ -0,0 +1,23 @@
|
||||
use std::borrow::Cow;
|
||||
|
||||
use heed::{BytesDecode, BytesEncode};
|
||||
|
||||
/// A codec for values of type `&[u8]`. Unlike `ByteSlice`, its `EItem` and `DItem` associated
|
||||
/// types are equivalent (= `&'a [u8]`) and these values can reside within another structure.
|
||||
pub struct ByteSliceRefCodec;
|
||||
|
||||
impl<'a> BytesEncode<'a> for ByteSliceRefCodec {
|
||||
type EItem = &'a [u8];
|
||||
|
||||
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
Some(Cow::Borrowed(item))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> BytesDecode<'a> for ByteSliceRefCodec {
|
||||
type DItem = &'a [u8];
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
Some(bytes)
|
||||
}
|
||||
}
|
@ -1,6 +1,5 @@
|
||||
mod field_doc_id_facet_codec;
|
||||
mod ordered_f64_codec;
|
||||
mod str_ref;
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::convert::TryFrom;
|
||||
@ -12,9 +11,10 @@ use roaring::RoaringBitmap;
|
||||
|
||||
pub use self::field_doc_id_facet_codec::FieldDocIdFacetCodec;
|
||||
pub use self::ordered_f64_codec::OrderedF64Codec;
|
||||
pub use self::str_ref::StrRefCodec;
|
||||
use crate::{CboRoaringBitmapCodec, BEU16};
|
||||
|
||||
use super::StrRefCodec;
|
||||
|
||||
pub type FieldDocIdFacetF64Codec = FieldDocIdFacetCodec<OrderedF64Codec>;
|
||||
pub type FieldDocIdFacetStringCodec = FieldDocIdFacetCodec<StrRefCodec>;
|
||||
pub type FieldDocIdFacetIgnoreCodec = FieldDocIdFacetCodec<DecodeIgnore>;
|
||||
@ -33,7 +33,7 @@ pub fn try_split_at(slice: &[u8], mid: usize) -> Option<(&[u8], &[u8])> {
|
||||
|
||||
/// The key in the [`facet_id_string_docids` and `facet_id_f64_docids`][`Index::facet_id_string_docids`]
|
||||
/// databases.
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
|
||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] // TODO: try removing PartialOrd and Ord
|
||||
pub struct FacetGroupKey<T> {
|
||||
pub field_id: u16,
|
||||
pub level: u8,
|
||||
@ -103,23 +103,3 @@ impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
|
||||
Some(FacetGroupValue { size, bitmap })
|
||||
}
|
||||
}
|
||||
|
||||
/// A codec for values of type `&[u8]`. Unlike `ByteSlice`, its `EItem` and `DItem` associated
|
||||
/// types are equivalent (= `&'a [u8]`) and these values can reside within another structure.
|
||||
pub struct ByteSliceRef;
|
||||
|
||||
impl<'a> BytesEncode<'a> for ByteSliceRef {
|
||||
type EItem = &'a [u8];
|
||||
|
||||
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||
Some(Cow::Borrowed(item))
|
||||
}
|
||||
}
|
||||
|
||||
impl<'a> BytesDecode<'a> for ByteSliceRef {
|
||||
type DItem = &'a [u8];
|
||||
|
||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||
Some(bytes)
|
||||
}
|
||||
}
|
||||
|
@ -1,10 +1,12 @@
|
||||
mod beu32_str_codec;
|
||||
mod byte_slice_ref;
|
||||
pub mod facet;
|
||||
mod field_id_word_count_codec;
|
||||
mod obkv_codec;
|
||||
mod roaring_bitmap;
|
||||
mod roaring_bitmap_length;
|
||||
mod str_beu32_codec;
|
||||
mod str_ref;
|
||||
mod str_str_u8_codec;
|
||||
|
||||
pub use self::beu32_str_codec::BEU32StrCodec;
|
||||
@ -16,3 +18,5 @@ pub use self::roaring_bitmap_length::{
|
||||
};
|
||||
pub use self::str_beu32_codec::StrBEU32Codec;
|
||||
pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec};
|
||||
pub use byte_slice_ref::ByteSliceRefCodec;
|
||||
pub use str_ref::StrRefCodec;
|
||||
|
@ -16,8 +16,9 @@ use crate::facet::FacetType;
|
||||
use crate::fields_ids_map::FieldsIdsMap;
|
||||
use crate::heed_codec::facet::{
|
||||
FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
|
||||
FieldIdCodec, OrderedF64Codec, StrRefCodec,
|
||||
FieldIdCodec, OrderedF64Codec,
|
||||
};
|
||||
use crate::heed_codec::StrRefCodec;
|
||||
use crate::{
|
||||
default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
|
||||
DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId,
|
||||
|
@ -7,7 +7,8 @@ use roaring::RoaringBitmap;
|
||||
|
||||
use super::{Criterion, CriterionParameters, CriterionResult};
|
||||
use crate::facet::FacetType;
|
||||
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec};
|
||||
use crate::heed_codec::facet::FacetGroupKeyCodec;
|
||||
use crate::heed_codec::ByteSliceRefCodec;
|
||||
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
|
||||
use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
|
||||
use crate::search::query_tree::Operation;
|
||||
@ -194,14 +195,14 @@ fn facet_ordered<'t>(
|
||||
|
||||
let number_iter = make_iter(
|
||||
rtxn,
|
||||
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
|
||||
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
|
||||
field_id,
|
||||
candidates.clone(),
|
||||
)?;
|
||||
|
||||
let string_iter = make_iter(
|
||||
rtxn,
|
||||
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
|
||||
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
|
||||
field_id,
|
||||
candidates,
|
||||
)?;
|
||||
|
@ -9,9 +9,10 @@ use roaring::RoaringBitmap;
|
||||
use crate::error::UserError;
|
||||
use crate::facet::FacetType;
|
||||
use crate::heed_codec::facet::{
|
||||
ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec,
|
||||
FieldDocIdFacetStringCodec, OrderedF64Codec, StrRefCodec,
|
||||
FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
|
||||
OrderedF64Codec,
|
||||
};
|
||||
use crate::heed_codec::{ByteSliceRefCodec, StrRefCodec};
|
||||
use crate::search::facet::facet_distribution_iter;
|
||||
use crate::{FieldId, Index, Result};
|
||||
|
||||
@ -137,7 +138,9 @@ impl<'a> FacetDistribution<'a> {
|
||||
) -> heed::Result<()> {
|
||||
facet_distribution_iter::iterate_over_facet_distribution(
|
||||
self.rtxn,
|
||||
self.index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
|
||||
self.index
|
||||
.facet_id_f64_docids
|
||||
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
|
||||
field_id,
|
||||
candidates,
|
||||
|facet_key, nbr_docids, _| {
|
||||
@ -160,7 +163,9 @@ impl<'a> FacetDistribution<'a> {
|
||||
) -> heed::Result<()> {
|
||||
facet_distribution_iter::iterate_over_facet_distribution(
|
||||
self.rtxn,
|
||||
self.index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
|
||||
self.index
|
||||
.facet_id_string_docids
|
||||
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
|
||||
field_id,
|
||||
candidates,
|
||||
|facet_key, nbr_docids, any_docid| {
|
||||
|
@ -4,9 +4,8 @@ use heed::Result;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{get_first_facet_value, get_highest_level};
|
||||
use crate::heed_codec::facet::{
|
||||
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec,
|
||||
};
|
||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||
use crate::heed_codec::ByteSliceRefCodec;
|
||||
use crate::DocumentId;
|
||||
|
||||
/// Call the given closure on the facet distribution of the candidate documents.
|
||||
@ -22,7 +21,7 @@ use crate::DocumentId;
|
||||
/// keep iterating over the different facet values or stop.
|
||||
pub fn iterate_over_facet_distribution<'t, CB>(
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
candidates: &RoaringBitmap,
|
||||
callback: CB,
|
||||
@ -31,10 +30,13 @@ where
|
||||
CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
|
||||
{
|
||||
let mut fd = FacetDistribution { rtxn, db, field_id, callback };
|
||||
let highest_level =
|
||||
get_highest_level(rtxn, db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(), field_id)?;
|
||||
let highest_level = get_highest_level(
|
||||
rtxn,
|
||||
db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
|
||||
field_id,
|
||||
)?;
|
||||
|
||||
if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? {
|
||||
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
|
||||
fd.iterate(candidates, highest_level, first_bound, usize::MAX)?;
|
||||
return Ok(());
|
||||
} else {
|
||||
@ -47,7 +49,7 @@ where
|
||||
CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
|
||||
{
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
callback: CB,
|
||||
}
|
||||
@ -72,11 +74,13 @@ where
|
||||
if key.field_id != self.field_id {
|
||||
return Ok(ControlFlow::Break(()));
|
||||
}
|
||||
// TODO: use real intersection and then take min()?
|
||||
let docids_in_common = value.bitmap.intersection_len(candidates);
|
||||
if docids_in_common > 0 {
|
||||
// TODO: use min()
|
||||
let any_docid = value.bitmap.iter().next().unwrap();
|
||||
match (self.callback)(key.left_bound, docids_in_common, any_docid)? {
|
||||
ControlFlow::Continue(_) => {}
|
||||
ControlFlow::Continue(_) => (), // TODO use unit instead of empty scope
|
||||
ControlFlow::Break(_) => return Ok(ControlFlow::Break(())),
|
||||
}
|
||||
}
|
||||
|
@ -4,9 +4,8 @@ use heed::BytesEncode;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
|
||||
use crate::heed_codec::facet::{
|
||||
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec,
|
||||
};
|
||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||
use crate::heed_codec::ByteSliceRefCodec;
|
||||
use crate::Result;
|
||||
|
||||
/// Find all the document ids for which the given field contains a value contained within
|
||||
@ -47,13 +46,16 @@ where
|
||||
}
|
||||
Bound::Unbounded => Bound::Unbounded,
|
||||
};
|
||||
let db = db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>();
|
||||
let db = db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
|
||||
let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids };
|
||||
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
||||
|
||||
if let Some(starting_left_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? {
|
||||
let rightmost_bound =
|
||||
Bound::Included(get_last_facet_value::<ByteSliceRef>(rtxn, db, field_id)?.unwrap()); // will not fail because get_first_facet_value succeeded
|
||||
if let Some(starting_left_bound) =
|
||||
get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)?
|
||||
{
|
||||
let rightmost_bound = Bound::Included(
|
||||
get_last_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)?.unwrap(),
|
||||
); // will not fail because get_first_facet_value succeeded
|
||||
let group_size = usize::MAX;
|
||||
f.run(highest_level, starting_left_bound, rightmost_bound, group_size)?;
|
||||
Ok(())
|
||||
@ -65,7 +67,7 @@ where
|
||||
/// Fetch the document ids that have a facet with a value between the two given bounds
|
||||
struct FacetRangeSearch<'t, 'b, 'bitmap> {
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
left: Bound<&'b [u8]>,
|
||||
right: Bound<&'b [u8]>,
|
||||
|
@ -3,8 +3,9 @@ use roaring::RoaringBitmap;
|
||||
|
||||
use super::{get_first_facet_value, get_highest_level};
|
||||
use crate::heed_codec::facet::{
|
||||
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
||||
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
||||
};
|
||||
use crate::heed_codec::ByteSliceRefCodec;
|
||||
|
||||
/// Return an iterator which iterates over the given candidate documents in
|
||||
/// ascending order of their facet value for the given field id.
|
||||
@ -30,12 +31,12 @@ use crate::heed_codec::facet::{
|
||||
/// Note that once a document id is returned by the iterator, it is never returned again.
|
||||
pub fn ascending_facet_sort<'t>(
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
candidates: RoaringBitmap,
|
||||
) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> {
|
||||
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
||||
if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? {
|
||||
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
|
||||
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
|
||||
let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX);
|
||||
|
||||
@ -47,11 +48,13 @@ pub fn ascending_facet_sort<'t>(
|
||||
|
||||
struct AscendingFacetSort<'t, 'e> {
|
||||
rtxn: &'t heed::RoTxn<'e>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
stack: Vec<(
|
||||
RoaringBitmap,
|
||||
std::iter::Take<heed::RoRange<'t, FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>>,
|
||||
std::iter::Take<
|
||||
heed::RoRange<'t, FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||
>,
|
||||
)>,
|
||||
}
|
||||
|
||||
|
@ -5,22 +5,23 @@ use roaring::RoaringBitmap;
|
||||
|
||||
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
|
||||
use crate::heed_codec::facet::{
|
||||
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
||||
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
||||
};
|
||||
use crate::heed_codec::ByteSliceRefCodec;
|
||||
|
||||
/// See the documentation for [`ascending_facet_sort`](super::ascending_facet_sort).
|
||||
///
|
||||
/// This function does the same thing, but in the opposite order.
|
||||
pub fn descending_facet_sort<'t>(
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
candidates: RoaringBitmap,
|
||||
) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> {
|
||||
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
||||
if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? {
|
||||
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
|
||||
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
|
||||
let last_bound = get_last_facet_value::<ByteSliceRef>(rtxn, db, field_id)?.unwrap();
|
||||
let last_bound = get_last_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)?.unwrap();
|
||||
let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound };
|
||||
let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX);
|
||||
Ok(Box::new(DescendingFacetSort {
|
||||
@ -36,12 +37,12 @@ pub fn descending_facet_sort<'t>(
|
||||
|
||||
struct DescendingFacetSort<'t> {
|
||||
rtxn: &'t heed::RoTxn<'t>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
stack: Vec<(
|
||||
RoaringBitmap,
|
||||
std::iter::Take<
|
||||
heed::RoRevRange<'t, FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||
heed::RoRevRange<'t, FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||
>,
|
||||
Bound<&'t [u8]>,
|
||||
)>,
|
||||
@ -97,7 +98,7 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
|
||||
*right_bound = Bound::Excluded(left_bound);
|
||||
let iter = match self
|
||||
.db
|
||||
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
||||
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>()
|
||||
.rev_range(
|
||||
&self.rtxn,
|
||||
&(Bound::Included(starting_key_below), end_key_kelow),
|
||||
@ -121,7 +122,8 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
|
||||
mod tests {
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec};
|
||||
use crate::heed_codec::facet::FacetGroupKeyCodec;
|
||||
use crate::heed_codec::ByteSliceRefCodec;
|
||||
use crate::milli_snap;
|
||||
use crate::search::facet::facet_sort_descending::descending_facet_sort;
|
||||
use crate::search::facet::tests::{get_random_looking_index, get_simple_index};
|
||||
@ -134,7 +136,7 @@ mod tests {
|
||||
let txn = index.env.read_txn().unwrap();
|
||||
let candidates = (200..=300).into_iter().collect::<RoaringBitmap>();
|
||||
let mut results = String::new();
|
||||
let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>();
|
||||
let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
|
||||
let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap();
|
||||
for el in iter {
|
||||
let docids = el.unwrap();
|
||||
|
@ -5,8 +5,8 @@ use heed::{BytesDecode, RoTxn};
|
||||
|
||||
pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET};
|
||||
pub use self::filter::Filter;
|
||||
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||
|
||||
use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||
use crate::heed_codec::ByteSliceRefCodec;
|
||||
mod facet_distribution;
|
||||
mod facet_distribution_iter;
|
||||
mod facet_range_search;
|
||||
@ -17,7 +17,7 @@ mod filter;
|
||||
/// Get the first facet value in the facet database
|
||||
pub(crate) fn get_first_facet_value<'t, BoundCodec>(
|
||||
txn: &'t RoTxn,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
) -> heed::Result<Option<BoundCodec::DItem>>
|
||||
where
|
||||
@ -42,7 +42,7 @@ where
|
||||
/// Get the last facet value in the facet database
|
||||
pub(crate) fn get_last_facet_value<'t, BoundCodec>(
|
||||
txn: &'t RoTxn,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
) -> heed::Result<Option<BoundCodec::DItem>>
|
||||
where
|
||||
@ -67,7 +67,7 @@ where
|
||||
/// Get the height of the highest level in the facet database
|
||||
pub(crate) fn get_highest_level<'t>(
|
||||
txn: &'t RoTxn<'t>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||
field_id: u16,
|
||||
) -> heed::Result<u8> {
|
||||
let field_id_prefix = &field_id.to_be_bytes();
|
||||
@ -77,7 +77,7 @@ pub(crate) fn get_highest_level<'t>(
|
||||
.next()
|
||||
.map(|el| {
|
||||
let (key, _) = el.unwrap();
|
||||
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(key).unwrap();
|
||||
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key).unwrap();
|
||||
key.level
|
||||
})
|
||||
.unwrap_or(0))
|
||||
|
@ -11,8 +11,9 @@ use time::OffsetDateTime;
|
||||
use super::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
|
||||
use crate::facet::FacetType;
|
||||
use crate::heed_codec::facet::{
|
||||
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
||||
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
||||
};
|
||||
use crate::heed_codec::ByteSliceRefCodec;
|
||||
use crate::update::index_documents::{create_writer, writer_into_reader};
|
||||
use crate::{CboRoaringBitmapCodec, FieldId, Index, Result};
|
||||
|
||||
@ -75,11 +76,11 @@ impl<'i> FacetsUpdateBulk<'i> {
|
||||
let Self { index, field_ids, group_size, min_level_size, facet_type, new_data } = self;
|
||||
|
||||
let db = match facet_type {
|
||||
FacetType::String => {
|
||||
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
||||
}
|
||||
FacetType::String => index
|
||||
.facet_id_string_docids
|
||||
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
|
||||
FacetType::Number => {
|
||||
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
||||
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>()
|
||||
}
|
||||
};
|
||||
|
||||
@ -98,7 +99,7 @@ impl<'i> FacetsUpdateBulk<'i> {
|
||||
|
||||
/// Implementation of `FacetsUpdateBulk` that is independent of milli's `Index` type
|
||||
pub(crate) struct FacetsUpdateBulkInner<R: std::io::Read + std::io::Seek> {
|
||||
pub db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||
pub db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||
pub new_data: Option<grenad::Reader<R>>,
|
||||
pub group_size: u8,
|
||||
pub min_level_size: u8,
|
||||
@ -216,7 +217,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
|
||||
.db
|
||||
.as_polymorph()
|
||||
.prefix_iter::<_, ByteSlice, ByteSlice>(rtxn, level_0_prefix.as_slice())?
|
||||
.remap_types::<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>();
|
||||
.remap_types::<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>();
|
||||
|
||||
let mut left_bound: &[u8] = &[];
|
||||
let mut first_iteration_for_new_group = true;
|
||||
@ -299,7 +300,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
|
||||
bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..))
|
||||
{
|
||||
let key = FacetGroupKey { field_id, level, left_bound };
|
||||
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_encode(&key)
|
||||
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_encode(&key)
|
||||
.ok_or(Error::Encoding)?;
|
||||
let value = FacetGroupValue { size: group_size, bitmap };
|
||||
let value =
|
||||
@ -328,7 +329,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
|
||||
bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..))
|
||||
{
|
||||
let key = FacetGroupKey { field_id, level, left_bound };
|
||||
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_encode(&key)
|
||||
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_encode(&key)
|
||||
.ok_or(Error::Encoding)?;
|
||||
let value = FacetGroupValue { size: group_size, bitmap };
|
||||
let value = FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?;
|
||||
|
@ -1,7 +1,8 @@
|
||||
use super::{FACET_GROUP_SIZE, FACET_MAX_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
|
||||
use crate::{
|
||||
facet::FacetType,
|
||||
heed_codec::facet::{ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec},
|
||||
heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec},
|
||||
heed_codec::ByteSliceRefCodec,
|
||||
update::{FacetsUpdateBulk, FacetsUpdateIncrementalInner},
|
||||
FieldId, Index, Result,
|
||||
};
|
||||
@ -11,7 +12,7 @@ use std::collections::{HashMap, HashSet};
|
||||
|
||||
pub struct FacetsDelete<'i, 'b> {
|
||||
index: &'i Index,
|
||||
database: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||
database: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||
facet_type: FacetType,
|
||||
affected_facet_values: HashMap<FieldId, HashSet<Vec<u8>>>,
|
||||
docids_to_delete: &'b RoaringBitmap,
|
||||
@ -27,11 +28,11 @@ impl<'i, 'b> FacetsDelete<'i, 'b> {
|
||||
docids_to_delete: &'b RoaringBitmap,
|
||||
) -> Self {
|
||||
let database = match facet_type {
|
||||
FacetType::String => {
|
||||
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
||||
}
|
||||
FacetType::String => index
|
||||
.facet_id_string_docids
|
||||
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
|
||||
FacetType::Number => {
|
||||
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
||||
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>()
|
||||
}
|
||||
};
|
||||
Self {
|
||||
|
@ -7,8 +7,9 @@ use roaring::RoaringBitmap;
|
||||
|
||||
use crate::facet::FacetType;
|
||||
use crate::heed_codec::facet::{
|
||||
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
||||
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
||||
};
|
||||
use crate::heed_codec::ByteSliceRefCodec;
|
||||
use crate::search::facet::get_highest_level;
|
||||
use crate::{CboRoaringBitmapCodec, FieldId, Index, Result};
|
||||
|
||||
@ -50,10 +51,10 @@ impl<'i> FacetsUpdateIncremental<'i> {
|
||||
db: match facet_type {
|
||||
FacetType::String => index
|
||||
.facet_id_string_docids
|
||||
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
|
||||
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
|
||||
FacetType::Number => index
|
||||
.facet_id_f64_docids
|
||||
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
|
||||
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
|
||||
},
|
||||
group_size,
|
||||
max_group_size,
|
||||
@ -69,7 +70,7 @@ impl<'i> FacetsUpdateIncremental<'i> {
|
||||
|
||||
let mut cursor = self.new_data.into_cursor()?;
|
||||
while let Some((key, value)) = cursor.move_on_next()? {
|
||||
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(key)
|
||||
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key)
|
||||
.ok_or(heed::Error::Encoding)?;
|
||||
let docids = CboRoaringBitmapCodec::bytes_decode(value).ok_or(heed::Error::Encoding)?;
|
||||
self.inner.insert(wtxn, key.field_id, key.left_bound, &docids)?;
|
||||
@ -87,7 +88,7 @@ impl<'i> FacetsUpdateIncremental<'i> {
|
||||
|
||||
/// Implementation of `FacetsUpdateIncremental` that is independent of milli's `Index` type
|
||||
pub struct FacetsUpdateIncrementalInner {
|
||||
pub db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||
pub db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||
pub group_size: u8,
|
||||
pub min_level_size: u8,
|
||||
pub max_group_size: u8,
|
||||
@ -126,7 +127,7 @@ impl FacetsUpdateIncrementalInner {
|
||||
if let Some(e) = prefix_iter.next() {
|
||||
let (key_bytes, value) = e?;
|
||||
Ok((
|
||||
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes)
|
||||
FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(&key_bytes)
|
||||
.ok_or(Error::Encoding)?
|
||||
.into_owned(),
|
||||
value,
|
||||
@ -149,7 +150,7 @@ impl FacetsUpdateIncrementalInner {
|
||||
)?;
|
||||
let (key_bytes, value) = iter.next().unwrap()?;
|
||||
Ok((
|
||||
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes)
|
||||
FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(&key_bytes)
|
||||
.ok_or(Error::Encoding)?
|
||||
.into_owned(),
|
||||
value,
|
||||
@ -411,7 +412,7 @@ impl FacetsUpdateIncrementalInner {
|
||||
let mut values = RoaringBitmap::new();
|
||||
for _ in 0..group_size {
|
||||
let (key_bytes, value_i) = groups_iter.next().unwrap()?;
|
||||
let key_i = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes)
|
||||
let key_i = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(&key_bytes)
|
||||
.ok_or(Error::Encoding)?;
|
||||
|
||||
if first_key.is_none() {
|
||||
@ -434,7 +435,7 @@ impl FacetsUpdateIncrementalInner {
|
||||
let mut values = RoaringBitmap::new();
|
||||
for _ in 0..nbr_leftover_elements {
|
||||
let (key_bytes, value_i) = groups_iter.next().unwrap()?;
|
||||
let key_i = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes)
|
||||
let key_i = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(&key_bytes)
|
||||
.ok_or(Error::Encoding)?;
|
||||
|
||||
if first_key.is_none() {
|
||||
@ -616,7 +617,7 @@ impl FacetsUpdateIncrementalInner {
|
||||
while let Some(el) = iter.next() {
|
||||
let (k, _) = el?;
|
||||
to_delete.push(
|
||||
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(k)
|
||||
FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(k)
|
||||
.ok_or(Error::Encoding)?
|
||||
.into_owned(),
|
||||
);
|
||||
@ -655,7 +656,8 @@ mod tests {
|
||||
use rand::{Rng, SeedableRng};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use crate::heed_codec::facet::{OrderedF64Codec, StrRefCodec};
|
||||
use crate::heed_codec::facet::OrderedF64Codec;
|
||||
use crate::heed_codec::StrRefCodec;
|
||||
use crate::milli_snap;
|
||||
use crate::update::facet::tests::FacetIndex;
|
||||
|
||||
@ -1019,6 +1021,7 @@ mod tests {
|
||||
|
||||
// fuzz tests
|
||||
}
|
||||
|
||||
#[cfg(all(test, fuzzing))]
|
||||
mod fuzz {
|
||||
use std::borrow::Cow;
|
||||
|
@ -77,7 +77,8 @@ pub const FACET_MIN_LEVEL_SIZE: u8 = 5;
|
||||
use self::incremental::FacetsUpdateIncremental;
|
||||
use super::FacetsUpdateBulk;
|
||||
use crate::facet::FacetType;
|
||||
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||
use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||
use crate::heed_codec::ByteSliceRefCodec;
|
||||
use crate::{Index, Result};
|
||||
use std::fs::File;
|
||||
|
||||
@ -87,7 +88,7 @@ pub mod incremental;
|
||||
|
||||
pub struct FacetsUpdate<'i> {
|
||||
index: &'i Index,
|
||||
database: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||
database: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||
facet_type: FacetType,
|
||||
new_data: grenad::Reader<File>,
|
||||
group_size: u8,
|
||||
@ -97,11 +98,11 @@ pub struct FacetsUpdate<'i> {
|
||||
impl<'i> FacetsUpdate<'i> {
|
||||
pub fn new(index: &'i Index, facet_type: FacetType, new_data: grenad::Reader<File>) -> Self {
|
||||
let database = match facet_type {
|
||||
FacetType::String => {
|
||||
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
||||
}
|
||||
FacetType::String => index
|
||||
.facet_id_string_docids
|
||||
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
|
||||
FacetType::Number => {
|
||||
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
||||
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>()
|
||||
}
|
||||
};
|
||||
Self {
|
||||
@ -159,8 +160,9 @@ pub(crate) mod tests {
|
||||
|
||||
use super::bulk::FacetsUpdateBulkInner;
|
||||
use crate::heed_codec::facet::{
|
||||
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
||||
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
||||
};
|
||||
use crate::heed_codec::ByteSliceRefCodec;
|
||||
use crate::search::facet::get_highest_level;
|
||||
use crate::snapshot_tests::display_bitmap;
|
||||
use crate::update::FacetsUpdateIncrementalInner;
|
||||
@ -173,7 +175,7 @@ pub(crate) mod tests {
|
||||
BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>,
|
||||
{
|
||||
pub env: Env,
|
||||
pub content: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
||||
pub content: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||
pub group_size: Cell<u8>,
|
||||
pub min_level_size: Cell<u8>,
|
||||
pub max_group_size: Cell<u8>,
|
||||
@ -327,7 +329,7 @@ pub(crate) mod tests {
|
||||
let left_bound_bytes = BoundCodec::bytes_encode(left_bound).unwrap().into_owned();
|
||||
let key: FacetGroupKey<&[u8]> =
|
||||
FacetGroupKey { field_id: *field_id, level: 0, left_bound: &left_bound_bytes };
|
||||
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_encode(&key).unwrap();
|
||||
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_encode(&key).unwrap();
|
||||
let value = CboRoaringBitmapCodec::bytes_encode(&docids).unwrap();
|
||||
writer.insert(&key, &value).unwrap();
|
||||
}
|
||||
@ -362,7 +364,7 @@ pub(crate) mod tests {
|
||||
.unwrap();
|
||||
while let Some(el) = iter.next() {
|
||||
let (key, value) = el.unwrap();
|
||||
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key).unwrap();
|
||||
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(&key).unwrap();
|
||||
|
||||
let mut prefix_start_below = vec![];
|
||||
prefix_start_below.extend_from_slice(&field_id.to_be_bytes());
|
||||
@ -379,7 +381,7 @@ pub(crate) mod tests {
|
||||
)
|
||||
.unwrap();
|
||||
let (key_bytes, _) = start_below_iter.next().unwrap().unwrap();
|
||||
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes).unwrap()
|
||||
FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(&key_bytes).unwrap()
|
||||
};
|
||||
|
||||
assert!(value.size > 0);
|
||||
|
@ -4,7 +4,8 @@ use std::io;
|
||||
use heed::BytesEncode;
|
||||
|
||||
use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters};
|
||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, StrRefCodec};
|
||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec};
|
||||
use crate::heed_codec::StrRefCodec;
|
||||
use crate::update::index_documents::merge_cbo_roaring_bitmaps;
|
||||
use crate::{FieldId, Result};
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user