mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 23:04:26 +01:00
Move StrRefCodec and ByteSliceRefCodec to their own files
This commit is contained in:
parent
1165ba2171
commit
a034a1e628
@ -57,7 +57,7 @@ md5 = "0.7.0"
|
|||||||
rand = {version = "0.8.5", features = ["small_rng"] }
|
rand = {version = "0.8.5", features = ["small_rng"] }
|
||||||
|
|
||||||
[target.'cfg(fuzzing)'.dev-dependencies]
|
[target.'cfg(fuzzing)'.dev-dependencies]
|
||||||
fuzzcheck = { git = "https://github.com/loiclec/fuzzcheck-rs", branch = "main" }
|
fuzzcheck = { git = "https://github.com/loiclec/fuzzcheck-rs", branch = "main" } # TODO: use released version
|
||||||
|
|
||||||
[features]
|
[features]
|
||||||
default = [ "charabia/default" ]
|
default = [ "charabia/default" ]
|
||||||
|
23
milli/src/heed_codec/byte_slice_ref.rs
Normal file
23
milli/src/heed_codec/byte_slice_ref.rs
Normal file
@ -0,0 +1,23 @@
|
|||||||
|
use std::borrow::Cow;
|
||||||
|
|
||||||
|
use heed::{BytesDecode, BytesEncode};
|
||||||
|
|
||||||
|
/// A codec for values of type `&[u8]`. Unlike `ByteSlice`, its `EItem` and `DItem` associated
|
||||||
|
/// types are equivalent (= `&'a [u8]`) and these values can reside within another structure.
/// Encoding borrows the given slice and decoding hands back the input bytes unchanged; both
/// always return `Some`.
|
||||||
|
pub struct ByteSliceRefCodec;
|
||||||
|
|
||||||
|
// Encoding side of the codec: the item is already a byte slice, so it is passed through as-is.
impl<'a> BytesEncode<'a> for ByteSliceRefCodec {
|
||||||
|
type EItem = &'a [u8];
|
||||||
|
|
||||||
|
/// Wraps the referenced slice in a `Cow::Borrowed`, so no bytes are copied.
/// This implementation always returns `Some`.
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
||||||
|
Some(Cow::Borrowed(item))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Decoding side of the codec: the stored bytes are returned untouched, with the same lifetime.
impl<'a> BytesDecode<'a> for ByteSliceRefCodec {
|
||||||
|
type DItem = &'a [u8];
|
||||||
|
|
||||||
|
/// Returns the input `bytes` unchanged, without validation or copying.
/// This implementation always returns `Some`.
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
||||||
|
Some(bytes)
|
||||||
|
}
|
||||||
|
}
|
@ -1,6 +1,5 @@
|
|||||||
mod field_doc_id_facet_codec;
|
mod field_doc_id_facet_codec;
|
||||||
mod ordered_f64_codec;
|
mod ordered_f64_codec;
|
||||||
mod str_ref;
|
|
||||||
|
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::convert::TryFrom;
|
use std::convert::TryFrom;
|
||||||
@ -12,9 +11,10 @@ use roaring::RoaringBitmap;
|
|||||||
|
|
||||||
pub use self::field_doc_id_facet_codec::FieldDocIdFacetCodec;
|
pub use self::field_doc_id_facet_codec::FieldDocIdFacetCodec;
|
||||||
pub use self::ordered_f64_codec::OrderedF64Codec;
|
pub use self::ordered_f64_codec::OrderedF64Codec;
|
||||||
pub use self::str_ref::StrRefCodec;
|
|
||||||
use crate::{CboRoaringBitmapCodec, BEU16};
|
use crate::{CboRoaringBitmapCodec, BEU16};
|
||||||
|
|
||||||
|
use super::StrRefCodec;
|
||||||
|
|
||||||
pub type FieldDocIdFacetF64Codec = FieldDocIdFacetCodec<OrderedF64Codec>;
|
pub type FieldDocIdFacetF64Codec = FieldDocIdFacetCodec<OrderedF64Codec>;
|
||||||
pub type FieldDocIdFacetStringCodec = FieldDocIdFacetCodec<StrRefCodec>;
|
pub type FieldDocIdFacetStringCodec = FieldDocIdFacetCodec<StrRefCodec>;
|
||||||
pub type FieldDocIdFacetIgnoreCodec = FieldDocIdFacetCodec<DecodeIgnore>;
|
pub type FieldDocIdFacetIgnoreCodec = FieldDocIdFacetCodec<DecodeIgnore>;
|
||||||
@ -33,7 +33,7 @@ pub fn try_split_at(slice: &[u8], mid: usize) -> Option<(&[u8], &[u8])> {
|
|||||||
|
|
||||||
/// The key in the [`facet_id_string_docids` and `facet_id_f64_docids`][`Index::facet_id_string_docids`]
|
/// The key in the [`facet_id_string_docids` and `facet_id_f64_docids`][`Index::facet_id_string_docids`]
|
||||||
/// databases.
|
/// databases.
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] // TODO: try removing PartialOrd and Ord
|
||||||
pub struct FacetGroupKey<T> {
|
pub struct FacetGroupKey<T> {
|
||||||
pub field_id: u16,
|
pub field_id: u16,
|
||||||
pub level: u8,
|
pub level: u8,
|
||||||
@ -103,23 +103,3 @@ impl<'a> heed::BytesDecode<'a> for FacetGroupValueCodec {
|
|||||||
Some(FacetGroupValue { size, bitmap })
|
Some(FacetGroupValue { size, bitmap })
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A codec for values of type `&[u8]`. Unlike `ByteSlice`, its `EItem` and `DItem` associated
|
|
||||||
/// types are equivalent (= `&'a [u8]`) and these values can reside within another structure.
|
|
||||||
pub struct ByteSliceRef;
|
|
||||||
|
|
||||||
impl<'a> BytesEncode<'a> for ByteSliceRef {
|
|
||||||
type EItem = &'a [u8];
|
|
||||||
|
|
||||||
fn bytes_encode(item: &'a Self::EItem) -> Option<Cow<'a, [u8]>> {
|
|
||||||
Some(Cow::Borrowed(item))
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<'a> BytesDecode<'a> for ByteSliceRef {
|
|
||||||
type DItem = &'a [u8];
|
|
||||||
|
|
||||||
fn bytes_decode(bytes: &'a [u8]) -> Option<Self::DItem> {
|
|
||||||
Some(bytes)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
@ -1,10 +1,12 @@
|
|||||||
mod beu32_str_codec;
|
mod beu32_str_codec;
|
||||||
|
mod byte_slice_ref;
|
||||||
pub mod facet;
|
pub mod facet;
|
||||||
mod field_id_word_count_codec;
|
mod field_id_word_count_codec;
|
||||||
mod obkv_codec;
|
mod obkv_codec;
|
||||||
mod roaring_bitmap;
|
mod roaring_bitmap;
|
||||||
mod roaring_bitmap_length;
|
mod roaring_bitmap_length;
|
||||||
mod str_beu32_codec;
|
mod str_beu32_codec;
|
||||||
|
mod str_ref;
|
||||||
mod str_str_u8_codec;
|
mod str_str_u8_codec;
|
||||||
|
|
||||||
pub use self::beu32_str_codec::BEU32StrCodec;
|
pub use self::beu32_str_codec::BEU32StrCodec;
|
||||||
@ -16,3 +18,5 @@ pub use self::roaring_bitmap_length::{
|
|||||||
};
|
};
|
||||||
pub use self::str_beu32_codec::StrBEU32Codec;
|
pub use self::str_beu32_codec::StrBEU32Codec;
|
||||||
pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec};
|
pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec};
|
||||||
|
pub use byte_slice_ref::ByteSliceRefCodec;
|
||||||
|
pub use str_ref::StrRefCodec;
|
||||||
|
@ -16,8 +16,9 @@ use crate::facet::FacetType;
|
|||||||
use crate::fields_ids_map::FieldsIdsMap;
|
use crate::fields_ids_map::FieldsIdsMap;
|
||||||
use crate::heed_codec::facet::{
|
use crate::heed_codec::facet::{
|
||||||
FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
|
FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
|
||||||
FieldIdCodec, OrderedF64Codec, StrRefCodec,
|
FieldIdCodec, OrderedF64Codec,
|
||||||
};
|
};
|
||||||
|
use crate::heed_codec::StrRefCodec;
|
||||||
use crate::{
|
use crate::{
|
||||||
default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
|
default_criteria, BEU32StrCodec, BoRoaringBitmapCodec, CboRoaringBitmapCodec, Criterion,
|
||||||
DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId,
|
DocumentId, ExternalDocumentsIds, FacetDistribution, FieldDistribution, FieldId,
|
||||||
|
@ -7,7 +7,8 @@ use roaring::RoaringBitmap;
|
|||||||
|
|
||||||
use super::{Criterion, CriterionParameters, CriterionResult};
|
use super::{Criterion, CriterionParameters, CriterionResult};
|
||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec};
|
use crate::heed_codec::facet::FacetGroupKeyCodec;
|
||||||
|
use crate::heed_codec::ByteSliceRefCodec;
|
||||||
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
|
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
|
||||||
use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
|
use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
|
||||||
use crate::search::query_tree::Operation;
|
use crate::search::query_tree::Operation;
|
||||||
@ -194,14 +195,14 @@ fn facet_ordered<'t>(
|
|||||||
|
|
||||||
let number_iter = make_iter(
|
let number_iter = make_iter(
|
||||||
rtxn,
|
rtxn,
|
||||||
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
|
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
|
||||||
field_id,
|
field_id,
|
||||||
candidates.clone(),
|
candidates.clone(),
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
let string_iter = make_iter(
|
let string_iter = make_iter(
|
||||||
rtxn,
|
rtxn,
|
||||||
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
|
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
|
||||||
field_id,
|
field_id,
|
||||||
candidates,
|
candidates,
|
||||||
)?;
|
)?;
|
||||||
|
@ -9,9 +9,10 @@ use roaring::RoaringBitmap;
|
|||||||
use crate::error::UserError;
|
use crate::error::UserError;
|
||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
use crate::heed_codec::facet::{
|
use crate::heed_codec::facet::{
|
||||||
ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec,
|
FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
|
||||||
FieldDocIdFacetStringCodec, OrderedF64Codec, StrRefCodec,
|
OrderedF64Codec,
|
||||||
};
|
};
|
||||||
|
use crate::heed_codec::{ByteSliceRefCodec, StrRefCodec};
|
||||||
use crate::search::facet::facet_distribution_iter;
|
use crate::search::facet::facet_distribution_iter;
|
||||||
use crate::{FieldId, Index, Result};
|
use crate::{FieldId, Index, Result};
|
||||||
|
|
||||||
@ -137,7 +138,9 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
) -> heed::Result<()> {
|
) -> heed::Result<()> {
|
||||||
facet_distribution_iter::iterate_over_facet_distribution(
|
facet_distribution_iter::iterate_over_facet_distribution(
|
||||||
self.rtxn,
|
self.rtxn,
|
||||||
self.index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
|
self.index
|
||||||
|
.facet_id_f64_docids
|
||||||
|
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
|
||||||
field_id,
|
field_id,
|
||||||
candidates,
|
candidates,
|
||||||
|facet_key, nbr_docids, _| {
|
|facet_key, nbr_docids, _| {
|
||||||
@ -160,7 +163,9 @@ impl<'a> FacetDistribution<'a> {
|
|||||||
) -> heed::Result<()> {
|
) -> heed::Result<()> {
|
||||||
facet_distribution_iter::iterate_over_facet_distribution(
|
facet_distribution_iter::iterate_over_facet_distribution(
|
||||||
self.rtxn,
|
self.rtxn,
|
||||||
self.index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
|
self.index
|
||||||
|
.facet_id_string_docids
|
||||||
|
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
|
||||||
field_id,
|
field_id,
|
||||||
candidates,
|
candidates,
|
||||||
|facet_key, nbr_docids, any_docid| {
|
|facet_key, nbr_docids, any_docid| {
|
||||||
|
@ -4,9 +4,8 @@ use heed::Result;
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::{get_first_facet_value, get_highest_level};
|
use super::{get_first_facet_value, get_highest_level};
|
||||||
use crate::heed_codec::facet::{
|
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||||
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec,
|
use crate::heed_codec::ByteSliceRefCodec;
|
||||||
};
|
|
||||||
use crate::DocumentId;
|
use crate::DocumentId;
|
||||||
|
|
||||||
/// Call the given closure on the facet distribution of the candidate documents.
|
/// Call the given closure on the facet distribution of the candidate documents.
|
||||||
@ -22,7 +21,7 @@ use crate::DocumentId;
|
|||||||
/// keep iterating over the different facet values or stop.
|
/// keep iterating over the different facet values or stop.
|
||||||
pub fn iterate_over_facet_distribution<'t, CB>(
|
pub fn iterate_over_facet_distribution<'t, CB>(
|
||||||
rtxn: &'t heed::RoTxn<'t>,
|
rtxn: &'t heed::RoTxn<'t>,
|
||||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
candidates: &RoaringBitmap,
|
candidates: &RoaringBitmap,
|
||||||
callback: CB,
|
callback: CB,
|
||||||
@ -31,10 +30,13 @@ where
|
|||||||
CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
|
CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
|
||||||
{
|
{
|
||||||
let mut fd = FacetDistribution { rtxn, db, field_id, callback };
|
let mut fd = FacetDistribution { rtxn, db, field_id, callback };
|
||||||
let highest_level =
|
let highest_level = get_highest_level(
|
||||||
get_highest_level(rtxn, db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(), field_id)?;
|
rtxn,
|
||||||
|
db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
|
||||||
|
field_id,
|
||||||
|
)?;
|
||||||
|
|
||||||
if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? {
|
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
|
||||||
fd.iterate(candidates, highest_level, first_bound, usize::MAX)?;
|
fd.iterate(candidates, highest_level, first_bound, usize::MAX)?;
|
||||||
return Ok(());
|
return Ok(());
|
||||||
} else {
|
} else {
|
||||||
@ -47,7 +49,7 @@ where
|
|||||||
CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
|
CB: FnMut(&'t [u8], u64, DocumentId) -> Result<ControlFlow<()>>,
|
||||||
{
|
{
|
||||||
rtxn: &'t heed::RoTxn<'t>,
|
rtxn: &'t heed::RoTxn<'t>,
|
||||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
callback: CB,
|
callback: CB,
|
||||||
}
|
}
|
||||||
@ -72,11 +74,13 @@ where
|
|||||||
if key.field_id != self.field_id {
|
if key.field_id != self.field_id {
|
||||||
return Ok(ControlFlow::Break(()));
|
return Ok(ControlFlow::Break(()));
|
||||||
}
|
}
|
||||||
|
// TODO: use real intersection and then take min()?
|
||||||
let docids_in_common = value.bitmap.intersection_len(candidates);
|
let docids_in_common = value.bitmap.intersection_len(candidates);
|
||||||
if docids_in_common > 0 {
|
if docids_in_common > 0 {
|
||||||
|
// TODO: use min()
|
||||||
let any_docid = value.bitmap.iter().next().unwrap();
|
let any_docid = value.bitmap.iter().next().unwrap();
|
||||||
match (self.callback)(key.left_bound, docids_in_common, any_docid)? {
|
match (self.callback)(key.left_bound, docids_in_common, any_docid)? {
|
||||||
ControlFlow::Continue(_) => {}
|
ControlFlow::Continue(_) => (), // TODO use unit instead of empty scope
|
||||||
ControlFlow::Break(_) => return Ok(ControlFlow::Break(())),
|
ControlFlow::Break(_) => return Ok(ControlFlow::Break(())),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -4,9 +4,8 @@ use heed::BytesEncode;
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
|
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
|
||||||
use crate::heed_codec::facet::{
|
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||||
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec,
|
use crate::heed_codec::ByteSliceRefCodec;
|
||||||
};
|
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
|
||||||
/// Find all the document ids for which the given field contains a value contained within
|
/// Find all the document ids for which the given field contains a value contained within
|
||||||
@ -47,13 +46,16 @@ where
|
|||||||
}
|
}
|
||||||
Bound::Unbounded => Bound::Unbounded,
|
Bound::Unbounded => Bound::Unbounded,
|
||||||
};
|
};
|
||||||
let db = db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>();
|
let db = db.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
|
||||||
let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids };
|
let mut f = FacetRangeSearch { rtxn, db, field_id, left, right, docids };
|
||||||
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
||||||
|
|
||||||
if let Some(starting_left_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? {
|
if let Some(starting_left_bound) =
|
||||||
let rightmost_bound =
|
get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)?
|
||||||
Bound::Included(get_last_facet_value::<ByteSliceRef>(rtxn, db, field_id)?.unwrap()); // will not fail because get_first_facet_value succeeded
|
{
|
||||||
|
let rightmost_bound = Bound::Included(
|
||||||
|
get_last_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)?.unwrap(),
|
||||||
|
); // will not fail because get_first_facet_value succeeded
|
||||||
let group_size = usize::MAX;
|
let group_size = usize::MAX;
|
||||||
f.run(highest_level, starting_left_bound, rightmost_bound, group_size)?;
|
f.run(highest_level, starting_left_bound, rightmost_bound, group_size)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
@ -65,7 +67,7 @@ where
|
|||||||
/// Fetch the document ids that have a facet with a value between the two given bounds
|
/// Fetch the document ids that have a facet with a value between the two given bounds
|
||||||
struct FacetRangeSearch<'t, 'b, 'bitmap> {
|
struct FacetRangeSearch<'t, 'b, 'bitmap> {
|
||||||
rtxn: &'t heed::RoTxn<'t>,
|
rtxn: &'t heed::RoTxn<'t>,
|
||||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
left: Bound<&'b [u8]>,
|
left: Bound<&'b [u8]>,
|
||||||
right: Bound<&'b [u8]>,
|
right: Bound<&'b [u8]>,
|
||||||
|
@ -3,8 +3,9 @@ use roaring::RoaringBitmap;
|
|||||||
|
|
||||||
use super::{get_first_facet_value, get_highest_level};
|
use super::{get_first_facet_value, get_highest_level};
|
||||||
use crate::heed_codec::facet::{
|
use crate::heed_codec::facet::{
|
||||||
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
||||||
};
|
};
|
||||||
|
use crate::heed_codec::ByteSliceRefCodec;
|
||||||
|
|
||||||
/// Return an iterator which iterates over the given candidate documents in
|
/// Return an iterator which iterates over the given candidate documents in
|
||||||
/// ascending order of their facet value for the given field id.
|
/// ascending order of their facet value for the given field id.
|
||||||
@ -30,12 +31,12 @@ use crate::heed_codec::facet::{
|
|||||||
/// Note that once a document id is returned by the iterator, it is never returned again.
|
/// Note that once a document id is returned by the iterator, it is never returned again.
|
||||||
pub fn ascending_facet_sort<'t>(
|
pub fn ascending_facet_sort<'t>(
|
||||||
rtxn: &'t heed::RoTxn<'t>,
|
rtxn: &'t heed::RoTxn<'t>,
|
||||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
candidates: RoaringBitmap,
|
candidates: RoaringBitmap,
|
||||||
) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> {
|
) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> {
|
||||||
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
||||||
if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? {
|
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
|
||||||
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
|
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
|
||||||
let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX);
|
let iter = db.range(rtxn, &(first_key..)).unwrap().take(usize::MAX);
|
||||||
|
|
||||||
@ -47,11 +48,13 @@ pub fn ascending_facet_sort<'t>(
|
|||||||
|
|
||||||
struct AscendingFacetSort<'t, 'e> {
|
struct AscendingFacetSort<'t, 'e> {
|
||||||
rtxn: &'t heed::RoTxn<'e>,
|
rtxn: &'t heed::RoTxn<'e>,
|
||||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
stack: Vec<(
|
stack: Vec<(
|
||||||
RoaringBitmap,
|
RoaringBitmap,
|
||||||
std::iter::Take<heed::RoRange<'t, FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>>,
|
std::iter::Take<
|
||||||
|
heed::RoRange<'t, FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||||
|
>,
|
||||||
)>,
|
)>,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -5,22 +5,23 @@ use roaring::RoaringBitmap;
|
|||||||
|
|
||||||
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
|
use super::{get_first_facet_value, get_highest_level, get_last_facet_value};
|
||||||
use crate::heed_codec::facet::{
|
use crate::heed_codec::facet::{
|
||||||
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
||||||
};
|
};
|
||||||
|
use crate::heed_codec::ByteSliceRefCodec;
|
||||||
|
|
||||||
/// See documentation for [`ascending_facet_sort`](super::ascending_facet_sort).
|
/// See documentation for [`ascending_facet_sort`](super::ascending_facet_sort).
|
||||||
///
|
///
|
||||||
/// This function does the same thing, but in the opposite order.
|
/// This function does the same thing, but in the opposite order.
|
||||||
pub fn descending_facet_sort<'t>(
|
pub fn descending_facet_sort<'t>(
|
||||||
rtxn: &'t heed::RoTxn<'t>,
|
rtxn: &'t heed::RoTxn<'t>,
|
||||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
candidates: RoaringBitmap,
|
candidates: RoaringBitmap,
|
||||||
) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> {
|
) -> Result<Box<dyn Iterator<Item = Result<RoaringBitmap>> + 't>> {
|
||||||
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
let highest_level = get_highest_level(rtxn, db, field_id)?;
|
||||||
if let Some(first_bound) = get_first_facet_value::<ByteSliceRef>(rtxn, db, field_id)? {
|
if let Some(first_bound) = get_first_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)? {
|
||||||
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
|
let first_key = FacetGroupKey { field_id, level: highest_level, left_bound: first_bound };
|
||||||
let last_bound = get_last_facet_value::<ByteSliceRef>(rtxn, db, field_id)?.unwrap();
|
let last_bound = get_last_facet_value::<ByteSliceRefCodec>(rtxn, db, field_id)?.unwrap();
|
||||||
let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound };
|
let last_key = FacetGroupKey { field_id, level: highest_level, left_bound: last_bound };
|
||||||
let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX);
|
let iter = db.rev_range(rtxn, &(first_key..=last_key))?.take(usize::MAX);
|
||||||
Ok(Box::new(DescendingFacetSort {
|
Ok(Box::new(DescendingFacetSort {
|
||||||
@ -36,12 +37,12 @@ pub fn descending_facet_sort<'t>(
|
|||||||
|
|
||||||
struct DescendingFacetSort<'t> {
|
struct DescendingFacetSort<'t> {
|
||||||
rtxn: &'t heed::RoTxn<'t>,
|
rtxn: &'t heed::RoTxn<'t>,
|
||||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
stack: Vec<(
|
stack: Vec<(
|
||||||
RoaringBitmap,
|
RoaringBitmap,
|
||||||
std::iter::Take<
|
std::iter::Take<
|
||||||
heed::RoRevRange<'t, FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
heed::RoRevRange<'t, FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||||
>,
|
>,
|
||||||
Bound<&'t [u8]>,
|
Bound<&'t [u8]>,
|
||||||
)>,
|
)>,
|
||||||
@ -97,7 +98,7 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
|
|||||||
*right_bound = Bound::Excluded(left_bound);
|
*right_bound = Bound::Excluded(left_bound);
|
||||||
let iter = match self
|
let iter = match self
|
||||||
.db
|
.db
|
||||||
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>()
|
||||||
.rev_range(
|
.rev_range(
|
||||||
&self.rtxn,
|
&self.rtxn,
|
||||||
&(Bound::Included(starting_key_below), end_key_kelow),
|
&(Bound::Included(starting_key_below), end_key_kelow),
|
||||||
@ -121,7 +122,8 @@ impl<'t> Iterator for DescendingFacetSort<'t> {
|
|||||||
mod tests {
|
mod tests {
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec};
|
use crate::heed_codec::facet::FacetGroupKeyCodec;
|
||||||
|
use crate::heed_codec::ByteSliceRefCodec;
|
||||||
use crate::milli_snap;
|
use crate::milli_snap;
|
||||||
use crate::search::facet::facet_sort_descending::descending_facet_sort;
|
use crate::search::facet::facet_sort_descending::descending_facet_sort;
|
||||||
use crate::search::facet::tests::{get_random_looking_index, get_simple_index};
|
use crate::search::facet::tests::{get_random_looking_index, get_simple_index};
|
||||||
@ -134,7 +136,7 @@ mod tests {
|
|||||||
let txn = index.env.read_txn().unwrap();
|
let txn = index.env.read_txn().unwrap();
|
||||||
let candidates = (200..=300).into_iter().collect::<RoaringBitmap>();
|
let candidates = (200..=300).into_iter().collect::<RoaringBitmap>();
|
||||||
let mut results = String::new();
|
let mut results = String::new();
|
||||||
let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>();
|
let db = index.content.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>();
|
||||||
let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap();
|
let iter = descending_facet_sort(&txn, db, 0, candidates).unwrap();
|
||||||
for el in iter {
|
for el in iter {
|
||||||
let docids = el.unwrap();
|
let docids = el.unwrap();
|
||||||
|
@ -5,8 +5,8 @@ use heed::{BytesDecode, RoTxn};
|
|||||||
|
|
||||||
pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET};
|
pub use self::facet_distribution::{FacetDistribution, DEFAULT_VALUES_PER_FACET};
|
||||||
pub use self::filter::Filter;
|
pub use self::filter::Filter;
|
||||||
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec};
|
use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||||
|
use crate::heed_codec::ByteSliceRefCodec;
|
||||||
mod facet_distribution;
|
mod facet_distribution;
|
||||||
mod facet_distribution_iter;
|
mod facet_distribution_iter;
|
||||||
mod facet_range_search;
|
mod facet_range_search;
|
||||||
@ -17,7 +17,7 @@ mod filter;
|
|||||||
/// Get the first facet value in the facet database
|
/// Get the first facet value in the facet database
|
||||||
pub(crate) fn get_first_facet_value<'t, BoundCodec>(
|
pub(crate) fn get_first_facet_value<'t, BoundCodec>(
|
||||||
txn: &'t RoTxn,
|
txn: &'t RoTxn,
|
||||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
) -> heed::Result<Option<BoundCodec::DItem>>
|
) -> heed::Result<Option<BoundCodec::DItem>>
|
||||||
where
|
where
|
||||||
@ -42,7 +42,7 @@ where
|
|||||||
/// Get the last facet value in the facet database
|
/// Get the last facet value in the facet database
|
||||||
pub(crate) fn get_last_facet_value<'t, BoundCodec>(
|
pub(crate) fn get_last_facet_value<'t, BoundCodec>(
|
||||||
txn: &'t RoTxn,
|
txn: &'t RoTxn,
|
||||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
) -> heed::Result<Option<BoundCodec::DItem>>
|
) -> heed::Result<Option<BoundCodec::DItem>>
|
||||||
where
|
where
|
||||||
@ -67,7 +67,7 @@ where
|
|||||||
/// Get the height of the highest level in the facet database
|
/// Get the height of the highest level in the facet database
|
||||||
pub(crate) fn get_highest_level<'t>(
|
pub(crate) fn get_highest_level<'t>(
|
||||||
txn: &'t RoTxn<'t>,
|
txn: &'t RoTxn<'t>,
|
||||||
db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
) -> heed::Result<u8> {
|
) -> heed::Result<u8> {
|
||||||
let field_id_prefix = &field_id.to_be_bytes();
|
let field_id_prefix = &field_id.to_be_bytes();
|
||||||
@ -77,7 +77,7 @@ pub(crate) fn get_highest_level<'t>(
|
|||||||
.next()
|
.next()
|
||||||
.map(|el| {
|
.map(|el| {
|
||||||
let (key, _) = el.unwrap();
|
let (key, _) = el.unwrap();
|
||||||
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(key).unwrap();
|
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key).unwrap();
|
||||||
key.level
|
key.level
|
||||||
})
|
})
|
||||||
.unwrap_or(0))
|
.unwrap_or(0))
|
||||||
|
@ -11,8 +11,9 @@ use time::OffsetDateTime;
|
|||||||
use super::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
|
use super::{FACET_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
|
||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
use crate::heed_codec::facet::{
|
use crate::heed_codec::facet::{
|
||||||
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
||||||
};
|
};
|
||||||
|
use crate::heed_codec::ByteSliceRefCodec;
|
||||||
use crate::update::index_documents::{create_writer, writer_into_reader};
|
use crate::update::index_documents::{create_writer, writer_into_reader};
|
||||||
use crate::{CboRoaringBitmapCodec, FieldId, Index, Result};
|
use crate::{CboRoaringBitmapCodec, FieldId, Index, Result};
|
||||||
|
|
||||||
@ -75,11 +76,11 @@ impl<'i> FacetsUpdateBulk<'i> {
|
|||||||
let Self { index, field_ids, group_size, min_level_size, facet_type, new_data } = self;
|
let Self { index, field_ids, group_size, min_level_size, facet_type, new_data } = self;
|
||||||
|
|
||||||
let db = match facet_type {
|
let db = match facet_type {
|
||||||
FacetType::String => {
|
FacetType::String => index
|
||||||
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
.facet_id_string_docids
|
||||||
}
|
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
|
||||||
FacetType::Number => {
|
FacetType::Number => {
|
||||||
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>()
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -98,7 +99,7 @@ impl<'i> FacetsUpdateBulk<'i> {
|
|||||||
|
|
||||||
/// Implementation of `FacetsUpdateBulk` that is independent of milli's `Index` type
|
/// Implementation of `FacetsUpdateBulk` that is independent of milli's `Index` type
|
||||||
pub(crate) struct FacetsUpdateBulkInner<R: std::io::Read + std::io::Seek> {
|
pub(crate) struct FacetsUpdateBulkInner<R: std::io::Read + std::io::Seek> {
|
||||||
pub db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
pub db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||||
pub new_data: Option<grenad::Reader<R>>,
|
pub new_data: Option<grenad::Reader<R>>,
|
||||||
pub group_size: u8,
|
pub group_size: u8,
|
||||||
pub min_level_size: u8,
|
pub min_level_size: u8,
|
||||||
@ -216,7 +217,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
|
|||||||
.db
|
.db
|
||||||
.as_polymorph()
|
.as_polymorph()
|
||||||
.prefix_iter::<_, ByteSlice, ByteSlice>(rtxn, level_0_prefix.as_slice())?
|
.prefix_iter::<_, ByteSlice, ByteSlice>(rtxn, level_0_prefix.as_slice())?
|
||||||
.remap_types::<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>();
|
.remap_types::<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>();
|
||||||
|
|
||||||
let mut left_bound: &[u8] = &[];
|
let mut left_bound: &[u8] = &[];
|
||||||
let mut first_iteration_for_new_group = true;
|
let mut first_iteration_for_new_group = true;
|
||||||
@ -299,7 +300,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
|
|||||||
bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..))
|
bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..))
|
||||||
{
|
{
|
||||||
let key = FacetGroupKey { field_id, level, left_bound };
|
let key = FacetGroupKey { field_id, level, left_bound };
|
||||||
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_encode(&key)
|
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_encode(&key)
|
||||||
.ok_or(Error::Encoding)?;
|
.ok_or(Error::Encoding)?;
|
||||||
let value = FacetGroupValue { size: group_size, bitmap };
|
let value = FacetGroupValue { size: group_size, bitmap };
|
||||||
let value =
|
let value =
|
||||||
@ -328,7 +329,7 @@ impl<R: std::io::Read + std::io::Seek> FacetsUpdateBulkInner<R> {
|
|||||||
bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..))
|
bitmaps.drain(..).zip(left_bounds.drain(..)).zip(group_sizes.drain(..))
|
||||||
{
|
{
|
||||||
let key = FacetGroupKey { field_id, level, left_bound };
|
let key = FacetGroupKey { field_id, level, left_bound };
|
||||||
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_encode(&key)
|
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_encode(&key)
|
||||||
.ok_or(Error::Encoding)?;
|
.ok_or(Error::Encoding)?;
|
||||||
let value = FacetGroupValue { size: group_size, bitmap };
|
let value = FacetGroupValue { size: group_size, bitmap };
|
||||||
let value = FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?;
|
let value = FacetGroupValueCodec::bytes_encode(&value).ok_or(Error::Encoding)?;
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
use super::{FACET_GROUP_SIZE, FACET_MAX_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
|
use super::{FACET_GROUP_SIZE, FACET_MAX_GROUP_SIZE, FACET_MIN_LEVEL_SIZE};
|
||||||
use crate::{
|
use crate::{
|
||||||
facet::FacetType,
|
facet::FacetType,
|
||||||
heed_codec::facet::{ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec},
|
heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, FacetGroupValueCodec},
|
||||||
|
heed_codec::ByteSliceRefCodec,
|
||||||
update::{FacetsUpdateBulk, FacetsUpdateIncrementalInner},
|
update::{FacetsUpdateBulk, FacetsUpdateIncrementalInner},
|
||||||
FieldId, Index, Result,
|
FieldId, Index, Result,
|
||||||
};
|
};
|
||||||
@ -11,7 +12,7 @@ use std::collections::{HashMap, HashSet};
|
|||||||
|
|
||||||
pub struct FacetsDelete<'i, 'b> {
|
pub struct FacetsDelete<'i, 'b> {
|
||||||
index: &'i Index,
|
index: &'i Index,
|
||||||
database: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
database: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||||
facet_type: FacetType,
|
facet_type: FacetType,
|
||||||
affected_facet_values: HashMap<FieldId, HashSet<Vec<u8>>>,
|
affected_facet_values: HashMap<FieldId, HashSet<Vec<u8>>>,
|
||||||
docids_to_delete: &'b RoaringBitmap,
|
docids_to_delete: &'b RoaringBitmap,
|
||||||
@ -27,11 +28,11 @@ impl<'i, 'b> FacetsDelete<'i, 'b> {
|
|||||||
docids_to_delete: &'b RoaringBitmap,
|
docids_to_delete: &'b RoaringBitmap,
|
||||||
) -> Self {
|
) -> Self {
|
||||||
let database = match facet_type {
|
let database = match facet_type {
|
||||||
FacetType::String => {
|
FacetType::String => index
|
||||||
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
.facet_id_string_docids
|
||||||
}
|
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
|
||||||
FacetType::Number => {
|
FacetType::Number => {
|
||||||
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>()
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
Self {
|
Self {
|
||||||
|
@ -7,8 +7,9 @@ use roaring::RoaringBitmap;
|
|||||||
|
|
||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
use crate::heed_codec::facet::{
|
use crate::heed_codec::facet::{
|
||||||
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
||||||
};
|
};
|
||||||
|
use crate::heed_codec::ByteSliceRefCodec;
|
||||||
use crate::search::facet::get_highest_level;
|
use crate::search::facet::get_highest_level;
|
||||||
use crate::{CboRoaringBitmapCodec, FieldId, Index, Result};
|
use crate::{CboRoaringBitmapCodec, FieldId, Index, Result};
|
||||||
|
|
||||||
@ -50,10 +51,10 @@ impl<'i> FacetsUpdateIncremental<'i> {
|
|||||||
db: match facet_type {
|
db: match facet_type {
|
||||||
FacetType::String => index
|
FacetType::String => index
|
||||||
.facet_id_string_docids
|
.facet_id_string_docids
|
||||||
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
|
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
|
||||||
FacetType::Number => index
|
FacetType::Number => index
|
||||||
.facet_id_f64_docids
|
.facet_id_f64_docids
|
||||||
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>(),
|
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
|
||||||
},
|
},
|
||||||
group_size,
|
group_size,
|
||||||
max_group_size,
|
max_group_size,
|
||||||
@ -69,7 +70,7 @@ impl<'i> FacetsUpdateIncremental<'i> {
|
|||||||
|
|
||||||
let mut cursor = self.new_data.into_cursor()?;
|
let mut cursor = self.new_data.into_cursor()?;
|
||||||
while let Some((key, value)) = cursor.move_on_next()? {
|
while let Some((key, value)) = cursor.move_on_next()? {
|
||||||
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(key)
|
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(key)
|
||||||
.ok_or(heed::Error::Encoding)?;
|
.ok_or(heed::Error::Encoding)?;
|
||||||
let docids = CboRoaringBitmapCodec::bytes_decode(value).ok_or(heed::Error::Encoding)?;
|
let docids = CboRoaringBitmapCodec::bytes_decode(value).ok_or(heed::Error::Encoding)?;
|
||||||
self.inner.insert(wtxn, key.field_id, key.left_bound, &docids)?;
|
self.inner.insert(wtxn, key.field_id, key.left_bound, &docids)?;
|
||||||
@ -87,7 +88,7 @@ impl<'i> FacetsUpdateIncremental<'i> {
|
|||||||
|
|
||||||
/// Implementation of `FacetsUpdateIncremental` that is independent of milli's `Index` type
|
/// Implementation of `FacetsUpdateIncremental` that is independent of milli's `Index` type
|
||||||
pub struct FacetsUpdateIncrementalInner {
|
pub struct FacetsUpdateIncrementalInner {
|
||||||
pub db: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
pub db: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||||
pub group_size: u8,
|
pub group_size: u8,
|
||||||
pub min_level_size: u8,
|
pub min_level_size: u8,
|
||||||
pub max_group_size: u8,
|
pub max_group_size: u8,
|
||||||
@ -126,7 +127,7 @@ impl FacetsUpdateIncrementalInner {
|
|||||||
if let Some(e) = prefix_iter.next() {
|
if let Some(e) = prefix_iter.next() {
|
||||||
let (key_bytes, value) = e?;
|
let (key_bytes, value) = e?;
|
||||||
Ok((
|
Ok((
|
||||||
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes)
|
FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(&key_bytes)
|
||||||
.ok_or(Error::Encoding)?
|
.ok_or(Error::Encoding)?
|
||||||
.into_owned(),
|
.into_owned(),
|
||||||
value,
|
value,
|
||||||
@ -149,7 +150,7 @@ impl FacetsUpdateIncrementalInner {
|
|||||||
)?;
|
)?;
|
||||||
let (key_bytes, value) = iter.next().unwrap()?;
|
let (key_bytes, value) = iter.next().unwrap()?;
|
||||||
Ok((
|
Ok((
|
||||||
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes)
|
FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(&key_bytes)
|
||||||
.ok_or(Error::Encoding)?
|
.ok_or(Error::Encoding)?
|
||||||
.into_owned(),
|
.into_owned(),
|
||||||
value,
|
value,
|
||||||
@ -411,7 +412,7 @@ impl FacetsUpdateIncrementalInner {
|
|||||||
let mut values = RoaringBitmap::new();
|
let mut values = RoaringBitmap::new();
|
||||||
for _ in 0..group_size {
|
for _ in 0..group_size {
|
||||||
let (key_bytes, value_i) = groups_iter.next().unwrap()?;
|
let (key_bytes, value_i) = groups_iter.next().unwrap()?;
|
||||||
let key_i = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes)
|
let key_i = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(&key_bytes)
|
||||||
.ok_or(Error::Encoding)?;
|
.ok_or(Error::Encoding)?;
|
||||||
|
|
||||||
if first_key.is_none() {
|
if first_key.is_none() {
|
||||||
@ -434,7 +435,7 @@ impl FacetsUpdateIncrementalInner {
|
|||||||
let mut values = RoaringBitmap::new();
|
let mut values = RoaringBitmap::new();
|
||||||
for _ in 0..nbr_leftover_elements {
|
for _ in 0..nbr_leftover_elements {
|
||||||
let (key_bytes, value_i) = groups_iter.next().unwrap()?;
|
let (key_bytes, value_i) = groups_iter.next().unwrap()?;
|
||||||
let key_i = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes)
|
let key_i = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(&key_bytes)
|
||||||
.ok_or(Error::Encoding)?;
|
.ok_or(Error::Encoding)?;
|
||||||
|
|
||||||
if first_key.is_none() {
|
if first_key.is_none() {
|
||||||
@ -616,7 +617,7 @@ impl FacetsUpdateIncrementalInner {
|
|||||||
while let Some(el) = iter.next() {
|
while let Some(el) = iter.next() {
|
||||||
let (k, _) = el?;
|
let (k, _) = el?;
|
||||||
to_delete.push(
|
to_delete.push(
|
||||||
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(k)
|
FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(k)
|
||||||
.ok_or(Error::Encoding)?
|
.ok_or(Error::Encoding)?
|
||||||
.into_owned(),
|
.into_owned(),
|
||||||
);
|
);
|
||||||
@ -655,7 +656,8 @@ mod tests {
|
|||||||
use rand::{Rng, SeedableRng};
|
use rand::{Rng, SeedableRng};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::heed_codec::facet::{OrderedF64Codec, StrRefCodec};
|
use crate::heed_codec::facet::OrderedF64Codec;
|
||||||
|
use crate::heed_codec::StrRefCodec;
|
||||||
use crate::milli_snap;
|
use crate::milli_snap;
|
||||||
use crate::update::facet::tests::FacetIndex;
|
use crate::update::facet::tests::FacetIndex;
|
||||||
|
|
||||||
@ -1019,6 +1021,7 @@ mod tests {
|
|||||||
|
|
||||||
// fuzz tests
|
// fuzz tests
|
||||||
}
|
}
|
||||||
|
|
||||||
#[cfg(all(test, fuzzing))]
|
#[cfg(all(test, fuzzing))]
|
||||||
mod fuzz {
|
mod fuzz {
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
|
@ -77,7 +77,8 @@ pub const FACET_MIN_LEVEL_SIZE: u8 = 5;
|
|||||||
use self::incremental::FacetsUpdateIncremental;
|
use self::incremental::FacetsUpdateIncremental;
|
||||||
use super::FacetsUpdateBulk;
|
use super::FacetsUpdateBulk;
|
||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
use crate::heed_codec::facet::{ByteSliceRef, FacetGroupKeyCodec, FacetGroupValueCodec};
|
use crate::heed_codec::facet::{FacetGroupKeyCodec, FacetGroupValueCodec};
|
||||||
|
use crate::heed_codec::ByteSliceRefCodec;
|
||||||
use crate::{Index, Result};
|
use crate::{Index, Result};
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
|
|
||||||
@ -87,7 +88,7 @@ pub mod incremental;
|
|||||||
|
|
||||||
pub struct FacetsUpdate<'i> {
|
pub struct FacetsUpdate<'i> {
|
||||||
index: &'i Index,
|
index: &'i Index,
|
||||||
database: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
database: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||||
facet_type: FacetType,
|
facet_type: FacetType,
|
||||||
new_data: grenad::Reader<File>,
|
new_data: grenad::Reader<File>,
|
||||||
group_size: u8,
|
group_size: u8,
|
||||||
@ -97,11 +98,11 @@ pub struct FacetsUpdate<'i> {
|
|||||||
impl<'i> FacetsUpdate<'i> {
|
impl<'i> FacetsUpdate<'i> {
|
||||||
pub fn new(index: &'i Index, facet_type: FacetType, new_data: grenad::Reader<File>) -> Self {
|
pub fn new(index: &'i Index, facet_type: FacetType, new_data: grenad::Reader<File>) -> Self {
|
||||||
let database = match facet_type {
|
let database = match facet_type {
|
||||||
FacetType::String => {
|
FacetType::String => index
|
||||||
index.facet_id_string_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
.facet_id_string_docids
|
||||||
}
|
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
|
||||||
FacetType::Number => {
|
FacetType::Number => {
|
||||||
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRef>>()
|
index.facet_id_f64_docids.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>()
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
Self {
|
Self {
|
||||||
@ -159,8 +160,9 @@ pub(crate) mod tests {
|
|||||||
|
|
||||||
use super::bulk::FacetsUpdateBulkInner;
|
use super::bulk::FacetsUpdateBulkInner;
|
||||||
use crate::heed_codec::facet::{
|
use crate::heed_codec::facet::{
|
||||||
ByteSliceRef, FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
FacetGroupKey, FacetGroupKeyCodec, FacetGroupValue, FacetGroupValueCodec,
|
||||||
};
|
};
|
||||||
|
use crate::heed_codec::ByteSliceRefCodec;
|
||||||
use crate::search::facet::get_highest_level;
|
use crate::search::facet::get_highest_level;
|
||||||
use crate::snapshot_tests::display_bitmap;
|
use crate::snapshot_tests::display_bitmap;
|
||||||
use crate::update::FacetsUpdateIncrementalInner;
|
use crate::update::FacetsUpdateIncrementalInner;
|
||||||
@ -173,7 +175,7 @@ pub(crate) mod tests {
|
|||||||
BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>,
|
BytesEncode<'a> + BytesDecode<'a, DItem = <BoundCodec as BytesEncode<'a>>::EItem>,
|
||||||
{
|
{
|
||||||
pub env: Env,
|
pub env: Env,
|
||||||
pub content: heed::Database<FacetGroupKeyCodec<ByteSliceRef>, FacetGroupValueCodec>,
|
pub content: heed::Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||||
pub group_size: Cell<u8>,
|
pub group_size: Cell<u8>,
|
||||||
pub min_level_size: Cell<u8>,
|
pub min_level_size: Cell<u8>,
|
||||||
pub max_group_size: Cell<u8>,
|
pub max_group_size: Cell<u8>,
|
||||||
@ -327,7 +329,7 @@ pub(crate) mod tests {
|
|||||||
let left_bound_bytes = BoundCodec::bytes_encode(left_bound).unwrap().into_owned();
|
let left_bound_bytes = BoundCodec::bytes_encode(left_bound).unwrap().into_owned();
|
||||||
let key: FacetGroupKey<&[u8]> =
|
let key: FacetGroupKey<&[u8]> =
|
||||||
FacetGroupKey { field_id: *field_id, level: 0, left_bound: &left_bound_bytes };
|
FacetGroupKey { field_id: *field_id, level: 0, left_bound: &left_bound_bytes };
|
||||||
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_encode(&key).unwrap();
|
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_encode(&key).unwrap();
|
||||||
let value = CboRoaringBitmapCodec::bytes_encode(&docids).unwrap();
|
let value = CboRoaringBitmapCodec::bytes_encode(&docids).unwrap();
|
||||||
writer.insert(&key, &value).unwrap();
|
writer.insert(&key, &value).unwrap();
|
||||||
}
|
}
|
||||||
@ -362,7 +364,7 @@ pub(crate) mod tests {
|
|||||||
.unwrap();
|
.unwrap();
|
||||||
while let Some(el) = iter.next() {
|
while let Some(el) = iter.next() {
|
||||||
let (key, value) = el.unwrap();
|
let (key, value) = el.unwrap();
|
||||||
let key = FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key).unwrap();
|
let key = FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(&key).unwrap();
|
||||||
|
|
||||||
let mut prefix_start_below = vec![];
|
let mut prefix_start_below = vec![];
|
||||||
prefix_start_below.extend_from_slice(&field_id.to_be_bytes());
|
prefix_start_below.extend_from_slice(&field_id.to_be_bytes());
|
||||||
@ -379,7 +381,7 @@ pub(crate) mod tests {
|
|||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let (key_bytes, _) = start_below_iter.next().unwrap().unwrap();
|
let (key_bytes, _) = start_below_iter.next().unwrap().unwrap();
|
||||||
FacetGroupKeyCodec::<ByteSliceRef>::bytes_decode(&key_bytes).unwrap()
|
FacetGroupKeyCodec::<ByteSliceRefCodec>::bytes_decode(&key_bytes).unwrap()
|
||||||
};
|
};
|
||||||
|
|
||||||
assert!(value.size > 0);
|
assert!(value.size > 0);
|
||||||
|
@ -4,7 +4,8 @@ use std::io;
|
|||||||
use heed::BytesEncode;
|
use heed::BytesEncode;
|
||||||
|
|
||||||
use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters};
|
use super::helpers::{create_sorter, sorter_into_reader, try_split_array_at, GrenadParameters};
|
||||||
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec, StrRefCodec};
|
use crate::heed_codec::facet::{FacetGroupKey, FacetGroupKeyCodec};
|
||||||
|
use crate::heed_codec::StrRefCodec;
|
||||||
use crate::update::index_documents::merge_cbo_roaring_bitmaps;
|
use crate::update::index_documents::merge_cbo_roaring_bitmaps;
|
||||||
use crate::{FieldId, Result};
|
use crate::{FieldId, Result};
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user