mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-30 08:44:27 +01:00
Introduce the FacetStringIter iterator
This commit is contained in:
parent
a79661c6dc
commit
adfd4da24c
@ -147,7 +147,7 @@ impl<'t> FacetNumberIter<'t> {
|
|||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
documents_ids: RoaringBitmap,
|
documents_ids: RoaringBitmap,
|
||||||
) -> heed::Result<FacetNumberIter<'t>> {
|
) -> heed::Result<FacetNumberIter<'t>> {
|
||||||
let db = index.facet_id_f64_docids.remap_key_type::<FacetLevelValueF64Codec>();
|
let db = index.facet_id_f64_docids;
|
||||||
let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
|
let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
|
||||||
let highest_iter =
|
let highest_iter =
|
||||||
FacetNumberRevRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
|
FacetNumberRevRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?;
|
||||||
|
@ -127,9 +127,10 @@
|
|||||||
|
|
||||||
use std::num::NonZeroU8;
|
use std::num::NonZeroU8;
|
||||||
use std::ops::Bound;
|
use std::ops::Bound;
|
||||||
use std::ops::Bound::{Excluded, Included};
|
use std::ops::Bound::{Excluded, Included, Unbounded};
|
||||||
|
|
||||||
use heed::types::{ByteSlice, Str};
|
use either::{Either, Left, Right};
|
||||||
|
use heed::types::{ByteSlice, DecodeIgnore, Str};
|
||||||
use heed::{Database, LazyDecode, RoRange};
|
use heed::{Database, LazyDecode, RoRange};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
@ -137,7 +138,7 @@ use crate::heed_codec::facet::{
|
|||||||
FacetLevelValueU32Codec, FacetStringLevelZeroCodec, FacetStringZeroBoundsValueCodec,
|
FacetLevelValueU32Codec, FacetStringLevelZeroCodec, FacetStringZeroBoundsValueCodec,
|
||||||
};
|
};
|
||||||
use crate::heed_codec::CboRoaringBitmapCodec;
|
use crate::heed_codec::CboRoaringBitmapCodec;
|
||||||
use crate::FieldId;
|
use crate::{FieldId, Index};
|
||||||
|
|
||||||
/// An iterator that is used to explore the facets level strings
|
/// An iterator that is used to explore the facets level strings
|
||||||
/// from the level 1 to infinity.
|
/// from the level 1 to infinity.
|
||||||
@ -155,17 +156,18 @@ pub struct FacetStringGroupRange<'t> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<'t> FacetStringGroupRange<'t> {
|
impl<'t> FacetStringGroupRange<'t> {
|
||||||
pub fn new(
|
pub fn new<X, Y>(
|
||||||
rtxn: &'t heed::RoTxn,
|
rtxn: &'t heed::RoTxn,
|
||||||
db: Database<
|
db: Database<X, Y>,
|
||||||
FacetLevelValueU32Codec,
|
|
||||||
FacetStringZeroBoundsValueCodec<CboRoaringBitmapCodec>,
|
|
||||||
>,
|
|
||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
level: NonZeroU8,
|
level: NonZeroU8,
|
||||||
left: Bound<u32>,
|
left: Bound<u32>,
|
||||||
right: Bound<u32>,
|
right: Bound<u32>,
|
||||||
) -> heed::Result<FacetStringGroupRange<'t>> {
|
) -> heed::Result<FacetStringGroupRange<'t>> {
|
||||||
|
let db = db.remap_types::<
|
||||||
|
FacetLevelValueU32Codec,
|
||||||
|
FacetStringZeroBoundsValueCodec<CboRoaringBitmapCodec>,
|
||||||
|
>();
|
||||||
let left_bound = match left {
|
let left_bound = match left {
|
||||||
Included(left) => Included((field_id, level, left, u32::MIN)),
|
Included(left) => Included((field_id, level, left, u32::MIN)),
|
||||||
Excluded(left) => Excluded((field_id, level, left, u32::MIN)),
|
Excluded(left) => Excluded((field_id, level, left, u32::MIN)),
|
||||||
@ -211,13 +213,14 @@ pub struct FacetStringLevelZeroRange<'t> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<'t> FacetStringLevelZeroRange<'t> {
|
impl<'t> FacetStringLevelZeroRange<'t> {
|
||||||
pub fn new(
|
pub fn new<X, Y>(
|
||||||
rtxn: &'t heed::RoTxn,
|
rtxn: &'t heed::RoTxn,
|
||||||
db: Database<FacetStringLevelZeroCodec, CboRoaringBitmapCodec>,
|
db: Database<X, Y>,
|
||||||
field_id: FieldId,
|
field_id: FieldId,
|
||||||
left: Bound<&str>,
|
left: Bound<&str>,
|
||||||
right: Bound<&str>,
|
right: Bound<&str>,
|
||||||
) -> heed::Result<FacetStringLevelZeroRange<'t>> {
|
) -> heed::Result<FacetStringLevelZeroRange<'t>> {
|
||||||
|
let db = db.remap_types::<FacetStringLevelZeroCodec, CboRoaringBitmapCodec>();
|
||||||
let left_bound = match left {
|
let left_bound = match left {
|
||||||
Included(left) => Included((field_id, left)),
|
Included(left) => Included((field_id, left)),
|
||||||
Excluded(left) => Excluded((field_id, left)),
|
Excluded(left) => Excluded((field_id, left)),
|
||||||
@ -245,3 +248,129 @@ impl<'t> Iterator for FacetStringLevelZeroRange<'t> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// An iterator that is used to explore the facet strings level by level,
|
||||||
|
/// it will only return facets strings that are associated with the
|
||||||
|
/// candidates documents ids given.
|
||||||
|
pub struct FacetStringIter<'t> {
|
||||||
|
rtxn: &'t heed::RoTxn<'t>,
|
||||||
|
db: Database<ByteSlice, ByteSlice>,
|
||||||
|
field_id: FieldId,
|
||||||
|
level_iters:
|
||||||
|
Vec<(RoaringBitmap, Either<FacetStringGroupRange<'t>, FacetStringLevelZeroRange<'t>>)>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'t> FacetStringIter<'t> {
|
||||||
|
pub fn new_non_reducing(
|
||||||
|
rtxn: &'t heed::RoTxn,
|
||||||
|
index: &'t Index,
|
||||||
|
field_id: FieldId,
|
||||||
|
documents_ids: RoaringBitmap,
|
||||||
|
) -> heed::Result<FacetStringIter<'t>> {
|
||||||
|
// TODO make sure that we change the database before using it, or merging the PR.
|
||||||
|
let db = index.facet_id_string_docids.remap_types::<ByteSlice, ByteSlice>();
|
||||||
|
let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0);
|
||||||
|
let highest_iter = match NonZeroU8::new(highest_level) {
|
||||||
|
Some(highest_level) => Left(FacetStringGroupRange::new(
|
||||||
|
rtxn,
|
||||||
|
index.facet_id_string_docids,
|
||||||
|
field_id,
|
||||||
|
highest_level,
|
||||||
|
Unbounded,
|
||||||
|
Unbounded,
|
||||||
|
)?),
|
||||||
|
None => Right(FacetStringLevelZeroRange::new(
|
||||||
|
rtxn,
|
||||||
|
index.facet_id_string_docids,
|
||||||
|
field_id,
|
||||||
|
Unbounded,
|
||||||
|
Unbounded,
|
||||||
|
)?),
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(FacetStringIter { rtxn, db, field_id, level_iters: vec![(documents_ids, highest_iter)] })
|
||||||
|
}
|
||||||
|
|
||||||
|
fn highest_level<X, Y>(
|
||||||
|
rtxn: &'t heed::RoTxn,
|
||||||
|
db: Database<X, Y>,
|
||||||
|
fid: FieldId,
|
||||||
|
) -> heed::Result<Option<u8>> {
|
||||||
|
Ok(db
|
||||||
|
.remap_types::<ByteSlice, DecodeIgnore>()
|
||||||
|
.prefix_iter(rtxn, &[fid][..])? // the field id is the first bit
|
||||||
|
.last()
|
||||||
|
.transpose()?
|
||||||
|
.map(|(key_bytes, _)| key_bytes[1])) // the level is the second bit
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'t> Iterator for FacetStringIter<'t> {
|
||||||
|
type Item = heed::Result<(&'t str, RoaringBitmap)>;
|
||||||
|
|
||||||
|
fn next(&mut self) -> Option<Self::Item> {
|
||||||
|
'outer: loop {
|
||||||
|
let (documents_ids, last) = self.level_iters.last_mut()?;
|
||||||
|
match last {
|
||||||
|
Left(last) => {
|
||||||
|
for result in last {
|
||||||
|
match result {
|
||||||
|
Ok(((level, left, right), (string_bounds, mut docids))) => {
|
||||||
|
docids &= &*documents_ids;
|
||||||
|
if !docids.is_empty() {
|
||||||
|
*documents_ids -= &docids;
|
||||||
|
|
||||||
|
let result = match string_bounds {
|
||||||
|
Some((left, right)) => FacetStringLevelZeroRange::new(
|
||||||
|
self.rtxn,
|
||||||
|
self.db,
|
||||||
|
self.field_id,
|
||||||
|
Included(left),
|
||||||
|
Included(right),
|
||||||
|
)
|
||||||
|
.map(Right),
|
||||||
|
None => FacetStringGroupRange::new(
|
||||||
|
self.rtxn,
|
||||||
|
self.db,
|
||||||
|
self.field_id,
|
||||||
|
NonZeroU8::new(level.get() - 1).unwrap(),
|
||||||
|
Included(left),
|
||||||
|
Included(right),
|
||||||
|
)
|
||||||
|
.map(Left),
|
||||||
|
};
|
||||||
|
|
||||||
|
match result {
|
||||||
|
Ok(iter) => {
|
||||||
|
self.level_iters.push((docids, iter));
|
||||||
|
continue 'outer;
|
||||||
|
}
|
||||||
|
Err(e) => return Some(Err(e)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => return Some(Err(e)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Right(last) => {
|
||||||
|
// level zero only
|
||||||
|
for result in last {
|
||||||
|
match result {
|
||||||
|
Ok((value, mut docids)) => {
|
||||||
|
docids &= &*documents_ids;
|
||||||
|
if !docids.is_empty() {
|
||||||
|
*documents_ids -= &docids;
|
||||||
|
return Some(Ok((value, docids)));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Err(e) => return Some(Err(e)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
self.level_iters.pop();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user