From f858f64b1f0f69d791d3ba38b52ea1d002faacad Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 23 Jun 2021 10:29:00 +0200 Subject: [PATCH] Move the facet number iterators into their own module --- milli/src/search/criteria/asc_desc.rs | 9 +- milli/src/search/facet/facet_distribution.rs | 6 +- milli/src/search/facet/facet_number.rs | 248 +++++++++++++++++++ milli/src/search/facet/filter_condition.rs | 4 +- milli/src/search/facet/mod.rs | 248 +------------------ milli/src/search/mod.rs | 2 +- 6 files changed, 262 insertions(+), 255 deletions(-) create mode 100644 milli/src/search/facet/facet_number.rs diff --git a/milli/src/search/criteria/asc_desc.rs b/milli/src/search/criteria/asc_desc.rs index ccee2c393..99d63c90d 100644 --- a/milli/src/search/criteria/asc_desc.rs +++ b/milli/src/search/criteria/asc_desc.rs @@ -8,7 +8,7 @@ use roaring::RoaringBitmap; use super::{Criterion, CriterionParameters, CriterionResult}; use crate::error::FieldIdMapMissingEntry; use crate::search::criteria::{resolve_query_tree, CriteriaBuilder}; -use crate::search::facet::FacetIter; +use crate::search::facet::FacetNumberIter; use crate::search::query_tree::Operation; use crate::{FieldId, Index, Result}; @@ -172,8 +172,11 @@ fn facet_ordered<'t>( let iter = iterative_facet_ordered_iter(index, rtxn, field_id, ascending, candidates)?; Ok(Box::new(iter.map(Ok)) as Box>) } else { - let facet_fn = - if ascending { FacetIter::new_reducing } else { FacetIter::new_reverse_reducing }; + let facet_fn = if ascending { + FacetNumberIter::new_reducing + } else { + FacetNumberIter::new_reverse_reducing + }; let iter = facet_fn(rtxn, index, field_id, candidates)?; Ok(Box::new(iter.map(|res| res.map(|(_, docids)| docids)))) } diff --git a/milli/src/search/facet/facet_distribution.rs b/milli/src/search/facet/facet_distribution.rs index b0b22ac49..080fd9af7 100644 --- a/milli/src/search/facet/facet_distribution.rs +++ b/milli/src/search/facet/facet_distribution.rs @@ -9,7 +9,7 @@ use roaring::RoaringBitmap; use crate::error::{FieldIdMapMissingEntry, UserError}; use crate::facet::FacetType; use crate::heed_codec::facet::FacetValueStringCodec; -use crate::search::facet::{FacetIter, FacetRange}; +use crate::search::facet::{FacetNumberIter, FacetNumberRange}; use crate::{DocumentId, FieldId, Index, Result}; /// The default number of values by facets that will @@ -118,7 +118,7 @@ impl<'a> FacetDistribution<'a> { distribution: &mut BTreeMap, ) -> heed::Result<()> { let iter = - FacetIter::new_non_reducing(self.rtxn, self.index, field_id, candidates.clone())?; + FacetNumberIter::new_non_reducing(self.rtxn, self.index, field_id, candidates.clone())?; for result in iter { let (value, mut docids) = result?; @@ -143,7 +143,7 @@ impl<'a> FacetDistribution<'a> { let mut distribution = BTreeMap::new(); let db = self.index.facet_id_f64_docids; - let range = FacetRange::new(self.rtxn, db, field_id, 0, Unbounded, Unbounded)?; + let range = FacetNumberRange::new(self.rtxn, db, field_id, 0, Unbounded, Unbounded)?; for result in range { let ((_, _, value, _), docids) = result?; diff --git a/milli/src/search/facet/facet_number.rs b/milli/src/search/facet/facet_number.rs new file mode 100644 index 000000000..f943b96da --- /dev/null +++ b/milli/src/search/facet/facet_number.rs @@ -0,0 +1,248 @@ +use std::ops::Bound::{self, Excluded, Included, Unbounded}; + +use either::Either::{self, Left, Right}; +use heed::types::{ByteSlice, DecodeIgnore}; +use heed::{Database, LazyDecode, RoRange, RoRevRange}; +use roaring::RoaringBitmap; + +use crate::heed_codec::facet::FacetLevelValueF64Codec; +use crate::heed_codec::CboRoaringBitmapCodec; +use crate::{FieldId, Index}; + +pub struct FacetNumberRange<'t> { + iter: RoRange<'t, FacetLevelValueF64Codec, LazyDecode>, + end: Bound, +} + +impl<'t> FacetNumberRange<'t> { + pub fn new( + rtxn: &'t heed::RoTxn, + db: Database, + field_id: FieldId, + level: u8, + left: Bound, + right: Bound, + ) -> heed::Result> { + let left_bound = match left { + Included(left) => Included((field_id, level, left, f64::MIN)), + Excluded(left) => Excluded((field_id, level, left, f64::MIN)), + Unbounded => Included((field_id, level, f64::MIN, f64::MIN)), + }; + let right_bound = Included((field_id, level, f64::MAX, f64::MAX)); + let iter = db.lazily_decode_data().range(rtxn, &(left_bound, right_bound))?; + Ok(FacetNumberRange { iter, end: right }) + } +} + +impl<'t> Iterator for FacetNumberRange<'t> { + type Item = heed::Result<((FieldId, u8, f64, f64), RoaringBitmap)>; + + fn next(&mut self) -> Option { + match self.iter.next() { + Some(Ok(((fid, level, left, right), docids))) => { + let must_be_returned = match self.end { + Included(end) => right <= end, + Excluded(end) => right < end, + Unbounded => true, + }; + if must_be_returned { + match docids.decode() { + Ok(docids) => Some(Ok(((fid, level, left, right), docids))), + Err(e) => Some(Err(e)), + } + } else { + None + } + } + Some(Err(e)) => Some(Err(e)), + None => None, + } + } +} + +pub struct FacetNumberRevRange<'t> { + iter: RoRevRange<'t, FacetLevelValueF64Codec, LazyDecode>, + end: Bound, +} + +impl<'t> FacetNumberRevRange<'t> { + pub fn new( + rtxn: &'t heed::RoTxn, + db: Database, + field_id: FieldId, + level: u8, + left: Bound, + right: Bound, + ) -> heed::Result> { + let left_bound = match left { + Included(left) => Included((field_id, level, left, f64::MIN)), + Excluded(left) => Excluded((field_id, level, left, f64::MIN)), + Unbounded => Included((field_id, level, f64::MIN, f64::MIN)), + }; + let right_bound = Included((field_id, level, f64::MAX, f64::MAX)); + let iter = db.lazily_decode_data().rev_range(rtxn, &(left_bound, right_bound))?; + Ok(FacetNumberRevRange { iter, end: right }) + } +} + +impl<'t> Iterator for FacetNumberRevRange<'t> { + type Item = heed::Result<((FieldId, u8, f64, f64), RoaringBitmap)>; + + fn next(&mut self) -> Option { + loop { + match self.iter.next() { + Some(Ok(((fid, level, left, right), docids))) => { + let must_be_returned = match self.end { + Included(end) => right <= end, + Excluded(end) => right < end, + Unbounded => true, + }; + if must_be_returned { + match docids.decode() { + Ok(docids) => return Some(Ok(((fid, level, left, right), docids))), + Err(e) => return Some(Err(e)), + } + } + continue; + } + Some(Err(e)) => return Some(Err(e)), + None => return None, + } + } + } +} + +pub struct FacetNumberIter<'t> { + rtxn: &'t heed::RoTxn<'t>, + db: Database, + field_id: FieldId, + level_iters: Vec<(RoaringBitmap, Either, FacetNumberRevRange<'t>>)>, + must_reduce: bool, +} + +impl<'t> FacetNumberIter<'t> { + /// Create a `FacetNumberIter` that will iterate on the different facet entries + /// (facet value + documents ids) and that will reduce the given documents ids + /// while iterating on the different facet levels. + pub fn new_reducing( + rtxn: &'t heed::RoTxn, + index: &'t Index, + field_id: FieldId, + documents_ids: RoaringBitmap, + ) -> heed::Result> { + let db = index.facet_id_f64_docids.remap_key_type::(); + let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0); + let highest_iter = + FacetNumberRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?; + let level_iters = vec![(documents_ids, Left(highest_iter))]; + Ok(FacetNumberIter { rtxn, db, field_id, level_iters, must_reduce: true }) + } + + /// Create a `FacetNumberIter` that will iterate on the different facet entries in reverse + /// (facet value + documents ids) and that will reduce the given documents ids + /// while iterating on the different facet levels. + pub fn new_reverse_reducing( + rtxn: &'t heed::RoTxn, + index: &'t Index, + field_id: FieldId, + documents_ids: RoaringBitmap, + ) -> heed::Result> { + let db = index.facet_id_f64_docids.remap_key_type::(); + let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0); + let highest_iter = + FacetNumberRevRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?; + let level_iters = vec![(documents_ids, Right(highest_iter))]; + Ok(FacetNumberIter { rtxn, db, field_id, level_iters, must_reduce: true }) + } + + /// Create a `FacetNumberIter` that will iterate on the different facet entries + /// (facet value + documents ids) and that will not reduce the given documents ids + /// while iterating on the different facet levels, possibly returning multiple times + /// a document id associated with multiple facet values. + pub fn new_non_reducing( + rtxn: &'t heed::RoTxn, + index: &'t Index, + field_id: FieldId, + documents_ids: RoaringBitmap, + ) -> heed::Result> { + let db = index.facet_id_f64_docids.remap_key_type::(); + let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0); + let highest_iter = + FacetNumberRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?; + let level_iters = vec![(documents_ids, Left(highest_iter))]; + Ok(FacetNumberIter { rtxn, db, field_id, level_iters, must_reduce: false }) + } + + fn highest_level( + rtxn: &'t heed::RoTxn, + db: Database, + fid: FieldId, + ) -> heed::Result> { + let level = db + .remap_types::() + .prefix_iter(rtxn, &fid.to_be_bytes())? + .remap_key_type::() + .last() + .transpose()? + .map(|((_, level, _, _), _)| level); + Ok(level) + } +} + +impl<'t> Iterator for FacetNumberIter<'t> { + type Item = heed::Result<(f64, RoaringBitmap)>; + + fn next(&mut self) -> Option { + 'outer: loop { + let (documents_ids, last) = self.level_iters.last_mut()?; + let is_ascending = last.is_left(); + for result in last { + // If the last iterator must find an empty set of documents it means + // that we found all the documents in the sub level iterations already, + // we can pop this level iterator. + if documents_ids.is_empty() { + break; + } + + match result { + Ok(((_fid, level, left, right), mut docids)) => { + docids &= &*documents_ids; + if !docids.is_empty() { + if self.must_reduce { + *documents_ids -= &docids; + } + + if level == 0 { + return Some(Ok((left, docids))); + } + + let rtxn = self.rtxn; + let db = self.db; + let fid = self.field_id; + let left = Included(left); + let right = Included(right); + + let result = if is_ascending { + FacetNumberRange::new(rtxn, db, fid, level - 1, left, right) + .map(Left) + } else { + FacetNumberRevRange::new(rtxn, db, fid, level - 1, left, right) + .map(Right) + }; + + match result { + Ok(iter) => { + self.level_iters.push((docids, iter)); + continue 'outer; + } + Err(e) => return Some(Err(e)), + } + } + } + Err(e) => return Some(Err(e)), + } + } + self.level_iters.pop(); + } + } +} diff --git a/milli/src/search/facet/filter_condition.rs b/milli/src/search/facet/filter_condition.rs index 1b1eafcab..875fe3b27 100644 --- a/milli/src/search/facet/filter_condition.rs +++ b/milli/src/search/facet/filter_condition.rs @@ -15,7 +15,7 @@ use roaring::RoaringBitmap; use self::FilterCondition::*; use self::Operator::*; use super::parser::{FilterParser, Rule, PREC_CLIMBER}; -use super::FacetRange; +use super::FacetNumberRange; use crate::error::UserError; use crate::heed_codec::facet::{FacetLevelValueF64Codec, FacetValueStringCodec}; use crate::{CboRoaringBitmapCodec, FieldId, FieldsIdsMap, Index, Result}; @@ -282,7 +282,7 @@ impl FilterCondition { // We must create a custom iterator to be able to iterate over the // requested range as the range iterator cannot express some conditions. - let iter = FacetRange::new(rtxn, db, field_id, level, left, right)?; + let iter = FacetNumberRange::new(rtxn, db, field_id, level, left, right)?; debug!("Iterating between {:?} and {:?} (level {})", left, right, level); diff --git a/milli/src/search/facet/mod.rs b/milli/src/search/facet/mod.rs index 9774bdd52..e6ea92543 100644 --- a/milli/src/search/facet/mod.rs +++ b/milli/src/search/facet/mod.rs @@ -1,253 +1,9 @@ -use std::ops::Bound::{self, Excluded, Included, Unbounded}; - -use either::Either::{self, Left, Right}; -use heed::types::{ByteSlice, DecodeIgnore}; -use heed::{Database, LazyDecode, RoRange, RoRevRange}; -use roaring::RoaringBitmap; - pub use self::facet_distribution::FacetDistribution; +pub use self::facet_number::{FacetNumberIter, FacetNumberRange, FacetNumberRevRange}; pub use self::filter_condition::{FilterCondition, Operator}; pub(crate) use self::parser::Rule as ParserRule; -use crate::heed_codec::facet::FacetLevelValueF64Codec; -use crate::heed_codec::CboRoaringBitmapCodec; -use crate::{FieldId, Index}; mod facet_distribution; +mod facet_number; mod filter_condition; mod parser; - -pub struct FacetRange<'t> { - iter: RoRange<'t, FacetLevelValueF64Codec, LazyDecode>, - end: Bound, -} - -impl<'t> FacetRange<'t> { - pub fn new( - rtxn: &'t heed::RoTxn, - db: Database, - field_id: FieldId, - level: u8, - left: Bound, - right: Bound, - ) -> heed::Result> { - let left_bound = match left { - Included(left) => Included((field_id, level, left, f64::MIN)), - Excluded(left) => Excluded((field_id, level, left, f64::MIN)), - Unbounded => Included((field_id, level, f64::MIN, f64::MIN)), - }; - let right_bound = Included((field_id, level, f64::MAX, f64::MAX)); - let iter = db.lazily_decode_data().range(rtxn, &(left_bound, right_bound))?; - Ok(FacetRange { iter, end: right }) - } -} - -impl<'t> Iterator for FacetRange<'t> { - type Item = heed::Result<((FieldId, u8, f64, f64), RoaringBitmap)>; - - fn next(&mut self) -> Option { - match self.iter.next() { - Some(Ok(((fid, level, left, right), docids))) => { - let must_be_returned = match self.end { - Included(end) => right <= end, - Excluded(end) => right < end, - Unbounded => true, - }; - if must_be_returned { - match docids.decode() { - Ok(docids) => Some(Ok(((fid, level, left, right), docids))), - Err(e) => Some(Err(e)), - } - } else { - None - } - } - Some(Err(e)) => Some(Err(e)), - None => None, - } - } -} - -pub struct FacetRevRange<'t> { - iter: RoRevRange<'t, FacetLevelValueF64Codec, LazyDecode>, - end: Bound, -} - -impl<'t> FacetRevRange<'t> { - pub fn new( - rtxn: &'t heed::RoTxn, - db: Database, - field_id: FieldId, - level: u8, - left: Bound, - right: Bound, - ) -> heed::Result> { - let left_bound = match left { - Included(left) => Included((field_id, level, left, f64::MIN)), - Excluded(left) => Excluded((field_id, level, left, f64::MIN)), - Unbounded => Included((field_id, level, f64::MIN, f64::MIN)), - }; - let right_bound = Included((field_id, level, f64::MAX, f64::MAX)); - let iter = db.lazily_decode_data().rev_range(rtxn, &(left_bound, right_bound))?; - Ok(FacetRevRange { iter, end: right }) - } -} - -impl<'t> Iterator for FacetRevRange<'t> { - type Item = heed::Result<((FieldId, u8, f64, f64), RoaringBitmap)>; - - fn next(&mut self) -> Option { - loop { - match self.iter.next() { - Some(Ok(((fid, level, left, right), docids))) => { - let must_be_returned = match self.end { - Included(end) => right <= end, - Excluded(end) => right < end, - Unbounded => true, - }; - if must_be_returned { - match docids.decode() { - Ok(docids) => return Some(Ok(((fid, level, left, right), docids))), - Err(e) => return Some(Err(e)), - } - } - continue; - } - Some(Err(e)) => return Some(Err(e)), - None => return None, - } - } - } -} - -pub struct FacetIter<'t> { - rtxn: &'t heed::RoTxn<'t>, - db: Database, - field_id: FieldId, - level_iters: Vec<(RoaringBitmap, Either, FacetRevRange<'t>>)>, - must_reduce: bool, -} - -impl<'t> FacetIter<'t> { - /// Create a `FacetIter` that will iterate on the different facet entries - /// (facet value + documents ids) and that will reduce the given documents ids - /// while iterating on the different facet levels. - pub fn new_reducing( - rtxn: &'t heed::RoTxn, - index: &'t Index, - field_id: FieldId, - documents_ids: RoaringBitmap, - ) -> heed::Result> { - let db = index.facet_id_f64_docids.remap_key_type::(); - let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0); - let highest_iter = - FacetRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?; - let level_iters = vec![(documents_ids, Left(highest_iter))]; - Ok(FacetIter { rtxn, db, field_id, level_iters, must_reduce: true }) - } - - /// Create a `FacetIter` that will iterate on the different facet entries in reverse - /// (facet value + documents ids) and that will reduce the given documents ids - /// while iterating on the different facet levels. - pub fn new_reverse_reducing( - rtxn: &'t heed::RoTxn, - index: &'t Index, - field_id: FieldId, - documents_ids: RoaringBitmap, - ) -> heed::Result> { - let db = index.facet_id_f64_docids.remap_key_type::(); - let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0); - let highest_iter = - FacetRevRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?; - let level_iters = vec![(documents_ids, Right(highest_iter))]; - Ok(FacetIter { rtxn, db, field_id, level_iters, must_reduce: true }) - } - - /// Create a `FacetIter` that will iterate on the different facet entries - /// (facet value + documents ids) and that will not reduce the given documents ids - /// while iterating on the different facet levels, possibly returning multiple times - /// a document id associated with multiple facet values. - pub fn new_non_reducing( - rtxn: &'t heed::RoTxn, - index: &'t Index, - field_id: FieldId, - documents_ids: RoaringBitmap, - ) -> heed::Result> { - let db = index.facet_id_f64_docids.remap_key_type::(); - let highest_level = Self::highest_level(rtxn, db, field_id)?.unwrap_or(0); - let highest_iter = - FacetRange::new(rtxn, db, field_id, highest_level, Unbounded, Unbounded)?; - let level_iters = vec![(documents_ids, Left(highest_iter))]; - Ok(FacetIter { rtxn, db, field_id, level_iters, must_reduce: false }) - } - - fn highest_level( - rtxn: &'t heed::RoTxn, - db: Database, - fid: FieldId, - ) -> heed::Result> { - let level = db - .remap_types::() - .prefix_iter(rtxn, &fid.to_be_bytes())? - .remap_key_type::() - .last() - .transpose()? - .map(|((_, level, _, _), _)| level); - Ok(level) - } -} - -impl<'t> Iterator for FacetIter<'t> { - type Item = heed::Result<(f64, RoaringBitmap)>; - - fn next(&mut self) -> Option { - 'outer: loop { - let (documents_ids, last) = self.level_iters.last_mut()?; - let is_ascending = last.is_left(); - for result in last { - // If the last iterator must find an empty set of documents it means - // that we found all the documents in the sub level iterations already, - // we can pop this level iterator. - if documents_ids.is_empty() { - break; - } - - match result { - Ok(((_fid, level, left, right), mut docids)) => { - docids &= &*documents_ids; - if !docids.is_empty() { - if self.must_reduce { - *documents_ids -= &docids; - } - - if level == 0 { - return Some(Ok((left, docids))); - } - - let rtxn = self.rtxn; - let db = self.db; - let fid = self.field_id; - let left = Included(left); - let right = Included(right); - - let result = if is_ascending { - FacetRange::new(rtxn, db, fid, level - 1, left, right).map(Left) - } else { - FacetRevRange::new(rtxn, db, fid, level - 1, left, right).map(Right) - }; - - match result { - Ok(iter) => { - self.level_iters.push((docids, iter)); - continue 'outer; - } - Err(e) => return Some(Err(e)), - } - } - } - Err(e) => return Some(Err(e)), - } - } - self.level_iters.pop(); - } - } -} diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index f40a6aed6..574459547 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -15,7 +15,7 @@ use once_cell::sync::Lazy; use roaring::bitmap::RoaringBitmap; pub(crate) use self::facet::ParserRule; -pub use self::facet::{FacetDistribution, FacetIter, FilterCondition, Operator}; +pub use self::facet::{FacetDistribution, FacetNumberIter, FilterCondition, Operator}; pub use self::matching_words::MatchingWords; use self::query_tree::QueryTreeBuilder; use crate::error::FieldIdMapMissingEntry;