From a3944a7083ebe3a4729051b59da03f30d2e78512 Mon Sep 17 00:00:00 2001 From: many Date: Mon, 10 May 2021 12:33:37 +0200 Subject: [PATCH] Introduce a filtered_candidates field --- milli/src/search/criteria/asc_desc.rs | 20 ++++++++++---------- milli/src/search/criteria/attribute.rs | 16 +++++++++++++--- milli/src/search/criteria/exactness.rs | 16 +++++++++++----- milli/src/search/criteria/final.rs | 19 ++++++++++++------- milli/src/search/criteria/initial.rs | 5 +++-- milli/src/search/criteria/mod.rs | 2 ++ milli/src/search/criteria/proximity.rs | 12 +++++++++--- milli/src/search/criteria/typo.rs | 20 ++++++++++++++------ milli/src/search/criteria/words.rs | 16 +++++++++------- 9 files changed, 83 insertions(+), 43 deletions(-) diff --git a/milli/src/search/criteria/asc_desc.rs b/milli/src/search/criteria/asc_desc.rs index 31edc453e..0511ce319 100644 --- a/milli/src/search/criteria/asc_desc.rs +++ b/milli/src/search/criteria/asc_desc.rs @@ -93,23 +93,22 @@ impl<'t> Criterion for AscDesc<'t> { match self.candidates.next().transpose()? { None => { match self.parent.next(params)? { - Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { + Some(CriterionResult { query_tree, candidates, filtered_candidates, bucket_candidates }) => { self.query_tree = query_tree; - let candidates = match (&self.query_tree, candidates) { - (_, Some(mut candidates)) => { - candidates.intersect_with(&self.faceted_candidates); - candidates - }, + let mut candidates = match (&self.query_tree, candidates) { + (_, Some(candidates)) => candidates & &self.faceted_candidates, (Some(qt), None) => { let context = CriteriaBuilder::new(&self.rtxn, &self.index)?; - let mut candidates = resolve_query_tree(&context, qt, params.wdcache)?; - candidates -= params.excluded_candidates; - candidates.intersect_with(&self.faceted_candidates); - candidates + let candidates = resolve_query_tree(&context, qt, params.wdcache)?; + candidates & &self.faceted_candidates }, (None, None) => take(&mut self.faceted_candidates), }; + if let Some(filtered_candidates) = filtered_candidates { + candidates &= filtered_candidates; + } + match bucket_candidates { Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates, None => self.bucket_candidates |= &candidates, @@ -136,6 +135,7 @@ impl<'t> Criterion for AscDesc<'t> { return Ok(Some(CriterionResult { query_tree: self.query_tree.clone(), candidates: Some(candidates), + filtered_candidates: None, bucket_candidates: Some(take(&mut self.bucket_candidates)), })); }, diff --git a/milli/src/search/criteria/attribute.rs b/milli/src/search/criteria/attribute.rs index 9d4d88d0d..6818e02fd 100644 --- a/milli/src/search/criteria/attribute.rs +++ b/milli/src/search/criteria/attribute.rs @@ -25,6 +25,7 @@ const LEVEL_EXPONENTIATION_BASE: u32 = 4; const CANDIDATES_THRESHOLD: u64 = 1000; type FlattenedQueryTree = Vec>>; + pub struct Attribute<'t> { ctx: &'t dyn Context<'t>, state: Option<(Operation, FlattenedQueryTree, RoaringBitmap)>, @@ -59,6 +60,7 @@ impl<'t> Criterion for Attribute<'t> { return Ok(Some(CriterionResult { query_tree: Some(query_tree), candidates: Some(RoaringBitmap::new()), + filtered_candidates: None, bucket_candidates: Some(take(&mut self.bucket_candidates)), })); }, @@ -78,6 +80,7 @@ impl<'t> Criterion for Attribute<'t> { return Ok(Some(CriterionResult { query_tree: Some(query_tree), candidates: Some(RoaringBitmap::new()), + filtered_candidates: None, bucket_candidates: Some(take(&mut self.bucket_candidates)), })); }, @@ -89,6 +92,7 @@ impl<'t> Criterion for Attribute<'t> { return Ok(Some(CriterionResult { query_tree: Some(query_tree), candidates: Some(RoaringBitmap::new()), + filtered_candidates: None, bucket_candidates: Some(take(&mut self.bucket_candidates)), })); }, @@ -102,17 +106,22 @@ impl<'t> Criterion for Attribute<'t> { return Ok(Some(CriterionResult { query_tree: Some(query_tree), candidates: Some(found_candidates), + filtered_candidates: None, bucket_candidates: Some(take(&mut self.bucket_candidates)), })); }, None => { match self.parent.next(params)? { - Some(CriterionResult { query_tree: Some(query_tree), candidates, bucket_candidates }) => { - let candidates = match candidates { + Some(CriterionResult { query_tree: Some(query_tree), candidates, filtered_candidates, bucket_candidates }) => { + let mut candidates = match candidates { Some(candidates) => candidates, None => resolve_query_tree(self.ctx, &query_tree, params.wdcache)? - params.excluded_candidates, }; + if let Some(filtered_candidates) = filtered_candidates { + candidates &= filtered_candidates; + } + let flattened_query_tree = flatten_query_tree(&query_tree); match bucket_candidates { @@ -123,10 +132,11 @@ impl<'t> Criterion for Attribute<'t> { self.state = Some((query_tree, flattened_query_tree, candidates)); self.current_buckets = None; }, - Some(CriterionResult { query_tree: None, candidates, bucket_candidates }) => { + Some(CriterionResult { query_tree: None, candidates, filtered_candidates, bucket_candidates }) => { return Ok(Some(CriterionResult { query_tree: None, candidates, + filtered_candidates, bucket_candidates, })); }, diff --git a/milli/src/search/criteria/exactness.rs b/milli/src/search/criteria/exactness.rs index 2fd216053..b1026ccc2 100644 --- a/milli/src/search/criteria/exactness.rs +++ b/milli/src/search/criteria/exactness.rs @@ -66,17 +66,22 @@ impl<'t> Criterion for Exactness<'t> { return Ok(Some(CriterionResult { query_tree: self.query_tree.clone(), candidates: Some(candidates), + filtered_candidates: None, bucket_candidates: Some(take(&mut self.bucket_candidates)), })); }, None => { match self.parent.next(params)? { - Some(CriterionResult { query_tree: Some(query_tree), candidates, bucket_candidates }) => { - let candidates = match candidates { + Some(CriterionResult { query_tree: Some(query_tree), candidates, filtered_candidates, bucket_candidates }) => { + let mut candidates = match candidates { Some(candidates) => candidates, - None => resolve_query_tree(self.ctx, &query_tree, params.wdcache)?, + None => resolve_query_tree(self.ctx, &query_tree, params.wdcache)? - params.excluded_candidates, }; + if let Some(filtered_candidates) = filtered_candidates { + candidates &= filtered_candidates; + } + match bucket_candidates { Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates, None => self.bucket_candidates |= &candidates, @@ -85,10 +90,11 @@ impl<'t> Criterion for Exactness<'t> { self.state = Some(State::new(candidates)); self.query_tree = Some(query_tree); }, - Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { + Some(CriterionResult { query_tree: None, candidates, filtered_candidates, bucket_candidates }) => { return Ok(Some(CriterionResult { - query_tree, + query_tree: None, candidates, + filtered_candidates, bucket_candidates, })); }, diff --git a/milli/src/search/criteria/final.rs b/milli/src/search/criteria/final.rs index e2fb81aaf..860362f51 100644 --- a/milli/src/search/criteria/final.rs +++ b/milli/src/search/criteria/final.rs @@ -31,27 +31,32 @@ impl<'t> Final<'t> { #[logging_timer::time("Final::{}")] pub fn next(&mut self, excluded_candidates: &RoaringBitmap) -> anyhow::Result> { debug!("Final iteration"); + let excluded_candidates = &self.returned_candidates | excluded_candidates; let mut criterion_parameters = CriterionParameters { wdcache: &mut self.wdcache, // returned_candidates is merged with excluded_candidates to avoid duplicas - excluded_candidates: &(&self.returned_candidates | excluded_candidates), + excluded_candidates: &excluded_candidates, }; match self.parent.next(&mut criterion_parameters)? { - Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { - let candidates = match (candidates, query_tree.as_ref()) { + Some(CriterionResult { query_tree, candidates, filtered_candidates, bucket_candidates }) => { + let mut candidates = match (candidates, query_tree.as_ref()) { (Some(candidates), _) => candidates, - (None, Some(qt)) => resolve_query_tree(self.ctx, qt, &mut self.wdcache)?, - (None, None) => self.ctx.documents_ids()?, + (None, Some(qt)) => resolve_query_tree(self.ctx, qt, &mut self.wdcache)? - excluded_candidates, + (None, None) => self.ctx.documents_ids()? - excluded_candidates, }; + if let Some(filtered_candidates) = filtered_candidates { + candidates &= filtered_candidates; + } + let bucket_candidates = bucket_candidates.unwrap_or_else(|| candidates.clone()); self.returned_candidates |= &candidates; - return Ok(Some(FinalResult { query_tree, candidates, bucket_candidates })); + Ok(Some(FinalResult { query_tree, candidates, bucket_candidates })) }, - None => return Ok(None), + None => Ok(None), } } } diff --git a/milli/src/search/criteria/initial.rs b/milli/src/search/criteria/initial.rs index eb2bf0b95..5d242a0eb 100644 --- a/milli/src/search/criteria/initial.rs +++ b/milli/src/search/criteria/initial.rs @@ -9,10 +9,11 @@ pub struct Initial { } impl Initial { - pub fn new(query_tree: Option, mut candidates: Option) -> Initial { + pub fn new(query_tree: Option, filtered_candidates: Option) -> Initial { let answer = CriterionResult { query_tree, - candidates: candidates.take(), + candidates: None, + filtered_candidates, bucket_candidates: None, }; Initial { answer: Some(answer) } diff --git a/milli/src/search/criteria/mod.rs b/milli/src/search/criteria/mod.rs index f9fca2624..99e4a4209 100644 --- a/milli/src/search/criteria/mod.rs +++ b/milli/src/search/criteria/mod.rs @@ -38,6 +38,8 @@ pub struct CriterionResult { /// The candidates that this criterion is allowed to return subsets of, /// if None, it is up to the child to compute the candidates itself. candidates: Option, + /// The candidates, coming from facet filters, that this criterion is allowed to return subsets of. + filtered_candidates: Option, /// Candidates that comes from the current bucket of the initial criterion. bucket_candidates: Option, } diff --git a/milli/src/search/criteria/proximity.rs b/milli/src/search/criteria/proximity.rs index e50d3941d..bf9be9b9f 100644 --- a/milli/src/search/criteria/proximity.rs +++ b/milli/src/search/criteria/proximity.rs @@ -113,17 +113,22 @@ impl<'t> Criterion for Proximity<'t> { return Ok(Some(CriterionResult { query_tree: Some(query_tree.clone()), candidates: Some(new_candidates), + filtered_candidates: None, bucket_candidates: Some(take(&mut self.bucket_candidates)), })); }, None => { match self.parent.next(params)? { - Some(CriterionResult { query_tree: Some(query_tree), candidates, bucket_candidates }) => { - let candidates = match candidates { + Some(CriterionResult { query_tree: Some(query_tree), candidates, filtered_candidates, bucket_candidates }) => { + let mut candidates = match candidates { Some(candidates) => candidates, None => resolve_query_tree(self.ctx, &query_tree, params.wdcache)? - params.excluded_candidates, }; + if let Some(filtered_candidates) = filtered_candidates { + candidates &= filtered_candidates; + } + match bucket_candidates { Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates, None => self.bucket_candidates |= &candidates, @@ -134,10 +139,11 @@ impl<'t> Criterion for Proximity<'t> { self.proximity = 0; self.plane_sweep_cache = None; }, - Some(CriterionResult { query_tree: None, candidates, bucket_candidates }) => { + Some(CriterionResult { query_tree: None, candidates, filtered_candidates, bucket_candidates }) => { return Ok(Some(CriterionResult { query_tree: None, candidates, + filtered_candidates, bucket_candidates, })); }, diff --git a/milli/src/search/criteria/typo.rs b/milli/src/search/criteria/typo.rs index 059d52e48..a844417eb 100644 --- a/milli/src/search/criteria/typo.rs +++ b/milli/src/search/criteria/typo.rs @@ -119,30 +119,33 @@ impl<'t> Criterion for Typo<'t> { return Ok(Some(CriterionResult { query_tree: Some(new_query_tree), candidates: Some(candidates), + filtered_candidates: None, bucket_candidates: Some(bucket_candidates), })); }, None => { match self.parent.next(params)? { - Some(CriterionResult { query_tree: Some(query_tree), candidates, bucket_candidates }) => { + Some(CriterionResult { query_tree: Some(query_tree), candidates, filtered_candidates, bucket_candidates }) => { self.bucket_candidates = match (self.bucket_candidates.take(), bucket_candidates) { (Some(self_bc), Some(parent_bc)) => Some(self_bc | parent_bc), (self_bc, parent_bc) => self_bc.or(parent_bc), }; - let candidates = candidates.map_or_else(|| { - Candidates::Forbidden(params.excluded_candidates.clone()) - }, Candidates::Allowed); + let candidates = match candidates.or(filtered_candidates) { + Some(candidates) => Candidates::Allowed(candidates - params.excluded_candidates), + None => Candidates::Forbidden(params.excluded_candidates.clone()), + }; let maximum_typos = maximum_typo(&query_tree) as u8; self.state = Some((maximum_typos, query_tree, candidates)); self.typos = 0; }, - Some(CriterionResult { query_tree: None, candidates, bucket_candidates }) => { + Some(CriterionResult { query_tree: None, candidates, filtered_candidates, bucket_candidates }) => { return Ok(Some(CriterionResult { query_tree: None, candidates, + filtered_candidates, bucket_candidates, })); }, @@ -377,6 +380,7 @@ mod test { ])), candidates: Some(candidates_1.clone()), bucket_candidates: Some(candidates_1), + filtered_candidates: None, }; assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_1)); @@ -399,6 +403,7 @@ mod test { ])), candidates: Some(candidates_2.clone()), bucket_candidates: Some(candidates_2), + filtered_candidates: None, }; assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_2)); @@ -419,8 +424,9 @@ mod test { let expected = CriterionResult { query_tree: None, - candidates: Some(facet_candidates.clone()), + candidates: None, bucket_candidates: None, + filtered_candidates: Some(facet_candidates.clone()), }; // first iteration, returns the facet candidates @@ -464,6 +470,7 @@ mod test { ])), candidates: Some(&candidates_1 & &facet_candidates), bucket_candidates: Some(&candidates_1 & &facet_candidates), + filtered_candidates: None, }; assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_1)); @@ -486,6 +493,7 @@ mod test { ])), candidates: Some(&candidates_2 & &facet_candidates), bucket_candidates: Some(&candidates_2 & &facet_candidates), + filtered_candidates: None, }; assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_2)); diff --git a/milli/src/search/criteria/words.rs b/milli/src/search/criteria/words.rs index 2f7ebbfbf..8730fa331 100644 --- a/milli/src/search/criteria/words.rs +++ b/milli/src/search/criteria/words.rs @@ -11,8 +11,8 @@ pub struct Words<'t> { query_trees: Vec, candidates: Option, bucket_candidates: Option, + filtered_candidates: Option, parent: Box, - compute_candidates: bool, } impl<'t> Words<'t> { @@ -23,7 +23,7 @@ impl<'t> Words<'t> { candidates: None, bucket_candidates: None, parent, - compute_candidates: false, + filtered_candidates: None, } } } @@ -42,13 +42,13 @@ impl<'t> Criterion for Words<'t> { match self.query_trees.pop() { Some(query_tree) => { let candidates = match self.candidates.as_mut() { - Some(allowed_candidates) if self.compute_candidates => { + Some(allowed_candidates) => { let mut candidates = resolve_query_tree(self.ctx, &query_tree, params.wdcache)?; candidates &= &*allowed_candidates; *allowed_candidates -= &candidates; Some(candidates) }, - candidates => candidates.cloned(), + None => None, }; let bucket_candidates = match self.bucket_candidates.as_mut() { @@ -59,25 +59,27 @@ impl<'t> Criterion for Words<'t> { return Ok(Some(CriterionResult { query_tree: Some(query_tree), candidates, + filtered_candidates: self.filtered_candidates.clone(), bucket_candidates, })); }, None => { match self.parent.next(params)? { - Some(CriterionResult { query_tree: Some(query_tree), candidates, bucket_candidates }) => { + Some(CriterionResult { query_tree: Some(query_tree), candidates, filtered_candidates, bucket_candidates }) => { self.query_trees = explode_query_tree(query_tree); self.candidates = candidates; - self.compute_candidates = bucket_candidates.is_some(); + self.filtered_candidates = filtered_candidates; self.bucket_candidates = match (self.bucket_candidates.take(), bucket_candidates) { (Some(self_bc), Some(parent_bc)) => Some(self_bc | parent_bc), (self_bc, parent_bc) => self_bc.or(parent_bc), }; }, - Some(CriterionResult { query_tree: None, candidates, bucket_candidates }) => { + Some(CriterionResult { query_tree: None, candidates, filtered_candidates, bucket_candidates }) => { return Ok(Some(CriterionResult { query_tree: None, candidates, + filtered_candidates, bucket_candidates, })); },