Introduce a filtered_candidates field

This commit is contained in:
many 2021-05-10 12:33:37 +02:00
parent efba662ca6
commit a3944a7083
No known key found for this signature in database
GPG Key ID: 2CEF23B75189EACA
9 changed files with 83 additions and 43 deletions

View File

@ -93,23 +93,22 @@ impl<'t> Criterion for AscDesc<'t> {
match self.candidates.next().transpose()? { match self.candidates.next().transpose()? {
None => { None => {
match self.parent.next(params)? { match self.parent.next(params)? {
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { Some(CriterionResult { query_tree, candidates, filtered_candidates, bucket_candidates }) => {
self.query_tree = query_tree; self.query_tree = query_tree;
let candidates = match (&self.query_tree, candidates) { let mut candidates = match (&self.query_tree, candidates) {
(_, Some(mut candidates)) => { (_, Some(candidates)) => candidates & &self.faceted_candidates,
candidates.intersect_with(&self.faceted_candidates);
candidates
},
(Some(qt), None) => { (Some(qt), None) => {
let context = CriteriaBuilder::new(&self.rtxn, &self.index)?; let context = CriteriaBuilder::new(&self.rtxn, &self.index)?;
let mut candidates = resolve_query_tree(&context, qt, params.wdcache)?; let candidates = resolve_query_tree(&context, qt, params.wdcache)?;
candidates -= params.excluded_candidates; candidates & &self.faceted_candidates
candidates.intersect_with(&self.faceted_candidates);
candidates
}, },
(None, None) => take(&mut self.faceted_candidates), (None, None) => take(&mut self.faceted_candidates),
}; };
if let Some(filtered_candidates) = filtered_candidates {
candidates &= filtered_candidates;
}
match bucket_candidates { match bucket_candidates {
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates, Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
None => self.bucket_candidates |= &candidates, None => self.bucket_candidates |= &candidates,
@ -136,6 +135,7 @@ impl<'t> Criterion for AscDesc<'t> {
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: self.query_tree.clone(), query_tree: self.query_tree.clone(),
candidates: Some(candidates), candidates: Some(candidates),
filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)), bucket_candidates: Some(take(&mut self.bucket_candidates)),
})); }));
}, },

View File

@ -25,6 +25,7 @@ const LEVEL_EXPONENTIATION_BASE: u32 = 4;
const CANDIDATES_THRESHOLD: u64 = 1000; const CANDIDATES_THRESHOLD: u64 = 1000;
type FlattenedQueryTree = Vec<Vec<Vec<Query>>>; type FlattenedQueryTree = Vec<Vec<Vec<Query>>>;
pub struct Attribute<'t> { pub struct Attribute<'t> {
ctx: &'t dyn Context<'t>, ctx: &'t dyn Context<'t>,
state: Option<(Operation, FlattenedQueryTree, RoaringBitmap)>, state: Option<(Operation, FlattenedQueryTree, RoaringBitmap)>,
@ -59,6 +60,7 @@ impl<'t> Criterion for Attribute<'t> {
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: Some(query_tree), query_tree: Some(query_tree),
candidates: Some(RoaringBitmap::new()), candidates: Some(RoaringBitmap::new()),
filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)), bucket_candidates: Some(take(&mut self.bucket_candidates)),
})); }));
}, },
@ -78,6 +80,7 @@ impl<'t> Criterion for Attribute<'t> {
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: Some(query_tree), query_tree: Some(query_tree),
candidates: Some(RoaringBitmap::new()), candidates: Some(RoaringBitmap::new()),
filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)), bucket_candidates: Some(take(&mut self.bucket_candidates)),
})); }));
}, },
@ -89,6 +92,7 @@ impl<'t> Criterion for Attribute<'t> {
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: Some(query_tree), query_tree: Some(query_tree),
candidates: Some(RoaringBitmap::new()), candidates: Some(RoaringBitmap::new()),
filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)), bucket_candidates: Some(take(&mut self.bucket_candidates)),
})); }));
}, },
@ -102,17 +106,22 @@ impl<'t> Criterion for Attribute<'t> {
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: Some(query_tree), query_tree: Some(query_tree),
candidates: Some(found_candidates), candidates: Some(found_candidates),
filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)), bucket_candidates: Some(take(&mut self.bucket_candidates)),
})); }));
}, },
None => { None => {
match self.parent.next(params)? { match self.parent.next(params)? {
Some(CriterionResult { query_tree: Some(query_tree), candidates, bucket_candidates }) => { Some(CriterionResult { query_tree: Some(query_tree), candidates, filtered_candidates, bucket_candidates }) => {
let candidates = match candidates { let mut candidates = match candidates {
Some(candidates) => candidates, Some(candidates) => candidates,
None => resolve_query_tree(self.ctx, &query_tree, params.wdcache)? - params.excluded_candidates, None => resolve_query_tree(self.ctx, &query_tree, params.wdcache)? - params.excluded_candidates,
}; };
if let Some(filtered_candidates) = filtered_candidates {
candidates &= filtered_candidates;
}
let flattened_query_tree = flatten_query_tree(&query_tree); let flattened_query_tree = flatten_query_tree(&query_tree);
match bucket_candidates { match bucket_candidates {
@ -123,10 +132,11 @@ impl<'t> Criterion for Attribute<'t> {
self.state = Some((query_tree, flattened_query_tree, candidates)); self.state = Some((query_tree, flattened_query_tree, candidates));
self.current_buckets = None; self.current_buckets = None;
}, },
Some(CriterionResult { query_tree: None, candidates, bucket_candidates }) => { Some(CriterionResult { query_tree: None, candidates, filtered_candidates, bucket_candidates }) => {
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: None, query_tree: None,
candidates, candidates,
filtered_candidates,
bucket_candidates, bucket_candidates,
})); }));
}, },

View File

@ -66,17 +66,22 @@ impl<'t> Criterion for Exactness<'t> {
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: self.query_tree.clone(), query_tree: self.query_tree.clone(),
candidates: Some(candidates), candidates: Some(candidates),
filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)), bucket_candidates: Some(take(&mut self.bucket_candidates)),
})); }));
}, },
None => { None => {
match self.parent.next(params)? { match self.parent.next(params)? {
Some(CriterionResult { query_tree: Some(query_tree), candidates, bucket_candidates }) => { Some(CriterionResult { query_tree: Some(query_tree), candidates, filtered_candidates, bucket_candidates }) => {
let candidates = match candidates { let mut candidates = match candidates {
Some(candidates) => candidates, Some(candidates) => candidates,
None => resolve_query_tree(self.ctx, &query_tree, params.wdcache)?, None => resolve_query_tree(self.ctx, &query_tree, params.wdcache)? - params.excluded_candidates,
}; };
if let Some(filtered_candidates) = filtered_candidates {
candidates &= filtered_candidates;
}
match bucket_candidates { match bucket_candidates {
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates, Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
None => self.bucket_candidates |= &candidates, None => self.bucket_candidates |= &candidates,
@ -85,10 +90,11 @@ impl<'t> Criterion for Exactness<'t> {
self.state = Some(State::new(candidates)); self.state = Some(State::new(candidates));
self.query_tree = Some(query_tree); self.query_tree = Some(query_tree);
}, },
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { Some(CriterionResult { query_tree: None, candidates, filtered_candidates, bucket_candidates }) => {
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree, query_tree: None,
candidates, candidates,
filtered_candidates,
bucket_candidates, bucket_candidates,
})); }));
}, },

View File

@ -31,27 +31,32 @@ impl<'t> Final<'t> {
#[logging_timer::time("Final::{}")] #[logging_timer::time("Final::{}")]
pub fn next(&mut self, excluded_candidates: &RoaringBitmap) -> anyhow::Result<Option<FinalResult>> { pub fn next(&mut self, excluded_candidates: &RoaringBitmap) -> anyhow::Result<Option<FinalResult>> {
debug!("Final iteration"); debug!("Final iteration");
let excluded_candidates = &self.returned_candidates | excluded_candidates;
let mut criterion_parameters = CriterionParameters { let mut criterion_parameters = CriterionParameters {
wdcache: &mut self.wdcache, wdcache: &mut self.wdcache,
// returned_candidates is merged with excluded_candidates to avoid duplicates // returned_candidates is merged with excluded_candidates to avoid duplicates
excluded_candidates: &(&self.returned_candidates | excluded_candidates), excluded_candidates: &excluded_candidates,
}; };
match self.parent.next(&mut criterion_parameters)? { match self.parent.next(&mut criterion_parameters)? {
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { Some(CriterionResult { query_tree, candidates, filtered_candidates, bucket_candidates }) => {
let candidates = match (candidates, query_tree.as_ref()) { let mut candidates = match (candidates, query_tree.as_ref()) {
(Some(candidates), _) => candidates, (Some(candidates), _) => candidates,
(None, Some(qt)) => resolve_query_tree(self.ctx, qt, &mut self.wdcache)?, (None, Some(qt)) => resolve_query_tree(self.ctx, qt, &mut self.wdcache)? - excluded_candidates,
(None, None) => self.ctx.documents_ids()?, (None, None) => self.ctx.documents_ids()? - excluded_candidates,
}; };
if let Some(filtered_candidates) = filtered_candidates {
candidates &= filtered_candidates;
}
let bucket_candidates = bucket_candidates.unwrap_or_else(|| candidates.clone()); let bucket_candidates = bucket_candidates.unwrap_or_else(|| candidates.clone());
self.returned_candidates |= &candidates; self.returned_candidates |= &candidates;
return Ok(Some(FinalResult { query_tree, candidates, bucket_candidates })); Ok(Some(FinalResult { query_tree, candidates, bucket_candidates }))
}, },
None => return Ok(None), None => Ok(None),
} }
} }
} }

View File

@ -9,10 +9,11 @@ pub struct Initial {
} }
impl Initial { impl Initial {
pub fn new(query_tree: Option<Operation>, mut candidates: Option<RoaringBitmap>) -> Initial { pub fn new(query_tree: Option<Operation>, filtered_candidates: Option<RoaringBitmap>) -> Initial {
let answer = CriterionResult { let answer = CriterionResult {
query_tree, query_tree,
candidates: candidates.take(), candidates: None,
filtered_candidates,
bucket_candidates: None, bucket_candidates: None,
}; };
Initial { answer: Some(answer) } Initial { answer: Some(answer) }

View File

@ -38,6 +38,8 @@ pub struct CriterionResult {
/// The candidates that this criterion is allowed to return subsets of, /// The candidates that this criterion is allowed to return subsets of,
/// if None, it is up to the child to compute the candidates itself. /// if None, it is up to the child to compute the candidates itself.
candidates: Option<RoaringBitmap>, candidates: Option<RoaringBitmap>,
/// The candidates, coming from facet filters, that this criterion is allowed to return subsets of.
filtered_candidates: Option<RoaringBitmap>,
/// Candidates that come from the current bucket of the initial criterion. /// Candidates that come from the current bucket of the initial criterion.
bucket_candidates: Option<RoaringBitmap>, bucket_candidates: Option<RoaringBitmap>,
} }

View File

@ -113,17 +113,22 @@ impl<'t> Criterion for Proximity<'t> {
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: Some(query_tree.clone()), query_tree: Some(query_tree.clone()),
candidates: Some(new_candidates), candidates: Some(new_candidates),
filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)), bucket_candidates: Some(take(&mut self.bucket_candidates)),
})); }));
}, },
None => { None => {
match self.parent.next(params)? { match self.parent.next(params)? {
Some(CriterionResult { query_tree: Some(query_tree), candidates, bucket_candidates }) => { Some(CriterionResult { query_tree: Some(query_tree), candidates, filtered_candidates, bucket_candidates }) => {
let candidates = match candidates { let mut candidates = match candidates {
Some(candidates) => candidates, Some(candidates) => candidates,
None => resolve_query_tree(self.ctx, &query_tree, params.wdcache)? - params.excluded_candidates, None => resolve_query_tree(self.ctx, &query_tree, params.wdcache)? - params.excluded_candidates,
}; };
if let Some(filtered_candidates) = filtered_candidates {
candidates &= filtered_candidates;
}
match bucket_candidates { match bucket_candidates {
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates, Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
None => self.bucket_candidates |= &candidates, None => self.bucket_candidates |= &candidates,
@ -134,10 +139,11 @@ impl<'t> Criterion for Proximity<'t> {
self.proximity = 0; self.proximity = 0;
self.plane_sweep_cache = None; self.plane_sweep_cache = None;
}, },
Some(CriterionResult { query_tree: None, candidates, bucket_candidates }) => { Some(CriterionResult { query_tree: None, candidates, filtered_candidates, bucket_candidates }) => {
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: None, query_tree: None,
candidates, candidates,
filtered_candidates,
bucket_candidates, bucket_candidates,
})); }));
}, },

View File

@ -119,30 +119,33 @@ impl<'t> Criterion for Typo<'t> {
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: Some(new_query_tree), query_tree: Some(new_query_tree),
candidates: Some(candidates), candidates: Some(candidates),
filtered_candidates: None,
bucket_candidates: Some(bucket_candidates), bucket_candidates: Some(bucket_candidates),
})); }));
}, },
None => { None => {
match self.parent.next(params)? { match self.parent.next(params)? {
Some(CriterionResult { query_tree: Some(query_tree), candidates, bucket_candidates }) => { Some(CriterionResult { query_tree: Some(query_tree), candidates, filtered_candidates, bucket_candidates }) => {
self.bucket_candidates = match (self.bucket_candidates.take(), bucket_candidates) { self.bucket_candidates = match (self.bucket_candidates.take(), bucket_candidates) {
(Some(self_bc), Some(parent_bc)) => Some(self_bc | parent_bc), (Some(self_bc), Some(parent_bc)) => Some(self_bc | parent_bc),
(self_bc, parent_bc) => self_bc.or(parent_bc), (self_bc, parent_bc) => self_bc.or(parent_bc),
}; };
let candidates = candidates.map_or_else(|| { let candidates = match candidates.or(filtered_candidates) {
Candidates::Forbidden(params.excluded_candidates.clone()) Some(candidates) => Candidates::Allowed(candidates - params.excluded_candidates),
}, Candidates::Allowed); None => Candidates::Forbidden(params.excluded_candidates.clone()),
};
let maximum_typos = maximum_typo(&query_tree) as u8; let maximum_typos = maximum_typo(&query_tree) as u8;
self.state = Some((maximum_typos, query_tree, candidates)); self.state = Some((maximum_typos, query_tree, candidates));
self.typos = 0; self.typos = 0;
}, },
Some(CriterionResult { query_tree: None, candidates, bucket_candidates }) => { Some(CriterionResult { query_tree: None, candidates, filtered_candidates, bucket_candidates }) => {
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: None, query_tree: None,
candidates, candidates,
filtered_candidates,
bucket_candidates, bucket_candidates,
})); }));
}, },
@ -377,6 +380,7 @@ mod test {
])), ])),
candidates: Some(candidates_1.clone()), candidates: Some(candidates_1.clone()),
bucket_candidates: Some(candidates_1), bucket_candidates: Some(candidates_1),
filtered_candidates: None,
}; };
assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_1)); assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_1));
@ -399,6 +403,7 @@ mod test {
])), ])),
candidates: Some(candidates_2.clone()), candidates: Some(candidates_2.clone()),
bucket_candidates: Some(candidates_2), bucket_candidates: Some(candidates_2),
filtered_candidates: None,
}; };
assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_2)); assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_2));
@ -419,8 +424,9 @@ mod test {
let expected = CriterionResult { let expected = CriterionResult {
query_tree: None, query_tree: None,
candidates: Some(facet_candidates.clone()), candidates: None,
bucket_candidates: None, bucket_candidates: None,
filtered_candidates: Some(facet_candidates.clone()),
}; };
// first iteration, returns the facet candidates // first iteration, returns the facet candidates
@ -464,6 +470,7 @@ mod test {
])), ])),
candidates: Some(&candidates_1 & &facet_candidates), candidates: Some(&candidates_1 & &facet_candidates),
bucket_candidates: Some(&candidates_1 & &facet_candidates), bucket_candidates: Some(&candidates_1 & &facet_candidates),
filtered_candidates: None,
}; };
assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_1)); assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_1));
@ -486,6 +493,7 @@ mod test {
])), ])),
candidates: Some(&candidates_2 & &facet_candidates), candidates: Some(&candidates_2 & &facet_candidates),
bucket_candidates: Some(&candidates_2 & &facet_candidates), bucket_candidates: Some(&candidates_2 & &facet_candidates),
filtered_candidates: None,
}; };
assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_2)); assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_2));

View File

@ -11,8 +11,8 @@ pub struct Words<'t> {
query_trees: Vec<Operation>, query_trees: Vec<Operation>,
candidates: Option<RoaringBitmap>, candidates: Option<RoaringBitmap>,
bucket_candidates: Option<RoaringBitmap>, bucket_candidates: Option<RoaringBitmap>,
filtered_candidates: Option<RoaringBitmap>,
parent: Box<dyn Criterion + 't>, parent: Box<dyn Criterion + 't>,
compute_candidates: bool,
} }
impl<'t> Words<'t> { impl<'t> Words<'t> {
@ -23,7 +23,7 @@ impl<'t> Words<'t> {
candidates: None, candidates: None,
bucket_candidates: None, bucket_candidates: None,
parent, parent,
compute_candidates: false, filtered_candidates: None,
} }
} }
} }
@ -42,13 +42,13 @@ impl<'t> Criterion for Words<'t> {
match self.query_trees.pop() { match self.query_trees.pop() {
Some(query_tree) => { Some(query_tree) => {
let candidates = match self.candidates.as_mut() { let candidates = match self.candidates.as_mut() {
Some(allowed_candidates) if self.compute_candidates => { Some(allowed_candidates) => {
let mut candidates = resolve_query_tree(self.ctx, &query_tree, params.wdcache)?; let mut candidates = resolve_query_tree(self.ctx, &query_tree, params.wdcache)?;
candidates &= &*allowed_candidates; candidates &= &*allowed_candidates;
*allowed_candidates -= &candidates; *allowed_candidates -= &candidates;
Some(candidates) Some(candidates)
}, },
candidates => candidates.cloned(), None => None,
}; };
let bucket_candidates = match self.bucket_candidates.as_mut() { let bucket_candidates = match self.bucket_candidates.as_mut() {
@ -59,25 +59,27 @@ impl<'t> Criterion for Words<'t> {
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: Some(query_tree), query_tree: Some(query_tree),
candidates, candidates,
filtered_candidates: self.filtered_candidates.clone(),
bucket_candidates, bucket_candidates,
})); }));
}, },
None => { None => {
match self.parent.next(params)? { match self.parent.next(params)? {
Some(CriterionResult { query_tree: Some(query_tree), candidates, bucket_candidates }) => { Some(CriterionResult { query_tree: Some(query_tree), candidates, filtered_candidates, bucket_candidates }) => {
self.query_trees = explode_query_tree(query_tree); self.query_trees = explode_query_tree(query_tree);
self.candidates = candidates; self.candidates = candidates;
self.compute_candidates = bucket_candidates.is_some(); self.filtered_candidates = filtered_candidates;
self.bucket_candidates = match (self.bucket_candidates.take(), bucket_candidates) { self.bucket_candidates = match (self.bucket_candidates.take(), bucket_candidates) {
(Some(self_bc), Some(parent_bc)) => Some(self_bc | parent_bc), (Some(self_bc), Some(parent_bc)) => Some(self_bc | parent_bc),
(self_bc, parent_bc) => self_bc.or(parent_bc), (self_bc, parent_bc) => self_bc.or(parent_bc),
}; };
}, },
Some(CriterionResult { query_tree: None, candidates, bucket_candidates }) => { Some(CriterionResult { query_tree: None, candidates, filtered_candidates, bucket_candidates }) => {
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: None, query_tree: None,
candidates, candidates,
filtered_candidates,
bucket_candidates, bucket_candidates,
})); }));
}, },