190: Make bucket candidates optionals r=Kerollmops a=LegendreM

Before, the bucket candidates were either the result of the facet filters or the result of the query tree.
They will now be only the result of the query tree, making the number of candidates more consistent between the same request with or without facet filters.

Fix some clippy warnings.

Fix #186 

Co-authored-by: many <maxime@meilisearch.com>
This commit is contained in:
bors[bot] 2021-05-24 11:19:32 +00:00 committed by GitHub
commit 49bee2ebc5
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
10 changed files with 363 additions and 393 deletions

View File

@ -93,33 +93,25 @@ impl<'t> Criterion for AscDesc<'t> {
match self.candidates.next().transpose()? { match self.candidates.next().transpose()? {
None => { None => {
match self.parent.next(params)? { match self.parent.next(params)? {
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { Some(CriterionResult { query_tree, candidates, filtered_candidates, bucket_candidates }) => {
let candidates_is_some = candidates.is_some();
self.query_tree = query_tree; self.query_tree = query_tree;
let candidates = match (&self.query_tree, candidates) { let mut candidates = match (&self.query_tree, candidates) {
(_, Some(mut candidates)) => { (_, Some(candidates)) => candidates & &self.faceted_candidates,
candidates.intersect_with(&self.faceted_candidates);
candidates
},
(Some(qt), None) => { (Some(qt), None) => {
let context = CriteriaBuilder::new(&self.rtxn, &self.index)?; let context = CriteriaBuilder::new(&self.rtxn, &self.index)?;
let mut candidates = resolve_query_tree(&context, qt, &mut HashMap::new(), params.wdcache)?; let candidates = resolve_query_tree(&context, qt, params.wdcache)?;
candidates -= params.excluded_candidates; candidates & &self.faceted_candidates
candidates.intersect_with(&self.faceted_candidates);
candidates
}, },
(None, None) => take(&mut self.faceted_candidates), (None, None) => take(&mut self.faceted_candidates),
}; };
// If our parent returns candidates it means that the bucket if let Some(filtered_candidates) = filtered_candidates {
// candidates were already computed before and we can use them. candidates &= filtered_candidates;
// }
// If not, we must use the just computed candidates as our bucket
// candidates. match bucket_candidates {
if candidates_is_some { Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
self.bucket_candidates.union_with(&bucket_candidates); None => self.bucket_candidates |= &candidates,
} else {
self.bucket_candidates.union_with(&candidates);
} }
if candidates.is_empty() { if candidates.is_empty() {
@ -143,7 +135,8 @@ impl<'t> Criterion for AscDesc<'t> {
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: self.query_tree.clone(), query_tree: self.query_tree.clone(),
candidates: Some(candidates), candidates: Some(candidates),
bucket_candidates: take(&mut self.bucket_candidates), filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)),
})); }));
}, },
} }
@ -222,7 +215,7 @@ fn iterative_facet_ordered_iter<'t>(
docids_values.push((docid, OrderedFloat(value))); docids_values.push((docid, OrderedFloat(value)));
} }
} }
docids_values.sort_unstable_by_key(|(_, v)| v.clone()); docids_values.sort_unstable_by_key(|(_, v)| *v);
let iter = docids_values.into_iter(); let iter = docids_values.into_iter();
let iter = if ascending { let iter = if ascending {
Box::new(iter) as Box<dyn Iterator<Item = _>> Box::new(iter) as Box<dyn Iterator<Item = _>>
@ -233,7 +226,7 @@ fn iterative_facet_ordered_iter<'t>(
// The itertools GroupBy iterator doesn't provide an owned version, we are therefore // The itertools GroupBy iterator doesn't provide an owned version, we are therefore
// required to collect the result into an owned collection (a Vec). // required to collect the result into an owned collection (a Vec).
// https://github.com/rust-itertools/itertools/issues/499 // https://github.com/rust-itertools/itertools/issues/499
let vec: Vec<_> = iter.group_by(|(_, v)| v.clone()) let vec: Vec<_> = iter.group_by(|(_, v)| *v)
.into_iter() .into_iter()
.map(|(_, ids)| ids.map(|(id, _)| id).collect()) .map(|(_, ids)| ids.map(|(id, _)| id).collect())
.collect(); .collect();

View File

@ -24,13 +24,13 @@ const LEVEL_EXPONENTIATION_BASE: u32 = 4;
/// the system to choose between one algorithm or another. /// the system to choose between one algorithm or another.
const CANDIDATES_THRESHOLD: u64 = 1000; const CANDIDATES_THRESHOLD: u64 = 1000;
type FlattenedQueryTree = Vec<Vec<Vec<Query>>>;
pub struct Attribute<'t> { pub struct Attribute<'t> {
ctx: &'t dyn Context<'t>, ctx: &'t dyn Context<'t>,
query_tree: Option<Operation>, state: Option<(Operation, FlattenedQueryTree, RoaringBitmap)>,
candidates: Option<RoaringBitmap>,
bucket_candidates: RoaringBitmap, bucket_candidates: RoaringBitmap,
parent: Box<dyn Criterion + 't>, parent: Box<dyn Criterion + 't>,
flattened_query_tree: Option<Vec<Vec<Vec<Query>>>>,
current_buckets: Option<btree_map::IntoIter<u64, RoaringBitmap>>, current_buckets: Option<btree_map::IntoIter<u64, RoaringBitmap>>,
} }
@ -38,11 +38,9 @@ impl<'t> Attribute<'t> {
pub fn new(ctx: &'t dyn Context<'t>, parent: Box<dyn Criterion + 't>) -> Self { pub fn new(ctx: &'t dyn Context<'t>, parent: Box<dyn Criterion + 't>) -> Self {
Attribute { Attribute {
ctx, ctx,
query_tree: None, state: None,
candidates: None,
bucket_candidates: RoaringBitmap::new(), bucket_candidates: RoaringBitmap::new(),
parent, parent,
flattened_query_tree: None,
current_buckets: None, current_buckets: None,
} }
} }
@ -52,29 +50,26 @@ impl<'t> Criterion for Attribute<'t> {
#[logging_timer::time("Attribute::{}")] #[logging_timer::time("Attribute::{}")]
fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> { fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> {
// remove excluded candidates when next is called, instead of doing it in the loop. // remove excluded candidates when next is called, instead of doing it in the loop.
if let Some(candidates) = self.candidates.as_mut() { if let Some((_, _, allowed_candidates)) = self.state.as_mut() {
*candidates -= params.excluded_candidates; *allowed_candidates -= params.excluded_candidates;
} }
loop { loop {
match (&self.query_tree, &mut self.candidates) { match self.state.take() {
(_, Some(candidates)) if candidates.is_empty() => { Some((query_tree, _, allowed_candidates)) if allowed_candidates.is_empty() => {
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: self.query_tree.take(), query_tree: Some(query_tree),
candidates: self.candidates.take(), candidates: Some(RoaringBitmap::new()),
bucket_candidates: take(&mut self.bucket_candidates), filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)),
})); }));
}, },
(Some(qt), Some(candidates)) => { Some((query_tree, flattened_query_tree, mut allowed_candidates)) => {
let flattened_query_tree = self.flattened_query_tree.get_or_insert_with(|| { let found_candidates = if allowed_candidates.len() < CANDIDATES_THRESHOLD {
flatten_query_tree(&qt)
});
let found_candidates = if candidates.len() < CANDIDATES_THRESHOLD {
let current_buckets = match self.current_buckets.as_mut() { let current_buckets = match self.current_buckets.as_mut() {
Some(current_buckets) => current_buckets, Some(current_buckets) => current_buckets,
None => { None => {
let new_buckets = linear_compute_candidates(self.ctx, flattened_query_tree, candidates)?; let new_buckets = linear_compute_candidates(self.ctx, &flattened_query_tree, &allowed_candidates)?;
self.current_buckets.get_or_insert(new_buckets.into_iter()) self.current_buckets.get_or_insert(new_buckets.into_iter())
}, },
}; };
@ -83,62 +78,68 @@ impl<'t> Criterion for Attribute<'t> {
Some((_score, candidates)) => candidates, Some((_score, candidates)) => candidates,
None => { None => {
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: self.query_tree.take(), query_tree: Some(query_tree),
candidates: self.candidates.take(), candidates: Some(RoaringBitmap::new()),
bucket_candidates: take(&mut self.bucket_candidates), filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)),
})); }));
}, },
} }
} else { } else {
match set_compute_candidates(self.ctx, flattened_query_tree, candidates, params.wdcache)? { match set_compute_candidates(self.ctx, &flattened_query_tree, &allowed_candidates, params.wdcache)? {
Some(candidates) => candidates, Some(candidates) => candidates,
None => { None => {
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: self.query_tree.take(), query_tree: Some(query_tree),
candidates: self.candidates.take(), candidates: Some(RoaringBitmap::new()),
bucket_candidates: take(&mut self.bucket_candidates), filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)),
})); }));
}, },
} }
}; };
candidates.difference_with(&found_candidates); allowed_candidates -= &found_candidates;
self.state = Some((query_tree.clone(), flattened_query_tree, allowed_candidates));
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: self.query_tree.clone(), query_tree: Some(query_tree),
candidates: Some(found_candidates), candidates: Some(found_candidates),
bucket_candidates: take(&mut self.bucket_candidates), filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)),
})); }));
}, },
(Some(qt), None) => { None => {
let mut query_tree_candidates = resolve_query_tree(self.ctx, &qt, &mut HashMap::new(), params.wdcache)?;
query_tree_candidates -= params.excluded_candidates;
self.bucket_candidates |= &query_tree_candidates;
self.candidates = Some(query_tree_candidates);
},
(None, Some(_)) => {
return Ok(Some(CriterionResult {
query_tree: self.query_tree.take(),
candidates: self.candidates.take(),
bucket_candidates: take(&mut self.bucket_candidates),
}));
},
(None, None) => {
match self.parent.next(params)? { match self.parent.next(params)? {
Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => { Some(CriterionResult { query_tree: Some(query_tree), candidates, filtered_candidates, bucket_candidates }) => {
let mut candidates = match candidates {
Some(candidates) => candidates,
None => resolve_query_tree(self.ctx, &query_tree, params.wdcache)? - params.excluded_candidates,
};
if let Some(filtered_candidates) = filtered_candidates {
candidates &= filtered_candidates;
}
let flattened_query_tree = flatten_query_tree(&query_tree);
match bucket_candidates {
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
None => self.bucket_candidates |= &candidates,
}
self.state = Some((query_tree, flattened_query_tree, candidates));
self.current_buckets = None;
},
Some(CriterionResult { query_tree: None, candidates, filtered_candidates, bucket_candidates }) => {
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: None, query_tree: None,
candidates: None, candidates,
filtered_candidates,
bucket_candidates, bucket_candidates,
})); }));
}, },
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
self.query_tree = query_tree;
self.candidates = candidates;
self.bucket_candidates |= bucket_candidates;
self.flattened_query_tree = None;
self.current_buckets = None;
},
None => return Ok(None), None => return Ok(None),
} }
}, },
@ -164,7 +165,7 @@ impl<'t, 'q> WordLevelIterator<'t, 'q> {
fn new(ctx: &'t dyn Context<'t>, word: Cow<'q, str>, in_prefix_cache: bool) -> heed::Result<Option<Self>> { fn new(ctx: &'t dyn Context<'t>, word: Cow<'q, str>, in_prefix_cache: bool) -> heed::Result<Option<Self>> {
match ctx.word_position_last_level(&word, in_prefix_cache)? { match ctx.word_position_last_level(&word, in_prefix_cache)? {
Some(level) => { Some(level) => {
let interval_size = LEVEL_EXPONENTIATION_BASE.pow(Into::<u8>::into(level.clone()) as u32); let interval_size = LEVEL_EXPONENTIATION_BASE.pow(Into::<u8>::into(level) as u32);
let inner = ctx.word_position_iterator(&word, level, in_prefix_cache, None, None)?; let inner = ctx.word_position_iterator(&word, level, in_prefix_cache, None, None)?;
Ok(Some(Self { inner, level, interval_size, word, in_prefix_cache, inner_next: None, current_interval: None })) Ok(Some(Self { inner, level, interval_size, word, in_prefix_cache, inner_next: None, current_interval: None }))
}, },
@ -173,8 +174,8 @@ impl<'t, 'q> WordLevelIterator<'t, 'q> {
} }
fn dig(&self, ctx: &'t dyn Context<'t>, level: &TreeLevel, left_interval: Option<u32>) -> heed::Result<Self> { fn dig(&self, ctx: &'t dyn Context<'t>, level: &TreeLevel, left_interval: Option<u32>) -> heed::Result<Self> {
let level = level.min(&self.level).clone(); let level = *level.min(&self.level);
let interval_size = LEVEL_EXPONENTIATION_BASE.pow(Into::<u8>::into(level.clone()) as u32); let interval_size = LEVEL_EXPONENTIATION_BASE.pow(Into::<u8>::into(level) as u32);
let word = self.word.clone(); let word = self.word.clone();
let in_prefix_cache = self.in_prefix_cache; let in_prefix_cache = self.in_prefix_cache;
let inner = ctx.word_position_iterator(&word, level, in_prefix_cache, left_interval, None)?; let inner = ctx.word_position_iterator(&word, level, in_prefix_cache, left_interval, None)?;
@ -223,7 +224,7 @@ struct QueryLevelIterator<'t, 'q> {
} }
impl<'t, 'q> QueryLevelIterator<'t, 'q> { impl<'t, 'q> QueryLevelIterator<'t, 'q> {
fn new(ctx: &'t dyn Context<'t>, queries: &'q Vec<Query>, wdcache: &mut WordDerivationsCache) -> anyhow::Result<Option<Self>> { fn new(ctx: &'t dyn Context<'t>, queries: &'q [Query], wdcache: &mut WordDerivationsCache) -> anyhow::Result<Option<Self>> {
let mut inner = Vec::with_capacity(queries.len()); let mut inner = Vec::with_capacity(queries.len());
for query in queries { for query in queries {
match &query.kind { match &query.kind {
@ -253,7 +254,7 @@ impl<'t, 'q> QueryLevelIterator<'t, 'q> {
} }
} }
let highest = inner.iter().max_by_key(|wli| wli.level).map(|wli| wli.level.clone()); let highest = inner.iter().max_by_key(|wli| wli.level).map(|wli| wli.level);
match highest { match highest {
Some(level) => Ok(Some(Self { Some(level) => Ok(Some(Self {
parent: None, parent: None,
@ -296,7 +297,7 @@ impl<'t, 'q> QueryLevelIterator<'t, 'q> {
let u8_level = Into::<u8>::into(level); let u8_level = Into::<u8>::into(level);
let interval_size = LEVEL_EXPONENTIATION_BASE.pow(u8_level as u32); let interval_size = LEVEL_EXPONENTIATION_BASE.pow(u8_level as u32);
for wli in self.inner.iter_mut() { for wli in self.inner.iter_mut() {
let wli_u8_level = Into::<u8>::into(wli.level.clone()); let wli_u8_level = Into::<u8>::into(wli.level);
let accumulated_count = LEVEL_EXPONENTIATION_BASE.pow((u8_level - wli_u8_level) as u32); let accumulated_count = LEVEL_EXPONENTIATION_BASE.pow((u8_level - wli_u8_level) as u32);
for _ in 0..accumulated_count { for _ in 0..accumulated_count {
if let Some((next_left, _, next_docids)) = wli.next()? { if let Some((next_left, _, next_docids)) = wli.next()? {
@ -373,8 +374,8 @@ fn interval_to_skip(
already_skiped: usize, already_skiped: usize,
allowed_candidates: &RoaringBitmap, allowed_candidates: &RoaringBitmap,
) -> usize { ) -> usize {
parent_accumulator.into_iter() parent_accumulator.iter()
.zip(current_accumulator.into_iter()) .zip(current_accumulator.iter())
.skip(already_skiped) .skip(already_skiped)
.take_while(|(parent, current)| { .take_while(|(parent, current)| {
let skip_parent = parent.as_ref().map_or(true, |(_, _, docids)| docids.is_empty()); let skip_parent = parent.as_ref().map_or(true, |(_, _, docids)| docids.is_empty());
@ -419,7 +420,7 @@ impl<'t, 'q> Branch<'t, 'q> {
/// update inner interval in order to be ranked by the binary_heap without computing it, /// update inner interval in order to be ranked by the binary_heap without computing it,
/// the next() method should be called when the real interval is needed. /// the next() method should be called when the real interval is needed.
fn lazy_next(&mut self) { fn lazy_next(&mut self) {
let u8_level = Into::<u8>::into(self.tree_level.clone()); let u8_level = Into::<u8>::into(self.tree_level);
let interval_size = LEVEL_EXPONENTIATION_BASE.pow(u8_level as u32); let interval_size = LEVEL_EXPONENTIATION_BASE.pow(u8_level as u32);
let (left, right, _) = self.last_result; let (left, right, _) = self.last_result;
@ -467,7 +468,7 @@ impl<'t, 'q> Eq for Branch<'t, 'q> {}
fn initialize_query_level_iterators<'t, 'q>( fn initialize_query_level_iterators<'t, 'q>(
ctx: &'t dyn Context<'t>, ctx: &'t dyn Context<'t>,
branches: &'q Vec<Vec<Vec<Query>>>, branches: &'q FlattenedQueryTree,
allowed_candidates: &RoaringBitmap, allowed_candidates: &RoaringBitmap,
wdcache: &mut WordDerivationsCache, wdcache: &mut WordDerivationsCache,
) -> anyhow::Result<BinaryHeap<Branch<'t, 'q>>> { ) -> anyhow::Result<BinaryHeap<Branch<'t, 'q>>> {
@ -517,7 +518,7 @@ fn initialize_query_level_iterators<'t, 'q>(
fn set_compute_candidates<'t>( fn set_compute_candidates<'t>(
ctx: &'t dyn Context<'t>, ctx: &'t dyn Context<'t>,
branches: &Vec<Vec<Vec<Query>>>, branches: &FlattenedQueryTree,
allowed_candidates: &RoaringBitmap, allowed_candidates: &RoaringBitmap,
wdcache: &mut WordDerivationsCache, wdcache: &mut WordDerivationsCache,
) -> anyhow::Result<Option<RoaringBitmap>> ) -> anyhow::Result<Option<RoaringBitmap>>
@ -570,11 +571,11 @@ fn set_compute_candidates<'t>(
fn linear_compute_candidates( fn linear_compute_candidates(
ctx: &dyn Context, ctx: &dyn Context,
branches: &Vec<Vec<Vec<Query>>>, branches: &FlattenedQueryTree,
allowed_candidates: &RoaringBitmap, allowed_candidates: &RoaringBitmap,
) -> anyhow::Result<BTreeMap<u64, RoaringBitmap>> ) -> anyhow::Result<BTreeMap<u64, RoaringBitmap>>
{ {
fn compute_candidate_rank(branches: &Vec<Vec<Vec<Query>>>, words_positions: HashMap<String, RoaringBitmap>) -> u64 { fn compute_candidate_rank(branches: &FlattenedQueryTree, words_positions: HashMap<String, RoaringBitmap>) -> u64 {
let mut min_rank = u64::max_value(); let mut min_rank = u64::max_value();
for branch in branches { for branch in branches {
@ -659,10 +660,10 @@ fn linear_compute_candidates(
} }
// TODO can we keep refs of Query // TODO can we keep refs of Query
fn flatten_query_tree(query_tree: &Operation) -> Vec<Vec<Vec<Query>>> { fn flatten_query_tree(query_tree: &Operation) -> FlattenedQueryTree {
use crate::search::criteria::Operation::{And, Or, Consecutive}; use crate::search::criteria::Operation::{And, Or, Consecutive};
fn and_recurse(head: &Operation, tail: &[Operation]) -> Vec<Vec<Vec<Query>>> { fn and_recurse(head: &Operation, tail: &[Operation]) -> FlattenedQueryTree {
match tail.split_first() { match tail.split_first() {
Some((thead, tail)) => { Some((thead, tail)) => {
let tail = and_recurse(thead, tail); let tail = and_recurse(thead, tail);
@ -680,7 +681,7 @@ fn flatten_query_tree(query_tree: &Operation) -> Vec<Vec<Vec<Query>>> {
} }
} }
fn recurse(op: &Operation) -> Vec<Vec<Vec<Query>>> { fn recurse(op: &Operation) -> FlattenedQueryTree {
match op { match op {
And(ops) | Consecutive(ops) => { And(ops) | Consecutive(ops) => {
ops.split_first().map_or_else(Vec::new, |(h, t)| and_recurse(h, t)) ops.split_first().map_or_else(Vec::new, |(h, t)| and_recurse(h, t))
@ -688,7 +689,7 @@ fn flatten_query_tree(query_tree: &Operation) -> Vec<Vec<Vec<Query>>> {
Or(_, ops) => if ops.iter().all(|op| op.query().is_some()) { Or(_, ops) => if ops.iter().all(|op| op.query().is_some()) {
vec![vec![ops.iter().flat_map(|op| op.query()).cloned().collect()]] vec![vec![ops.iter().flat_map(|op| op.query()).cloned().collect()]]
} else { } else {
ops.into_iter().map(recurse).flatten().collect() ops.iter().map(recurse).flatten().collect()
}, },
Operation::Query(query) => vec![vec![vec![query.clone()]]], Operation::Query(query) => vec![vec![vec![query.clone()]]],
} }

View File

@ -1,4 +1,4 @@
use std::{collections::HashMap, mem}; use std::mem::take;
use log::debug; use log::debug;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
@ -60,30 +60,41 @@ impl<'t> Criterion for Exactness<'t> {
self.query_tree = None; self.query_tree = None;
}, },
Some(state) => { Some(state) => {
let (candidates, state) = resolve_state(self.ctx, mem::take(state), &self.query)?; let (candidates, state) = resolve_state(self.ctx, take(state), &self.query)?;
self.state = state; self.state = state;
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: self.query_tree.clone(), query_tree: self.query_tree.clone(),
candidates: Some(candidates), candidates: Some(candidates),
bucket_candidates: mem::take(&mut self.bucket_candidates), filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)),
})); }));
}, },
None => { None => {
match self.parent.next(params)? { match self.parent.next(params)? {
Some(CriterionResult { query_tree: Some(query_tree), candidates, bucket_candidates }) => { Some(CriterionResult { query_tree: Some(query_tree), candidates, filtered_candidates, bucket_candidates }) => {
let candidates = match candidates { let mut candidates = match candidates {
Some(candidates) => candidates, Some(candidates) => candidates,
None => resolve_query_tree(self.ctx, &query_tree, &mut HashMap::new(), params.wdcache)?, None => resolve_query_tree(self.ctx, &query_tree, params.wdcache)? - params.excluded_candidates,
}; };
if let Some(filtered_candidates) = filtered_candidates {
candidates &= filtered_candidates;
}
match bucket_candidates {
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
None => self.bucket_candidates |= &candidates,
}
self.state = Some(State::new(candidates)); self.state = Some(State::new(candidates));
self.query_tree = Some(query_tree); self.query_tree = Some(query_tree);
self.bucket_candidates |= bucket_candidates;
}, },
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => { Some(CriterionResult { query_tree: None, candidates, filtered_candidates, bucket_candidates }) => {
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree, query_tree: None,
candidates, candidates,
filtered_candidates,
bucket_candidates, bucket_candidates,
})); }));
}, },

View File

@ -1,5 +1,3 @@
use std::collections::HashMap;
use log::debug; use log::debug;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
@ -32,34 +30,33 @@ impl<'t> Final<'t> {
#[logging_timer::time("Final::{}")] #[logging_timer::time("Final::{}")]
pub fn next(&mut self, excluded_candidates: &RoaringBitmap) -> anyhow::Result<Option<FinalResult>> { pub fn next(&mut self, excluded_candidates: &RoaringBitmap) -> anyhow::Result<Option<FinalResult>> {
loop { debug!("Final iteration");
debug!("Final iteration"); let excluded_candidates = &self.returned_candidates | excluded_candidates;
let mut criterion_parameters = CriterionParameters { let mut criterion_parameters = CriterionParameters {
wdcache: &mut self.wdcache, wdcache: &mut self.wdcache,
// returned_candidates is merged with excluded_candidates to avoid duplicates // returned_candidates is merged with excluded_candidates to avoid duplicates
excluded_candidates: &(&self.returned_candidates | excluded_candidates), excluded_candidates: &excluded_candidates,
}; };
match self.parent.next(&mut criterion_parameters)? { match self.parent.next(&mut criterion_parameters)? {
Some(CriterionResult { query_tree, candidates, mut bucket_candidates }) => { Some(CriterionResult { query_tree, candidates, filtered_candidates, bucket_candidates }) => {
let candidates = match candidates { let mut candidates = match (candidates, query_tree.as_ref()) {
Some(candidates) => candidates, (Some(candidates), _) => candidates,
None => { (None, Some(qt)) => resolve_query_tree(self.ctx, qt, &mut self.wdcache)? - excluded_candidates,
let candidates = match query_tree.as_ref() { (None, None) => self.ctx.documents_ids()? - excluded_candidates,
Some(qt) => resolve_query_tree(self.ctx, qt, &mut HashMap::new(), &mut self.wdcache)?, };
None => self.ctx.documents_ids()?,
};
bucket_candidates |= &candidates;
candidates
}
};
self.returned_candidates |= &candidates; if let Some(filtered_candidates) = filtered_candidates {
candidates &= filtered_candidates;
}
return Ok(Some(FinalResult { query_tree, candidates, bucket_candidates })); let bucket_candidates = bucket_candidates.unwrap_or_else(|| candidates.clone());
},
None => return Ok(None), self.returned_candidates |= &candidates;
}
Ok(Some(FinalResult { query_tree, candidates, bucket_candidates }))
},
None => Ok(None),
} }
} }
} }

View File

@ -9,11 +9,12 @@ pub struct Initial {
} }
impl Initial { impl Initial {
pub fn new(query_tree: Option<Operation>, mut candidates: Option<RoaringBitmap>) -> Initial { pub fn new(query_tree: Option<Operation>, filtered_candidates: Option<RoaringBitmap>) -> Initial {
let answer = CriterionResult { let answer = CriterionResult {
query_tree, query_tree,
candidates: candidates.clone(), candidates: None,
bucket_candidates: candidates.take().unwrap_or_default(), filtered_candidates,
bucket_candidates: None,
}; };
Initial { answer: Some(answer) } Initial { answer: Some(answer) }
} }

View File

@ -38,8 +38,10 @@ pub struct CriterionResult {
/// The candidates that this criterion is allowed to return subsets of, /// The candidates that this criterion is allowed to return subsets of,
/// if None, it is up to the child to compute the candidates itself. /// if None, it is up to the child to compute the candidates itself.
candidates: Option<RoaringBitmap>, candidates: Option<RoaringBitmap>,
/// The candidates, coming from facet filters, that this criterion is allowed to return subsets of.
filtered_candidates: Option<RoaringBitmap>,
/// Candidates that comes from the current bucket of the initial criterion. /// Candidates that comes from the current bucket of the initial criterion.
bucket_candidates: RoaringBitmap, bucket_candidates: Option<RoaringBitmap>,
} }
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
@ -57,15 +59,6 @@ enum Candidates {
Forbidden(RoaringBitmap) Forbidden(RoaringBitmap)
} }
impl Candidates {
fn into_inner(self) -> RoaringBitmap {
match self {
Self::Allowed(inner) => inner,
Self::Forbidden(inner) => inner,
}
}
}
impl Default for Candidates { impl Default for Candidates {
fn default() -> Self { fn default() -> Self {
Self::Forbidden(RoaringBitmap::new()) Self::Forbidden(RoaringBitmap::new())
@ -236,14 +229,12 @@ impl<'t> CriteriaBuilder<'t> {
pub fn resolve_query_tree<'t>( pub fn resolve_query_tree<'t>(
ctx: &'t dyn Context, ctx: &'t dyn Context,
query_tree: &Operation, query_tree: &Operation,
cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
wdcache: &mut WordDerivationsCache, wdcache: &mut WordDerivationsCache,
) -> anyhow::Result<RoaringBitmap> ) -> anyhow::Result<RoaringBitmap>
{ {
fn resolve_operation<'t>( fn resolve_operation<'t>(
ctx: &'t dyn Context, ctx: &'t dyn Context,
query_tree: &Operation, query_tree: &Operation,
cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
wdcache: &mut WordDerivationsCache, wdcache: &mut WordDerivationsCache,
) -> anyhow::Result<RoaringBitmap> ) -> anyhow::Result<RoaringBitmap>
{ {
@ -252,7 +243,7 @@ pub fn resolve_query_tree<'t>(
match query_tree { match query_tree {
And(ops) => { And(ops) => {
let mut ops = ops.iter().map(|op| { let mut ops = ops.iter().map(|op| {
resolve_operation(ctx, op, cache, wdcache) resolve_operation(ctx, op, wdcache)
}).collect::<anyhow::Result<Vec<_>>>()?; }).collect::<anyhow::Result<Vec<_>>>()?;
ops.sort_unstable_by_key(|cds| cds.len()); ops.sort_unstable_by_key(|cds| cds.len());
@ -296,7 +287,7 @@ pub fn resolve_query_tree<'t>(
Or(_, ops) => { Or(_, ops) => {
let mut candidates = RoaringBitmap::new(); let mut candidates = RoaringBitmap::new();
for op in ops { for op in ops {
let docids = resolve_operation(ctx, op, cache, wdcache)?; let docids = resolve_operation(ctx, op, wdcache)?;
candidates.union_with(&docids); candidates.union_with(&docids);
} }
Ok(candidates) Ok(candidates)
@ -305,7 +296,7 @@ pub fn resolve_query_tree<'t>(
} }
} }
resolve_operation(ctx, query_tree, cache, wdcache) resolve_operation(ctx, query_tree, wdcache)
} }

View File

@ -30,8 +30,8 @@ const PROXIMITY_THRESHOLD: u8 = 0;
pub struct Proximity<'t> { pub struct Proximity<'t> {
ctx: &'t dyn Context<'t>, ctx: &'t dyn Context<'t>,
/// ((max_proximity, query_tree), allowed_candidates) /// (max_proximity, query_tree, allowed_candidates)
state: Option<(Option<(usize, Operation)>, RoaringBitmap)>, state: Option<(u8, Operation, RoaringBitmap)>,
proximity: u8, proximity: u8,
bucket_candidates: RoaringBitmap, bucket_candidates: RoaringBitmap,
parent: Box<dyn Criterion + 't>, parent: Box<dyn Criterion + 't>,
@ -57,114 +57,96 @@ impl<'t> Criterion for Proximity<'t> {
#[logging_timer::time("Proximity::{}")] #[logging_timer::time("Proximity::{}")]
fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> { fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> {
// remove excluded candidates when next is called, instead of doing it in the loop. // remove excluded candidates when next is called, instead of doing it in the loop.
if let Some((_, candidates)) = self.state.as_mut() { if let Some((_, _, allowed_candidates)) = self.state.as_mut() {
*candidates -= params.excluded_candidates; *allowed_candidates -= params.excluded_candidates;
} }
loop { loop {
debug!("Proximity at iteration {} (max prox {:?}) ({:?})", debug!("Proximity at iteration {} (max prox {:?}) ({:?})",
self.proximity, self.proximity,
self.state.as_ref().map(|(qt, _)| qt.as_ref().map(|(mp, _)| mp)), self.state.as_ref().map(|(mp, _, _)| mp),
self.state.as_ref().map(|(_, cd)| cd), self.state.as_ref().map(|(_, _, cd)| cd),
); );
match &mut self.state { match &mut self.state {
Some((_, candidates)) if candidates.is_empty() => { Some((max_prox, _, allowed_candidates)) if allowed_candidates.is_empty() || self.proximity > *max_prox => {
self.state = None; // reset state self.state = None; // reset state
}, },
Some((Some((max_prox, query_tree)), candidates)) => { Some((_, query_tree, allowed_candidates)) => {
if self.proximity as usize > *max_prox { let mut new_candidates = if allowed_candidates.len() <= CANDIDATES_THRESHOLD && self.proximity > PROXIMITY_THRESHOLD {
self.state = None; // reset state if let Some(cache) = self.plane_sweep_cache.as_mut() {
} else { match cache.next() {
let mut new_candidates = if candidates.len() <= CANDIDATES_THRESHOLD && self.proximity > PROXIMITY_THRESHOLD { Some((p, candidates)) => {
if let Some(cache) = self.plane_sweep_cache.as_mut() { self.proximity = p;
match cache.next() { candidates
Some((p, candidates)) => { },
self.proximity = p; None => {
candidates self.state = None; // reset state
}, continue
None => { },
self.state = None; // reset state
continue
},
}
} else {
let cache = resolve_plane_sweep_candidates(
self.ctx,
query_tree,
candidates,
params.wdcache,
)?;
self.plane_sweep_cache = Some(cache.into_iter());
continue
} }
} else { // use set theory based algorithm } else {
resolve_candidates( let cache = resolve_plane_sweep_candidates(
self.ctx, self.ctx,
&query_tree, query_tree,
self.proximity, allowed_candidates,
&mut self.candidates_cache, params.wdcache,
params.wdcache, )?;
)? self.plane_sweep_cache = Some(cache.into_iter());
};
new_candidates.intersect_with(&candidates); continue
candidates.difference_with(&new_candidates); }
self.proximity += 1; } else { // use set theory based algorithm
resolve_candidates(
self.ctx,
&query_tree,
self.proximity,
&mut self.candidates_cache,
params.wdcache,
)?
};
new_candidates &= &*allowed_candidates;
*allowed_candidates -= &new_candidates;
self.proximity += 1;
return Ok(Some(CriterionResult {
query_tree: Some(query_tree.clone()),
candidates: Some(new_candidates),
bucket_candidates: take(&mut self.bucket_candidates),
}));
}
},
Some((None, candidates)) => {
let candidates = take(candidates);
self.state = None; // reset state
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: None, query_tree: Some(query_tree.clone()),
candidates: Some(candidates.clone()), candidates: Some(new_candidates),
bucket_candidates: candidates, filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)),
})); }));
}, },
None => { None => {
match self.parent.next(params)? { match self.parent.next(params)? {
Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => { Some(CriterionResult { query_tree: Some(query_tree), candidates, filtered_candidates, bucket_candidates }) => {
return Ok(Some(CriterionResult { let mut candidates = match candidates {
query_tree: None, Some(candidates) => candidates,
candidates: None, None => resolve_query_tree(self.ctx, &query_tree, params.wdcache)? - params.excluded_candidates,
bucket_candidates,
}));
},
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
let candidates_is_some = candidates.is_some();
let candidates = match (&query_tree, candidates) {
(_, Some(candidates)) => candidates,
(Some(qt), None) => {
let candidates = resolve_query_tree(self.ctx, qt, &mut HashMap::new(), params.wdcache)?;
candidates - params.excluded_candidates
},
(None, None) => RoaringBitmap::new(),
}; };
// If our parent returns candidates it means that the bucket if let Some(filtered_candidates) = filtered_candidates {
// candidates were already computed before and we can use them. candidates &= filtered_candidates;
//
// If not, we must use the just computed candidates as our bucket
// candidates.
if candidates_is_some {
self.bucket_candidates.union_with(&bucket_candidates);
} else {
self.bucket_candidates.union_with(&candidates);
} }
let query_tree = query_tree.map(|op| (maximum_proximity(&op), op)); match bucket_candidates {
self.state = Some((query_tree, candidates)); Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
None => self.bucket_candidates |= &candidates,
}
let maximum_proximity = maximum_proximity(&query_tree);
self.state = Some((maximum_proximity as u8, query_tree, candidates));
self.proximity = 0; self.proximity = 0;
self.plane_sweep_cache = None; self.plane_sweep_cache = None;
}, },
Some(CriterionResult { query_tree: None, candidates, filtered_candidates, bucket_candidates }) => {
return Ok(Some(CriterionResult {
query_tree: None,
candidates,
filtered_candidates,
bucket_candidates,
}));
},
None => return Ok(None), None => return Ok(None),
} }
}, },

View File

@ -13,15 +13,19 @@ use super::{
CriterionParameters, CriterionParameters,
CriterionResult, CriterionResult,
query_docids, query_docids,
query_pair_proximity_docids query_pair_proximity_docids,
resolve_query_tree,
}; };
/// Maximum number of typo for a word of any length.
const MAX_TYPOS_PER_WORD: u8 = 2;
pub struct Typo<'t> { pub struct Typo<'t> {
ctx: &'t dyn Context<'t>, ctx: &'t dyn Context<'t>,
query_tree: Option<(usize, Operation)>, /// (max_typos, query_tree, candidates)
number_typos: u8, state: Option<(u8, Operation, Candidates)>,
candidates: Candidates, typos: u8,
bucket_candidates: RoaringBitmap, bucket_candidates: Option<RoaringBitmap>,
parent: Box<dyn Criterion + 't>, parent: Box<dyn Criterion + 't>,
candidates_cache: HashMap<(Operation, u8), RoaringBitmap>, candidates_cache: HashMap<(Operation, u8), RoaringBitmap>,
} }
@ -30,10 +34,9 @@ impl<'t> Typo<'t> {
pub fn new(ctx: &'t dyn Context<'t>, parent: Box<dyn Criterion + 't>) -> Self { pub fn new(ctx: &'t dyn Context<'t>, parent: Box<dyn Criterion + 't>) -> Self {
Typo { Typo {
ctx, ctx,
query_tree: None, state: None,
number_typos: 0, typos: 0,
candidates: Candidates::default(), bucket_candidates: None,
bucket_candidates: RoaringBitmap::new(),
parent, parent,
candidates_cache: HashMap::new(), candidates_cache: HashMap::new(),
} }
@ -45,114 +48,107 @@ impl<'t> Criterion for Typo<'t> {
fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> { fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> {
use Candidates::{Allowed, Forbidden}; use Candidates::{Allowed, Forbidden};
// remove excluded candidates when next is called, instead of doing it in the loop. // remove excluded candidates when next is called, instead of doing it in the loop.
match &mut self.candidates { match self.state.as_mut() {
Allowed(candidates) => *candidates -= params.excluded_candidates, Some((_, _, Allowed(candidates))) => *candidates -= params.excluded_candidates,
Forbidden(candidates) => *candidates |= params.excluded_candidates, Some((_, _, Forbidden(candidates))) => *candidates |= params.excluded_candidates,
None => (),
} }
loop { loop {
debug!("Typo at iteration {} ({:?})", self.number_typos, self.candidates); debug!("Typo at iteration {} (max typos {:?}) ({:?})",
self.typos,
self.state.as_ref().map(|(mt, _, _)| mt),
self.state.as_ref().map(|(_, _, cd)| cd),
);
match self.state.as_mut() {
Some((max_typos, _, _)) if self.typos > *max_typos => {
self.state = None; // reset state
},
Some((_, _, Allowed(allowed_candidates))) if allowed_candidates.is_empty() => {
self.state = None; // reset state
},
Some((_, query_tree, candidates_authorization)) => {
let fst = self.ctx.words_fst();
let new_query_tree = match self.typos {
typos if typos < MAX_TYPOS_PER_WORD => {
alterate_query_tree(&fst, query_tree.clone(), self.typos, params.wdcache)?
},
MAX_TYPOS_PER_WORD => {
// When typos >= MAX_TYPOS_PER_WORD, no more alteration of the query tree is possible,
// we keep the altered query tree
*query_tree = alterate_query_tree(&fst, query_tree.clone(), self.typos, params.wdcache)?;
// we compute the allowed candidates
let query_tree_allowed_candidates = resolve_query_tree(self.ctx, query_tree, params.wdcache)?;
// we assign the allowed candidates to the candidates authorization.
*candidates_authorization = match take(candidates_authorization) {
Allowed(allowed_candidates) => Allowed(query_tree_allowed_candidates & allowed_candidates),
Forbidden(forbidden_candidates) => Allowed(query_tree_allowed_candidates - forbidden_candidates),
};
query_tree.clone()
},
_otherwise => query_tree.clone(),
};
let mut candidates = resolve_candidates(
self.ctx,
&new_query_tree,
self.typos,
&mut self.candidates_cache,
params.wdcache,
)?;
match candidates_authorization {
Allowed(allowed_candidates) => {
candidates &= &*allowed_candidates;
*allowed_candidates -= &candidates;
},
Forbidden(forbidden_candidates) => {
candidates -= &*forbidden_candidates;
*forbidden_candidates |= &candidates;
},
}
let bucket_candidates = match self.bucket_candidates.as_mut() {
Some(bucket_candidates) => take(bucket_candidates),
None => candidates.clone(),
};
self.typos += 1;
match (&mut self.query_tree, &mut self.candidates) {
(_, Allowed(candidates)) if candidates.is_empty() => {
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: self.query_tree.take().map(|(_, qt)| qt), query_tree: Some(new_query_tree),
candidates: Some(take(&mut self.candidates).into_inner()), candidates: Some(candidates),
bucket_candidates: take(&mut self.bucket_candidates), filtered_candidates: None,
bucket_candidates: Some(bucket_candidates),
})); }));
}, },
(Some((max_typos, query_tree)), Allowed(candidates)) => { None => {
if self.number_typos as usize > *max_typos {
self.query_tree = None;
self.candidates = Candidates::default();
} else {
let fst = self.ctx.words_fst();
let new_query_tree = if self.number_typos < 2 {
alterate_query_tree(&fst, query_tree.clone(), self.number_typos, params.wdcache)?
} else if self.number_typos == 2 {
*query_tree = alterate_query_tree(&fst, query_tree.clone(), self.number_typos, params.wdcache)?;
query_tree.clone()
} else {
query_tree.clone()
};
let mut new_candidates = resolve_candidates(
self.ctx,
&new_query_tree,
self.number_typos,
&mut self.candidates_cache,
params.wdcache,
)?;
new_candidates.intersect_with(&candidates);
candidates.difference_with(&new_candidates);
self.number_typos += 1;
return Ok(Some(CriterionResult {
query_tree: Some(new_query_tree),
candidates: Some(new_candidates),
bucket_candidates: take(&mut self.bucket_candidates),
}));
}
},
(Some((max_typos, query_tree)), Forbidden(candidates)) => {
if self.number_typos as usize > *max_typos {
self.query_tree = None;
self.candidates = Candidates::default();
} else {
let fst = self.ctx.words_fst();
let new_query_tree = if self.number_typos < 2 {
alterate_query_tree(&fst, query_tree.clone(), self.number_typos, params.wdcache)?
} else if self.number_typos == 2 {
*query_tree = alterate_query_tree(&fst, query_tree.clone(), self.number_typos, params.wdcache)?;
query_tree.clone()
} else {
query_tree.clone()
};
let mut new_candidates = resolve_candidates(
self.ctx,
&new_query_tree,
self.number_typos,
&mut self.candidates_cache,
params.wdcache,
)?;
new_candidates.difference_with(&candidates);
candidates.union_with(&new_candidates);
self.number_typos += 1;
self.bucket_candidates.union_with(&new_candidates);
return Ok(Some(CriterionResult {
query_tree: Some(new_query_tree),
candidates: Some(new_candidates),
bucket_candidates: take(&mut self.bucket_candidates),
}));
}
},
(None, Allowed(_)) => {
let candidates = take(&mut self.candidates).into_inner();
return Ok(Some(CriterionResult {
query_tree: None,
candidates: Some(candidates.clone()),
bucket_candidates: candidates,
}));
},
(None, Forbidden(_)) => {
match self.parent.next(params)? { match self.parent.next(params)? {
Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => { Some(CriterionResult { query_tree: Some(query_tree), candidates, filtered_candidates, bucket_candidates }) => {
self.bucket_candidates = match (self.bucket_candidates.take(), bucket_candidates) {
(Some(self_bc), Some(parent_bc)) => Some(self_bc | parent_bc),
(self_bc, parent_bc) => self_bc.or(parent_bc),
};
let candidates = match candidates.or(filtered_candidates) {
Some(candidates) => Candidates::Allowed(candidates - params.excluded_candidates),
None => Candidates::Forbidden(params.excluded_candidates.clone()),
};
let maximum_typos = maximum_typo(&query_tree) as u8;
self.state = Some((maximum_typos, query_tree, candidates));
self.typos = 0;
},
Some(CriterionResult { query_tree: None, candidates, filtered_candidates, bucket_candidates }) => {
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: None, query_tree: None,
candidates: None, candidates,
filtered_candidates,
bucket_candidates, bucket_candidates,
})); }));
}, },
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
self.query_tree = query_tree.map(|op| (maximum_typo(&op), op));
self.number_typos = 0;
self.candidates = candidates.map_or_else(|| {
Candidates::Forbidden(params.excluded_candidates.clone())
}, Candidates::Allowed);
self.bucket_candidates.union_with(&bucket_candidates);
},
None => return Ok(None), None => return Ok(None),
} }
}, },
@ -185,7 +181,6 @@ fn alterate_query_tree(
ops.iter_mut().try_for_each(|op| recurse(words_fst, op, number_typos, wdcache)) ops.iter_mut().try_for_each(|op| recurse(words_fst, op, number_typos, wdcache))
}, },
Operation::Query(q) => { Operation::Query(q) => {
// TODO may be optimized when number_typos == 0
if let QueryKind::Tolerant { typo, word } = &q.kind { if let QueryKind::Tolerant { typo, word } = &q.kind {
// if no typo is allowed we don't call word_derivations function, // if no typo is allowed we don't call word_derivations function,
// and directly create an Exact query // and directly create an Exact query
@ -197,7 +192,7 @@ fn alterate_query_tree(
} else { } else {
let typo = *typo.min(&number_typos); let typo = *typo.min(&number_typos);
let words = word_derivations(word, q.prefix, typo, words_fst, wdcache)?; let words = word_derivations(word, q.prefix, typo, words_fst, wdcache)?;
let queries = words.into_iter().map(|(word, typo)| { let queries = words.iter().map(|(word, typo)| {
Operation::Query(Query { Operation::Query(Query {
prefix: false, prefix: false,
kind: QueryKind::Exact { original_typo: *typo, word: word.to_string() }, kind: QueryKind::Exact { original_typo: *typo, word: word.to_string() },
@ -384,7 +379,8 @@ mod test {
]), ]),
])), ])),
candidates: Some(candidates_1.clone()), candidates: Some(candidates_1.clone()),
bucket_candidates: candidates_1, bucket_candidates: Some(candidates_1),
filtered_candidates: None,
}; };
assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_1)); assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_1));
@ -406,7 +402,8 @@ mod test {
]), ]),
])), ])),
candidates: Some(candidates_2.clone()), candidates: Some(candidates_2.clone()),
bucket_candidates: candidates_2, bucket_candidates: Some(candidates_2),
filtered_candidates: None,
}; };
assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_2)); assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_2));
@ -427,8 +424,9 @@ mod test {
let expected = CriterionResult { let expected = CriterionResult {
query_tree: None, query_tree: None,
candidates: Some(facet_candidates.clone()), candidates: None,
bucket_candidates: facet_candidates, bucket_candidates: None,
filtered_candidates: Some(facet_candidates.clone()),
}; };
// first iteration, returns the facet candidates // first iteration, returns the facet candidates
@ -471,7 +469,8 @@ mod test {
]), ]),
])), ])),
candidates: Some(&candidates_1 & &facet_candidates), candidates: Some(&candidates_1 & &facet_candidates),
bucket_candidates: facet_candidates.clone(), bucket_candidates: Some(&candidates_1 & &facet_candidates),
filtered_candidates: None,
}; };
assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_1)); assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_1));
@ -493,7 +492,8 @@ mod test {
]), ]),
])), ])),
candidates: Some(&candidates_2 & &facet_candidates), candidates: Some(&candidates_2 & &facet_candidates),
bucket_candidates: RoaringBitmap::new(), bucket_candidates: Some(&candidates_2 & &facet_candidates),
filtered_candidates: None,
}; };
assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_2)); assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_2));

View File

@ -1,4 +1,3 @@
use std::collections::HashMap;
use std::mem::take; use std::mem::take;
use log::debug; use log::debug;
@ -11,9 +10,9 @@ pub struct Words<'t> {
ctx: &'t dyn Context<'t>, ctx: &'t dyn Context<'t>,
query_trees: Vec<Operation>, query_trees: Vec<Operation>,
candidates: Option<RoaringBitmap>, candidates: Option<RoaringBitmap>,
bucket_candidates: RoaringBitmap, bucket_candidates: Option<RoaringBitmap>,
filtered_candidates: Option<RoaringBitmap>,
parent: Box<dyn Criterion + 't>, parent: Box<dyn Criterion + 't>,
candidates_cache: HashMap<(Operation, u8), RoaringBitmap>,
} }
impl<'t> Words<'t> { impl<'t> Words<'t> {
@ -22,9 +21,9 @@ impl<'t> Words<'t> {
ctx, ctx,
query_trees: Vec::default(), query_trees: Vec::default(),
candidates: None, candidates: None,
bucket_candidates: RoaringBitmap::new(), bucket_candidates: None,
parent, parent,
candidates_cache: HashMap::default(), filtered_candidates: None,
} }
} }
} }
@ -40,55 +39,50 @@ impl<'t> Criterion for Words<'t> {
loop { loop {
debug!("Words at iteration {} ({:?})", self.query_trees.len(), self.candidates); debug!("Words at iteration {} ({:?})", self.query_trees.len(), self.candidates);
match (self.query_trees.pop(), &mut self.candidates) { match self.query_trees.pop() {
(query_tree, Some(candidates)) if candidates.is_empty() => { Some(query_tree) => {
self.query_trees = Vec::new(); let candidates = match self.candidates.as_mut() {
return Ok(Some(CriterionResult { Some(allowed_candidates) => {
query_tree, let mut candidates = resolve_query_tree(self.ctx, &query_tree, params.wdcache)?;
candidates: self.candidates.take(), candidates &= &*allowed_candidates;
bucket_candidates: take(&mut self.bucket_candidates), *allowed_candidates -= &candidates;
})); Some(candidates)
}, },
(Some(qt), Some(candidates)) => { None => None,
let mut found_candidates = resolve_query_tree(self.ctx, &qt, &mut self.candidates_cache, params.wdcache)?; };
found_candidates.intersect_with(&candidates);
candidates.difference_with(&found_candidates); let bucket_candidates = match self.bucket_candidates.as_mut() {
Some(bucket_candidates) => Some(take(bucket_candidates)),
None => None,
};
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: Some(qt), query_tree: Some(query_tree),
candidates: Some(found_candidates), candidates,
bucket_candidates: take(&mut self.bucket_candidates), filtered_candidates: self.filtered_candidates.clone(),
bucket_candidates,
})); }));
}, },
(Some(qt), None) => { None => {
return Ok(Some(CriterionResult {
query_tree: Some(qt),
candidates: None,
bucket_candidates: take(&mut self.bucket_candidates),
}));
},
(None, Some(_)) => {
let candidates = self.candidates.take();
return Ok(Some(CriterionResult {
query_tree: None,
candidates: candidates.clone(),
bucket_candidates: candidates.unwrap_or_default(),
}));
},
(None, None) => {
match self.parent.next(params)? { match self.parent.next(params)? {
Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => { Some(CriterionResult { query_tree: Some(query_tree), candidates, filtered_candidates, bucket_candidates }) => {
self.query_trees = explode_query_tree(query_tree);
self.candidates = candidates;
self.filtered_candidates = filtered_candidates;
self.bucket_candidates = match (self.bucket_candidates.take(), bucket_candidates) {
(Some(self_bc), Some(parent_bc)) => Some(self_bc | parent_bc),
(self_bc, parent_bc) => self_bc.or(parent_bc),
};
},
Some(CriterionResult { query_tree: None, candidates, filtered_candidates, bucket_candidates }) => {
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: None, query_tree: None,
candidates: None, candidates,
filtered_candidates,
bucket_candidates, bucket_candidates,
})); }));
}, },
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
self.query_trees = query_tree.map(explode_query_tree).unwrap_or_default();
self.candidates = candidates;
self.bucket_candidates.union_with(&bucket_candidates);
},
None => return Ok(None), None => return Ok(None),
} }
}, },

View File

@ -241,7 +241,7 @@ impl<'a> QueryTreeBuilder<'a> {
} }
/// Split the word depending on the frequency of subwords in the database documents. /// Split the word depending on the frequency of subwords in the database documents.
fn split_best_frequency<'a>(ctx: &impl Context, word: &'a str) -> heed::Result<Option<Operation>> { fn split_best_frequency(ctx: &impl Context, word: &str) -> heed::Result<Option<Operation>> {
let chars = word.char_indices().skip(1); let chars = word.char_indices().skip(1);
let mut best = None; let mut best = None;
@ -438,14 +438,14 @@ fn create_query_tree(
let start = number_phrases + (number_phrases == 0) as usize; let start = number_phrases + (number_phrases == 0) as usize;
for len in start..=query.len() { for len in start..=query.len() {
let mut word_count = len - number_phrases; let mut word_count = len - number_phrases;
let query: Vec<_> = query.iter().filter_map(|p| { let query: Vec<_> = query.iter().filter(|p| {
if p.is_phrase() { if p.is_phrase() {
Some(p) true
} else if word_count != 0 { } else if word_count != 0 {
word_count -= 1; word_count -= 1;
Some(p) true
} else { } else {
None false
} }
}) })
.cloned() .cloned()