mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-11 14:04:31 +01:00
Make bucket candidates optionals
This commit is contained in:
parent
c620626515
commit
e923d51b8f
@ -94,7 +94,6 @@ impl<'t> Criterion for AscDesc<'t> {
|
||||
None => {
|
||||
match self.parent.next(params)? {
|
||||
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
|
||||
let candidates_is_some = candidates.is_some();
|
||||
self.query_tree = query_tree;
|
||||
let candidates = match (&self.query_tree, candidates) {
|
||||
(_, Some(mut candidates)) => {
|
||||
@ -103,7 +102,7 @@ impl<'t> Criterion for AscDesc<'t> {
|
||||
},
|
||||
(Some(qt), None) => {
|
||||
let context = CriteriaBuilder::new(&self.rtxn, &self.index)?;
|
||||
let mut candidates = resolve_query_tree(&context, qt, &mut HashMap::new(), params.wdcache)?;
|
||||
let mut candidates = resolve_query_tree(&context, qt, params.wdcache)?;
|
||||
candidates -= params.excluded_candidates;
|
||||
candidates.intersect_with(&self.faceted_candidates);
|
||||
candidates
|
||||
@ -111,15 +110,9 @@ impl<'t> Criterion for AscDesc<'t> {
|
||||
(None, None) => take(&mut self.faceted_candidates),
|
||||
};
|
||||
|
||||
// If our parent returns candidates it means that the bucket
|
||||
// candidates were already computed before and we can use them.
|
||||
//
|
||||
// If not, we must use the just computed candidates as our bucket
|
||||
// candidates.
|
||||
if candidates_is_some {
|
||||
self.bucket_candidates.union_with(&bucket_candidates);
|
||||
} else {
|
||||
self.bucket_candidates.union_with(&candidates);
|
||||
match bucket_candidates {
|
||||
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
|
||||
None => self.bucket_candidates |= &candidates,
|
||||
}
|
||||
|
||||
if candidates.is_empty() {
|
||||
@ -143,7 +136,7 @@ impl<'t> Criterion for AscDesc<'t> {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: self.query_tree.clone(),
|
||||
candidates: Some(candidates),
|
||||
bucket_candidates: take(&mut self.bucket_candidates),
|
||||
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
||||
}));
|
||||
},
|
||||
}
|
||||
|
@ -24,13 +24,12 @@ const LEVEL_EXPONENTIATION_BASE: u32 = 4;
|
||||
/// the system to choose between one algorithm or another.
|
||||
const CANDIDATES_THRESHOLD: u64 = 1000;
|
||||
|
||||
type FlattenedQueryTree = Vec<Vec<Vec<Query>>>;
|
||||
pub struct Attribute<'t> {
|
||||
ctx: &'t dyn Context<'t>,
|
||||
query_tree: Option<Operation>,
|
||||
candidates: Option<RoaringBitmap>,
|
||||
state: Option<(Operation, FlattenedQueryTree, RoaringBitmap)>,
|
||||
bucket_candidates: RoaringBitmap,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
flattened_query_tree: Option<Vec<Vec<Vec<Query>>>>,
|
||||
current_buckets: Option<btree_map::IntoIter<u64, RoaringBitmap>>,
|
||||
}
|
||||
|
||||
@ -38,11 +37,9 @@ impl<'t> Attribute<'t> {
|
||||
pub fn new(ctx: &'t dyn Context<'t>, parent: Box<dyn Criterion + 't>) -> Self {
|
||||
Attribute {
|
||||
ctx,
|
||||
query_tree: None,
|
||||
candidates: None,
|
||||
state: None,
|
||||
bucket_candidates: RoaringBitmap::new(),
|
||||
parent,
|
||||
flattened_query_tree: None,
|
||||
current_buckets: None,
|
||||
}
|
||||
}
|
||||
@ -52,29 +49,25 @@ impl<'t> Criterion for Attribute<'t> {
|
||||
#[logging_timer::time("Attribute::{}")]
|
||||
fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> {
|
||||
// remove excluded candidates when next is called, instead of doing it in the loop.
|
||||
if let Some(candidates) = self.candidates.as_mut() {
|
||||
*candidates -= params.excluded_candidates;
|
||||
if let Some((_, _, allowed_candidates)) = self.state.as_mut() {
|
||||
*allowed_candidates -= params.excluded_candidates;
|
||||
}
|
||||
|
||||
loop {
|
||||
match (&self.query_tree, &mut self.candidates) {
|
||||
(_, Some(candidates)) if candidates.is_empty() => {
|
||||
match self.state.take() {
|
||||
Some((query_tree, _, allowed_candidates)) if allowed_candidates.is_empty() => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: self.query_tree.take(),
|
||||
candidates: self.candidates.take(),
|
||||
bucket_candidates: take(&mut self.bucket_candidates),
|
||||
query_tree: Some(query_tree),
|
||||
candidates: Some(RoaringBitmap::new()),
|
||||
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
||||
}));
|
||||
},
|
||||
(Some(qt), Some(candidates)) => {
|
||||
let flattened_query_tree = self.flattened_query_tree.get_or_insert_with(|| {
|
||||
flatten_query_tree(&qt)
|
||||
});
|
||||
|
||||
let found_candidates = if candidates.len() < CANDIDATES_THRESHOLD {
|
||||
Some((query_tree, flattened_query_tree, mut allowed_candidates)) => {
|
||||
let found_candidates = if allowed_candidates.len() < CANDIDATES_THRESHOLD {
|
||||
let current_buckets = match self.current_buckets.as_mut() {
|
||||
Some(current_buckets) => current_buckets,
|
||||
None => {
|
||||
let new_buckets = linear_compute_candidates(self.ctx, flattened_query_tree, candidates)?;
|
||||
let new_buckets = linear_compute_candidates(self.ctx, &flattened_query_tree, &allowed_candidates)?;
|
||||
self.current_buckets.get_or_insert(new_buckets.into_iter())
|
||||
},
|
||||
};
|
||||
@ -83,62 +76,60 @@ impl<'t> Criterion for Attribute<'t> {
|
||||
Some((_score, candidates)) => candidates,
|
||||
None => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: self.query_tree.take(),
|
||||
candidates: self.candidates.take(),
|
||||
bucket_candidates: take(&mut self.bucket_candidates),
|
||||
query_tree: Some(query_tree),
|
||||
candidates: Some(RoaringBitmap::new()),
|
||||
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
||||
}));
|
||||
},
|
||||
}
|
||||
} else {
|
||||
match set_compute_candidates(self.ctx, flattened_query_tree, candidates, params.wdcache)? {
|
||||
match set_compute_candidates(self.ctx, &flattened_query_tree, &allowed_candidates, params.wdcache)? {
|
||||
Some(candidates) => candidates,
|
||||
None => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: self.query_tree.take(),
|
||||
candidates: self.candidates.take(),
|
||||
bucket_candidates: take(&mut self.bucket_candidates),
|
||||
query_tree: Some(query_tree),
|
||||
candidates: Some(RoaringBitmap::new()),
|
||||
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
||||
}));
|
||||
},
|
||||
}
|
||||
};
|
||||
|
||||
candidates.difference_with(&found_candidates);
|
||||
allowed_candidates -= &found_candidates;
|
||||
|
||||
self.state = Some((query_tree.clone(), flattened_query_tree, allowed_candidates));
|
||||
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: self.query_tree.clone(),
|
||||
query_tree: Some(query_tree),
|
||||
candidates: Some(found_candidates),
|
||||
bucket_candidates: take(&mut self.bucket_candidates),
|
||||
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
||||
}));
|
||||
},
|
||||
(Some(qt), None) => {
|
||||
let mut query_tree_candidates = resolve_query_tree(self.ctx, &qt, &mut HashMap::new(), params.wdcache)?;
|
||||
query_tree_candidates -= params.excluded_candidates;
|
||||
self.bucket_candidates |= &query_tree_candidates;
|
||||
self.candidates = Some(query_tree_candidates);
|
||||
},
|
||||
(None, Some(_)) => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: self.query_tree.take(),
|
||||
candidates: self.candidates.take(),
|
||||
bucket_candidates: take(&mut self.bucket_candidates),
|
||||
}));
|
||||
},
|
||||
(None, None) => {
|
||||
None => {
|
||||
match self.parent.next(params)? {
|
||||
Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => {
|
||||
Some(CriterionResult { query_tree: Some(query_tree), candidates, bucket_candidates }) => {
|
||||
let candidates = match candidates {
|
||||
Some(candidates) => candidates,
|
||||
None => resolve_query_tree(self.ctx, &query_tree, params.wdcache)? - params.excluded_candidates,
|
||||
};
|
||||
|
||||
let flattened_query_tree = flatten_query_tree(&query_tree);
|
||||
|
||||
match bucket_candidates {
|
||||
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
|
||||
None => self.bucket_candidates |= &candidates,
|
||||
}
|
||||
|
||||
self.state = Some((query_tree, flattened_query_tree, candidates));
|
||||
self.current_buckets = None;
|
||||
},
|
||||
Some(CriterionResult { query_tree: None, candidates, bucket_candidates }) => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates: None,
|
||||
candidates,
|
||||
bucket_candidates,
|
||||
}));
|
||||
},
|
||||
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
|
||||
self.query_tree = query_tree;
|
||||
self.candidates = candidates;
|
||||
self.bucket_candidates |= bucket_candidates;
|
||||
self.flattened_query_tree = None;
|
||||
self.current_buckets = None;
|
||||
},
|
||||
None => return Ok(None),
|
||||
}
|
||||
},
|
||||
@ -467,7 +458,7 @@ impl<'t, 'q> Eq for Branch<'t, 'q> {}
|
||||
|
||||
fn initialize_query_level_iterators<'t, 'q>(
|
||||
ctx: &'t dyn Context<'t>,
|
||||
branches: &'q Vec<Vec<Vec<Query>>>,
|
||||
branches: &'q FlattenedQueryTree,
|
||||
allowed_candidates: &RoaringBitmap,
|
||||
wdcache: &mut WordDerivationsCache,
|
||||
) -> anyhow::Result<BinaryHeap<Branch<'t, 'q>>> {
|
||||
@ -517,7 +508,7 @@ fn initialize_query_level_iterators<'t, 'q>(
|
||||
|
||||
fn set_compute_candidates<'t>(
|
||||
ctx: &'t dyn Context<'t>,
|
||||
branches: &Vec<Vec<Vec<Query>>>,
|
||||
branches: &FlattenedQueryTree,
|
||||
allowed_candidates: &RoaringBitmap,
|
||||
wdcache: &mut WordDerivationsCache,
|
||||
) -> anyhow::Result<Option<RoaringBitmap>>
|
||||
@ -570,11 +561,11 @@ fn set_compute_candidates<'t>(
|
||||
|
||||
fn linear_compute_candidates(
|
||||
ctx: &dyn Context,
|
||||
branches: &Vec<Vec<Vec<Query>>>,
|
||||
branches: &FlattenedQueryTree,
|
||||
allowed_candidates: &RoaringBitmap,
|
||||
) -> anyhow::Result<BTreeMap<u64, RoaringBitmap>>
|
||||
{
|
||||
fn compute_candidate_rank(branches: &Vec<Vec<Vec<Query>>>, words_positions: HashMap<String, RoaringBitmap>) -> u64 {
|
||||
fn compute_candidate_rank(branches: &FlattenedQueryTree, words_positions: HashMap<String, RoaringBitmap>) -> u64 {
|
||||
let mut min_rank = u64::max_value();
|
||||
for branch in branches {
|
||||
|
||||
@ -659,10 +650,10 @@ fn linear_compute_candidates(
|
||||
}
|
||||
|
||||
// TODO can we keep refs of Query
|
||||
fn flatten_query_tree(query_tree: &Operation) -> Vec<Vec<Vec<Query>>> {
|
||||
fn flatten_query_tree(query_tree: &Operation) -> FlattenedQueryTree {
|
||||
use crate::search::criteria::Operation::{And, Or, Consecutive};
|
||||
|
||||
fn and_recurse(head: &Operation, tail: &[Operation]) -> Vec<Vec<Vec<Query>>> {
|
||||
fn and_recurse(head: &Operation, tail: &[Operation]) -> FlattenedQueryTree {
|
||||
match tail.split_first() {
|
||||
Some((thead, tail)) => {
|
||||
let tail = and_recurse(thead, tail);
|
||||
@ -680,7 +671,7 @@ fn flatten_query_tree(query_tree: &Operation) -> Vec<Vec<Vec<Query>>> {
|
||||
}
|
||||
}
|
||||
|
||||
fn recurse(op: &Operation) -> Vec<Vec<Vec<Query>>> {
|
||||
fn recurse(op: &Operation) -> FlattenedQueryTree {
|
||||
match op {
|
||||
And(ops) | Consecutive(ops) => {
|
||||
ops.split_first().map_or_else(Vec::new, |(h, t)| and_recurse(h, t))
|
||||
|
@ -1,4 +1,4 @@
|
||||
use std::{collections::HashMap, mem};
|
||||
use std::mem::take;
|
||||
|
||||
use log::debug;
|
||||
use roaring::RoaringBitmap;
|
||||
@ -60,13 +60,13 @@ impl<'t> Criterion for Exactness<'t> {
|
||||
self.query_tree = None;
|
||||
},
|
||||
Some(state) => {
|
||||
let (candidates, state) = resolve_state(self.ctx, mem::take(state), &self.query)?;
|
||||
let (candidates, state) = resolve_state(self.ctx, take(state), &self.query)?;
|
||||
self.state = state;
|
||||
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: self.query_tree.clone(),
|
||||
candidates: Some(candidates),
|
||||
bucket_candidates: mem::take(&mut self.bucket_candidates),
|
||||
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
||||
}));
|
||||
},
|
||||
None => {
|
||||
@ -74,11 +74,16 @@ impl<'t> Criterion for Exactness<'t> {
|
||||
Some(CriterionResult { query_tree: Some(query_tree), candidates, bucket_candidates }) => {
|
||||
let candidates = match candidates {
|
||||
Some(candidates) => candidates,
|
||||
None => resolve_query_tree(self.ctx, &query_tree, &mut HashMap::new(), params.wdcache)?,
|
||||
None => resolve_query_tree(self.ctx, &query_tree, params.wdcache)?,
|
||||
};
|
||||
|
||||
match bucket_candidates {
|
||||
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
|
||||
None => self.bucket_candidates |= &candidates,
|
||||
}
|
||||
|
||||
self.state = Some(State::new(candidates));
|
||||
self.query_tree = Some(query_tree);
|
||||
self.bucket_candidates |= bucket_candidates;
|
||||
},
|
||||
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
|
||||
return Ok(Some(CriterionResult {
|
||||
|
@ -1,5 +1,3 @@
|
||||
use std::collections::HashMap;
|
||||
|
||||
use log::debug;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
@ -41,19 +39,15 @@ impl<'t> Final<'t> {
|
||||
};
|
||||
|
||||
match self.parent.next(&mut criterion_parameters)? {
|
||||
Some(CriterionResult { query_tree, candidates, mut bucket_candidates }) => {
|
||||
let candidates = match candidates {
|
||||
Some(candidates) => candidates,
|
||||
None => {
|
||||
let candidates = match query_tree.as_ref() {
|
||||
Some(qt) => resolve_query_tree(self.ctx, qt, &mut HashMap::new(), &mut self.wdcache)?,
|
||||
None => self.ctx.documents_ids()?,
|
||||
};
|
||||
bucket_candidates |= &candidates;
|
||||
candidates
|
||||
}
|
||||
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
|
||||
let candidates = match (candidates, query_tree.as_ref()) {
|
||||
(Some(candidates), _) => candidates,
|
||||
(None, Some(qt)) => resolve_query_tree(self.ctx, qt, &mut self.wdcache)?,
|
||||
(None, None) => self.ctx.documents_ids()?,
|
||||
};
|
||||
|
||||
let bucket_candidates = bucket_candidates.unwrap_or_else(|| candidates.clone());
|
||||
|
||||
self.returned_candidates |= &candidates;
|
||||
|
||||
return Ok(Some(FinalResult { query_tree, candidates, bucket_candidates }));
|
||||
|
@ -12,8 +12,8 @@ impl Initial {
|
||||
pub fn new(query_tree: Option<Operation>, mut candidates: Option<RoaringBitmap>) -> Initial {
|
||||
let answer = CriterionResult {
|
||||
query_tree,
|
||||
candidates: candidates.clone(),
|
||||
bucket_candidates: candidates.take().unwrap_or_default(),
|
||||
candidates: candidates.take(),
|
||||
bucket_candidates: None,
|
||||
};
|
||||
Initial { answer: Some(answer) }
|
||||
}
|
||||
|
@ -39,7 +39,7 @@ pub struct CriterionResult {
|
||||
/// if None, it is up to the child to compute the candidates itself.
|
||||
candidates: Option<RoaringBitmap>,
|
||||
/// Candidates that comes from the current bucket of the initial criterion.
|
||||
bucket_candidates: RoaringBitmap,
|
||||
bucket_candidates: Option<RoaringBitmap>,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
@ -57,15 +57,6 @@ enum Candidates {
|
||||
Forbidden(RoaringBitmap)
|
||||
}
|
||||
|
||||
impl Candidates {
|
||||
fn into_inner(self) -> RoaringBitmap {
|
||||
match self {
|
||||
Self::Allowed(inner) => inner,
|
||||
Self::Forbidden(inner) => inner,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl Default for Candidates {
|
||||
fn default() -> Self {
|
||||
Self::Forbidden(RoaringBitmap::new())
|
||||
@ -236,14 +227,12 @@ impl<'t> CriteriaBuilder<'t> {
|
||||
pub fn resolve_query_tree<'t>(
|
||||
ctx: &'t dyn Context,
|
||||
query_tree: &Operation,
|
||||
cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
|
||||
wdcache: &mut WordDerivationsCache,
|
||||
) -> anyhow::Result<RoaringBitmap>
|
||||
{
|
||||
fn resolve_operation<'t>(
|
||||
ctx: &'t dyn Context,
|
||||
query_tree: &Operation,
|
||||
cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
|
||||
wdcache: &mut WordDerivationsCache,
|
||||
) -> anyhow::Result<RoaringBitmap>
|
||||
{
|
||||
@ -252,7 +241,7 @@ pub fn resolve_query_tree<'t>(
|
||||
match query_tree {
|
||||
And(ops) => {
|
||||
let mut ops = ops.iter().map(|op| {
|
||||
resolve_operation(ctx, op, cache, wdcache)
|
||||
resolve_operation(ctx, op, wdcache)
|
||||
}).collect::<anyhow::Result<Vec<_>>>()?;
|
||||
|
||||
ops.sort_unstable_by_key(|cds| cds.len());
|
||||
@ -296,7 +285,7 @@ pub fn resolve_query_tree<'t>(
|
||||
Or(_, ops) => {
|
||||
let mut candidates = RoaringBitmap::new();
|
||||
for op in ops {
|
||||
let docids = resolve_operation(ctx, op, cache, wdcache)?;
|
||||
let docids = resolve_operation(ctx, op, wdcache)?;
|
||||
candidates.union_with(&docids);
|
||||
}
|
||||
Ok(candidates)
|
||||
@ -305,7 +294,7 @@ pub fn resolve_query_tree<'t>(
|
||||
}
|
||||
}
|
||||
|
||||
resolve_operation(ctx, query_tree, cache, wdcache)
|
||||
resolve_operation(ctx, query_tree, wdcache)
|
||||
}
|
||||
|
||||
|
||||
|
@ -30,8 +30,8 @@ const PROXIMITY_THRESHOLD: u8 = 0;
|
||||
|
||||
pub struct Proximity<'t> {
|
||||
ctx: &'t dyn Context<'t>,
|
||||
/// ((max_proximity, query_tree), allowed_candidates)
|
||||
state: Option<(Option<(usize, Operation)>, RoaringBitmap)>,
|
||||
/// (max_proximity, query_tree, allowed_candidates)
|
||||
state: Option<(u8, Operation, RoaringBitmap)>,
|
||||
proximity: u8,
|
||||
bucket_candidates: RoaringBitmap,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
@ -57,114 +57,90 @@ impl<'t> Criterion for Proximity<'t> {
|
||||
#[logging_timer::time("Proximity::{}")]
|
||||
fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> {
|
||||
// remove excluded candidates when next is called, instead of doing it in the loop.
|
||||
if let Some((_, candidates)) = self.state.as_mut() {
|
||||
*candidates -= params.excluded_candidates;
|
||||
if let Some((_, _, allowed_candidates)) = self.state.as_mut() {
|
||||
*allowed_candidates -= params.excluded_candidates;
|
||||
}
|
||||
|
||||
loop {
|
||||
debug!("Proximity at iteration {} (max prox {:?}) ({:?})",
|
||||
self.proximity,
|
||||
self.state.as_ref().map(|(qt, _)| qt.as_ref().map(|(mp, _)| mp)),
|
||||
self.state.as_ref().map(|(_, cd)| cd),
|
||||
self.state.as_ref().map(|(mp, _, _)| mp),
|
||||
self.state.as_ref().map(|(_, _, cd)| cd),
|
||||
);
|
||||
|
||||
match &mut self.state {
|
||||
Some((_, candidates)) if candidates.is_empty() => {
|
||||
Some((max_prox, _, allowed_candidates)) if allowed_candidates.is_empty() || self.proximity > *max_prox => {
|
||||
self.state = None; // reset state
|
||||
},
|
||||
Some((Some((max_prox, query_tree)), candidates)) => {
|
||||
if self.proximity as usize > *max_prox {
|
||||
self.state = None; // reset state
|
||||
} else {
|
||||
let mut new_candidates = if candidates.len() <= CANDIDATES_THRESHOLD && self.proximity > PROXIMITY_THRESHOLD {
|
||||
if let Some(cache) = self.plane_sweep_cache.as_mut() {
|
||||
match cache.next() {
|
||||
Some((p, candidates)) => {
|
||||
self.proximity = p;
|
||||
candidates
|
||||
},
|
||||
None => {
|
||||
self.state = None; // reset state
|
||||
continue
|
||||
},
|
||||
}
|
||||
} else {
|
||||
let cache = resolve_plane_sweep_candidates(
|
||||
self.ctx,
|
||||
query_tree,
|
||||
candidates,
|
||||
params.wdcache,
|
||||
)?;
|
||||
self.plane_sweep_cache = Some(cache.into_iter());
|
||||
|
||||
continue
|
||||
Some((_, query_tree, allowed_candidates)) => {
|
||||
let mut new_candidates = if allowed_candidates.len() <= CANDIDATES_THRESHOLD && self.proximity > PROXIMITY_THRESHOLD {
|
||||
if let Some(cache) = self.plane_sweep_cache.as_mut() {
|
||||
match cache.next() {
|
||||
Some((p, candidates)) => {
|
||||
self.proximity = p;
|
||||
candidates
|
||||
},
|
||||
None => {
|
||||
self.state = None; // reset state
|
||||
continue
|
||||
},
|
||||
}
|
||||
} else { // use set theory based algorithm
|
||||
resolve_candidates(
|
||||
self.ctx,
|
||||
&query_tree,
|
||||
self.proximity,
|
||||
&mut self.candidates_cache,
|
||||
params.wdcache,
|
||||
)?
|
||||
};
|
||||
} else {
|
||||
let cache = resolve_plane_sweep_candidates(
|
||||
self.ctx,
|
||||
query_tree,
|
||||
allowed_candidates,
|
||||
params.wdcache,
|
||||
)?;
|
||||
self.plane_sweep_cache = Some(cache.into_iter());
|
||||
|
||||
new_candidates.intersect_with(&candidates);
|
||||
candidates.difference_with(&new_candidates);
|
||||
self.proximity += 1;
|
||||
continue
|
||||
}
|
||||
} else { // use set theory based algorithm
|
||||
resolve_candidates(
|
||||
self.ctx,
|
||||
&query_tree,
|
||||
self.proximity,
|
||||
&mut self.candidates_cache,
|
||||
params.wdcache,
|
||||
)?
|
||||
};
|
||||
|
||||
new_candidates &= &*allowed_candidates;
|
||||
*allowed_candidates -= &new_candidates;
|
||||
self.proximity += 1;
|
||||
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: Some(query_tree.clone()),
|
||||
candidates: Some(new_candidates),
|
||||
bucket_candidates: take(&mut self.bucket_candidates),
|
||||
}));
|
||||
}
|
||||
},
|
||||
Some((None, candidates)) => {
|
||||
let candidates = take(candidates);
|
||||
self.state = None; // reset state
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates: Some(candidates.clone()),
|
||||
bucket_candidates: candidates,
|
||||
query_tree: Some(query_tree.clone()),
|
||||
candidates: Some(new_candidates),
|
||||
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
||||
}));
|
||||
},
|
||||
None => {
|
||||
match self.parent.next(params)? {
|
||||
Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates: None,
|
||||
bucket_candidates,
|
||||
}));
|
||||
},
|
||||
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
|
||||
let candidates_is_some = candidates.is_some();
|
||||
let candidates = match (&query_tree, candidates) {
|
||||
(_, Some(candidates)) => candidates,
|
||||
(Some(qt), None) => {
|
||||
let candidates = resolve_query_tree(self.ctx, qt, &mut HashMap::new(), params.wdcache)?;
|
||||
candidates - params.excluded_candidates
|
||||
},
|
||||
(None, None) => RoaringBitmap::new(),
|
||||
Some(CriterionResult { query_tree: Some(query_tree), candidates, bucket_candidates }) => {
|
||||
let candidates = match candidates {
|
||||
Some(candidates) => candidates,
|
||||
None => resolve_query_tree(self.ctx, &query_tree, params.wdcache)? - params.excluded_candidates,
|
||||
};
|
||||
|
||||
// If our parent returns candidates it means that the bucket
|
||||
// candidates were already computed before and we can use them.
|
||||
//
|
||||
// If not, we must use the just computed candidates as our bucket
|
||||
// candidates.
|
||||
if candidates_is_some {
|
||||
self.bucket_candidates.union_with(&bucket_candidates);
|
||||
} else {
|
||||
self.bucket_candidates.union_with(&candidates);
|
||||
match bucket_candidates {
|
||||
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
|
||||
None => self.bucket_candidates |= &candidates,
|
||||
}
|
||||
|
||||
let query_tree = query_tree.map(|op| (maximum_proximity(&op), op));
|
||||
self.state = Some((query_tree, candidates));
|
||||
let maximum_proximity = maximum_proximity(&query_tree);
|
||||
self.state = Some((maximum_proximity as u8, query_tree, candidates));
|
||||
self.proximity = 0;
|
||||
self.plane_sweep_cache = None;
|
||||
},
|
||||
Some(CriterionResult { query_tree: None, candidates, bucket_candidates }) => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates,
|
||||
bucket_candidates,
|
||||
}));
|
||||
},
|
||||
None => return Ok(None),
|
||||
}
|
||||
},
|
||||
|
@ -13,15 +13,19 @@ use super::{
|
||||
CriterionParameters,
|
||||
CriterionResult,
|
||||
query_docids,
|
||||
query_pair_proximity_docids
|
||||
query_pair_proximity_docids,
|
||||
resolve_query_tree,
|
||||
};
|
||||
|
||||
/// Maximum number of typo for a word of any length.
|
||||
const MAX_TYPOS_PER_WORD: u8 = 2;
|
||||
|
||||
pub struct Typo<'t> {
|
||||
ctx: &'t dyn Context<'t>,
|
||||
query_tree: Option<(usize, Operation)>,
|
||||
number_typos: u8,
|
||||
candidates: Candidates,
|
||||
bucket_candidates: RoaringBitmap,
|
||||
/// (max_typos, query_tree, candidates)
|
||||
state: Option<(u8, Operation, Candidates)>,
|
||||
typos: u8,
|
||||
bucket_candidates: Option<RoaringBitmap>,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
candidates_cache: HashMap<(Operation, u8), RoaringBitmap>,
|
||||
}
|
||||
@ -30,10 +34,9 @@ impl<'t> Typo<'t> {
|
||||
pub fn new(ctx: &'t dyn Context<'t>, parent: Box<dyn Criterion + 't>) -> Self {
|
||||
Typo {
|
||||
ctx,
|
||||
query_tree: None,
|
||||
number_typos: 0,
|
||||
candidates: Candidates::default(),
|
||||
bucket_candidates: RoaringBitmap::new(),
|
||||
state: None,
|
||||
typos: 0,
|
||||
bucket_candidates: None,
|
||||
parent,
|
||||
candidates_cache: HashMap::new(),
|
||||
}
|
||||
@ -45,113 +48,101 @@ impl<'t> Criterion for Typo<'t> {
|
||||
fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> {
|
||||
use Candidates::{Allowed, Forbidden};
|
||||
// remove excluded candidates when next is called, instead of doing it in the loop.
|
||||
match &mut self.candidates {
|
||||
Allowed(candidates) => *candidates -= params.excluded_candidates,
|
||||
Forbidden(candidates) => *candidates |= params.excluded_candidates,
|
||||
match self.state.as_mut() {
|
||||
Some((_, _, Allowed(candidates))) => *candidates -= params.excluded_candidates,
|
||||
Some((_, _, Forbidden(candidates))) => *candidates |= params.excluded_candidates,
|
||||
None => (),
|
||||
}
|
||||
|
||||
loop {
|
||||
debug!("Typo at iteration {} ({:?})", self.number_typos, self.candidates);
|
||||
debug!("Typo at iteration {} (max typos {:?}) ({:?})",
|
||||
self.typos,
|
||||
self.state.as_ref().map(|(mt, _, _)| mt),
|
||||
self.state.as_ref().map(|(_, _, cd)| cd),
|
||||
);
|
||||
|
||||
match (&mut self.query_tree, &mut self.candidates) {
|
||||
(_, Allowed(candidates)) if candidates.is_empty() => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: self.query_tree.take().map(|(_, qt)| qt),
|
||||
candidates: Some(take(&mut self.candidates).into_inner()),
|
||||
bucket_candidates: take(&mut self.bucket_candidates),
|
||||
}));
|
||||
match self.state.as_mut() {
|
||||
Some((max_typos, _, _)) if self.typos > *max_typos => {
|
||||
self.state = None; // reset state
|
||||
},
|
||||
(Some((max_typos, query_tree)), Allowed(candidates)) => {
|
||||
if self.number_typos as usize > *max_typos {
|
||||
self.query_tree = None;
|
||||
self.candidates = Candidates::default();
|
||||
} else {
|
||||
let fst = self.ctx.words_fst();
|
||||
let new_query_tree = if self.number_typos < 2 {
|
||||
alterate_query_tree(&fst, query_tree.clone(), self.number_typos, params.wdcache)?
|
||||
} else if self.number_typos == 2 {
|
||||
*query_tree = alterate_query_tree(&fst, query_tree.clone(), self.number_typos, params.wdcache)?;
|
||||
query_tree.clone()
|
||||
} else {
|
||||
query_tree.clone()
|
||||
Some((_, _, Allowed(allowed_candidates))) if allowed_candidates.is_empty() => {
|
||||
self.state = None; // reset state
|
||||
},
|
||||
Some((_, query_tree, candidates_authorization)) => {
|
||||
let fst = self.ctx.words_fst();
|
||||
let new_query_tree = if self.typos < MAX_TYPOS_PER_WORD {
|
||||
alterate_query_tree(&fst, query_tree.clone(), self.typos, params.wdcache)?
|
||||
} else if self.typos == MAX_TYPOS_PER_WORD {
|
||||
// When typos >= MAX_TYPOS_PER_WORD, no more alteration of the query tree is possible,
|
||||
// we keep the altered query tree
|
||||
*query_tree = alterate_query_tree(&fst, query_tree.clone(), self.typos, params.wdcache)?;
|
||||
// we compute the allowed candidates
|
||||
let query_tree_allowed_candidates = resolve_query_tree(self.ctx, query_tree, params.wdcache)?;
|
||||
// we assign the allowed candidates to the candidates authorization.
|
||||
*candidates_authorization = match take(candidates_authorization) {
|
||||
Allowed(allowed_candidates) => Allowed(query_tree_allowed_candidates & allowed_candidates),
|
||||
Forbidden(forbidden_candidates) => Allowed(query_tree_allowed_candidates - forbidden_candidates),
|
||||
};
|
||||
|
||||
let mut new_candidates = resolve_candidates(
|
||||
self.ctx,
|
||||
&new_query_tree,
|
||||
self.number_typos,
|
||||
&mut self.candidates_cache,
|
||||
params.wdcache,
|
||||
)?;
|
||||
new_candidates.intersect_with(&candidates);
|
||||
candidates.difference_with(&new_candidates);
|
||||
self.number_typos += 1;
|
||||
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: Some(new_query_tree),
|
||||
candidates: Some(new_candidates),
|
||||
bucket_candidates: take(&mut self.bucket_candidates),
|
||||
}));
|
||||
}
|
||||
},
|
||||
(Some((max_typos, query_tree)), Forbidden(candidates)) => {
|
||||
if self.number_typos as usize > *max_typos {
|
||||
self.query_tree = None;
|
||||
self.candidates = Candidates::default();
|
||||
query_tree.clone()
|
||||
} else {
|
||||
let fst = self.ctx.words_fst();
|
||||
let new_query_tree = if self.number_typos < 2 {
|
||||
alterate_query_tree(&fst, query_tree.clone(), self.number_typos, params.wdcache)?
|
||||
} else if self.number_typos == 2 {
|
||||
*query_tree = alterate_query_tree(&fst, query_tree.clone(), self.number_typos, params.wdcache)?;
|
||||
query_tree.clone()
|
||||
} else {
|
||||
query_tree.clone()
|
||||
};
|
||||
query_tree.clone()
|
||||
};
|
||||
|
||||
let mut new_candidates = resolve_candidates(
|
||||
self.ctx,
|
||||
&new_query_tree,
|
||||
self.number_typos,
|
||||
&mut self.candidates_cache,
|
||||
params.wdcache,
|
||||
)?;
|
||||
new_candidates.difference_with(&candidates);
|
||||
candidates.union_with(&new_candidates);
|
||||
self.number_typos += 1;
|
||||
self.bucket_candidates.union_with(&new_candidates);
|
||||
let mut candidates = resolve_candidates(
|
||||
self.ctx,
|
||||
&new_query_tree,
|
||||
self.typos,
|
||||
&mut self.candidates_cache,
|
||||
params.wdcache,
|
||||
)?;
|
||||
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: Some(new_query_tree),
|
||||
candidates: Some(new_candidates),
|
||||
bucket_candidates: take(&mut self.bucket_candidates),
|
||||
}));
|
||||
}
|
||||
},
|
||||
(None, Allowed(_)) => {
|
||||
let candidates = take(&mut self.candidates).into_inner();
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates: Some(candidates.clone()),
|
||||
bucket_candidates: candidates,
|
||||
}));
|
||||
},
|
||||
(None, Forbidden(_)) => {
|
||||
match self.parent.next(params)? {
|
||||
Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates: None,
|
||||
bucket_candidates,
|
||||
}));
|
||||
match candidates_authorization {
|
||||
Allowed(allowed_candidates) => {
|
||||
candidates &= &*allowed_candidates;
|
||||
*allowed_candidates -= &candidates;
|
||||
},
|
||||
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
|
||||
self.query_tree = query_tree.map(|op| (maximum_typo(&op), op));
|
||||
self.number_typos = 0;
|
||||
self.candidates = candidates.map_or_else(|| {
|
||||
Forbidden(forbidden_candidates) => {
|
||||
candidates -= &*forbidden_candidates;
|
||||
*forbidden_candidates |= &candidates;
|
||||
},
|
||||
}
|
||||
|
||||
let bucket_candidates = match self.bucket_candidates.as_mut() {
|
||||
Some(bucket_candidates) => take(bucket_candidates),
|
||||
None => candidates.clone(),
|
||||
};
|
||||
|
||||
self.typos += 1;
|
||||
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: Some(new_query_tree),
|
||||
candidates: Some(candidates),
|
||||
bucket_candidates: Some(bucket_candidates),
|
||||
}));
|
||||
},
|
||||
None => {
|
||||
match self.parent.next(params)? {
|
||||
Some(CriterionResult { query_tree: Some(query_tree), candidates, bucket_candidates }) => {
|
||||
self.bucket_candidates = match (self.bucket_candidates.take(), bucket_candidates) {
|
||||
(Some(self_bc), Some(parent_bc)) => Some(self_bc | parent_bc),
|
||||
(self_bc, parent_bc) => self_bc.or(parent_bc),
|
||||
};
|
||||
|
||||
let candidates = candidates.map_or_else(|| {
|
||||
Candidates::Forbidden(params.excluded_candidates.clone())
|
||||
}, Candidates::Allowed);
|
||||
self.bucket_candidates.union_with(&bucket_candidates);
|
||||
|
||||
let maximum_typos = maximum_typo(&query_tree) as u8;
|
||||
self.state = Some((maximum_typos, query_tree, candidates));
|
||||
self.typos = 0;
|
||||
|
||||
},
|
||||
Some(CriterionResult { query_tree: None, candidates, bucket_candidates }) => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates,
|
||||
bucket_candidates,
|
||||
}));
|
||||
},
|
||||
None => return Ok(None),
|
||||
}
|
||||
@ -185,7 +176,6 @@ fn alterate_query_tree(
|
||||
ops.iter_mut().try_for_each(|op| recurse(words_fst, op, number_typos, wdcache))
|
||||
},
|
||||
Operation::Query(q) => {
|
||||
// TODO may be optimized when number_typos == 0
|
||||
if let QueryKind::Tolerant { typo, word } = &q.kind {
|
||||
// if no typo is allowed we don't call word_derivations function,
|
||||
// and directly create an Exact query
|
||||
@ -384,7 +374,7 @@ mod test {
|
||||
]),
|
||||
])),
|
||||
candidates: Some(candidates_1.clone()),
|
||||
bucket_candidates: candidates_1,
|
||||
bucket_candidates: Some(candidates_1),
|
||||
};
|
||||
|
||||
assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_1));
|
||||
@ -406,7 +396,7 @@ mod test {
|
||||
]),
|
||||
])),
|
||||
candidates: Some(candidates_2.clone()),
|
||||
bucket_candidates: candidates_2,
|
||||
bucket_candidates: Some(candidates_2),
|
||||
};
|
||||
|
||||
assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_2));
|
||||
@ -428,7 +418,7 @@ mod test {
|
||||
let expected = CriterionResult {
|
||||
query_tree: None,
|
||||
candidates: Some(facet_candidates.clone()),
|
||||
bucket_candidates: facet_candidates,
|
||||
bucket_candidates: None,
|
||||
};
|
||||
|
||||
// first iteration, returns the facet candidates
|
||||
@ -471,7 +461,7 @@ mod test {
|
||||
]),
|
||||
])),
|
||||
candidates: Some(&candidates_1 & &facet_candidates),
|
||||
bucket_candidates: facet_candidates.clone(),
|
||||
bucket_candidates: Some(&candidates_1 & &facet_candidates),
|
||||
};
|
||||
|
||||
assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_1));
|
||||
@ -493,7 +483,7 @@ mod test {
|
||||
]),
|
||||
])),
|
||||
candidates: Some(&candidates_2 & &facet_candidates),
|
||||
bucket_candidates: RoaringBitmap::new(),
|
||||
bucket_candidates: Some(&candidates_2 & &facet_candidates),
|
||||
};
|
||||
|
||||
assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_2));
|
||||
|
@ -1,4 +1,3 @@
|
||||
use std::collections::HashMap;
|
||||
use std::mem::take;
|
||||
|
||||
use log::debug;
|
||||
@ -11,9 +10,9 @@ pub struct Words<'t> {
|
||||
ctx: &'t dyn Context<'t>,
|
||||
query_trees: Vec<Operation>,
|
||||
candidates: Option<RoaringBitmap>,
|
||||
bucket_candidates: RoaringBitmap,
|
||||
bucket_candidates: Option<RoaringBitmap>,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
candidates_cache: HashMap<(Operation, u8), RoaringBitmap>,
|
||||
compute_candidates: bool,
|
||||
}
|
||||
|
||||
impl<'t> Words<'t> {
|
||||
@ -22,9 +21,9 @@ impl<'t> Words<'t> {
|
||||
ctx,
|
||||
query_trees: Vec::default(),
|
||||
candidates: None,
|
||||
bucket_candidates: RoaringBitmap::new(),
|
||||
bucket_candidates: None,
|
||||
parent,
|
||||
candidates_cache: HashMap::default(),
|
||||
compute_candidates: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -40,55 +39,48 @@ impl<'t> Criterion for Words<'t> {
|
||||
loop {
|
||||
debug!("Words at iteration {} ({:?})", self.query_trees.len(), self.candidates);
|
||||
|
||||
match (self.query_trees.pop(), &mut self.candidates) {
|
||||
(query_tree, Some(candidates)) if candidates.is_empty() => {
|
||||
self.query_trees = Vec::new();
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree,
|
||||
candidates: self.candidates.take(),
|
||||
bucket_candidates: take(&mut self.bucket_candidates),
|
||||
}));
|
||||
},
|
||||
(Some(qt), Some(candidates)) => {
|
||||
let mut found_candidates = resolve_query_tree(self.ctx, &qt, &mut self.candidates_cache, params.wdcache)?;
|
||||
found_candidates.intersect_with(&candidates);
|
||||
candidates.difference_with(&found_candidates);
|
||||
match self.query_trees.pop() {
|
||||
Some(query_tree) => {
|
||||
let candidates = match self.candidates.as_mut() {
|
||||
Some(allowed_candidates) if self.compute_candidates => {
|
||||
let mut candidates = resolve_query_tree(self.ctx, &query_tree, params.wdcache)?;
|
||||
candidates &= &*allowed_candidates;
|
||||
*allowed_candidates -= &candidates;
|
||||
Some(candidates)
|
||||
},
|
||||
candidates => candidates.cloned(),
|
||||
};
|
||||
|
||||
let bucket_candidates = match self.bucket_candidates.as_mut() {
|
||||
Some(bucket_candidates) => Some(take(bucket_candidates)),
|
||||
None => None,
|
||||
};
|
||||
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: Some(qt),
|
||||
candidates: Some(found_candidates),
|
||||
bucket_candidates: take(&mut self.bucket_candidates),
|
||||
query_tree: Some(query_tree),
|
||||
candidates,
|
||||
bucket_candidates,
|
||||
}));
|
||||
},
|
||||
(Some(qt), None) => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: Some(qt),
|
||||
candidates: None,
|
||||
bucket_candidates: take(&mut self.bucket_candidates),
|
||||
}));
|
||||
},
|
||||
(None, Some(_)) => {
|
||||
let candidates = self.candidates.take();
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates: candidates.clone(),
|
||||
bucket_candidates: candidates.unwrap_or_default(),
|
||||
}));
|
||||
},
|
||||
(None, None) => {
|
||||
None => {
|
||||
match self.parent.next(params)? {
|
||||
Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => {
|
||||
Some(CriterionResult { query_tree: Some(query_tree), candidates, bucket_candidates }) => {
|
||||
self.query_trees = explode_query_tree(query_tree);
|
||||
self.candidates = candidates;
|
||||
self.compute_candidates = bucket_candidates.is_some();
|
||||
|
||||
self.bucket_candidates = match (self.bucket_candidates.take(), bucket_candidates) {
|
||||
(Some(self_bc), Some(parent_bc)) => Some(self_bc | parent_bc),
|
||||
(self_bc, parent_bc) => self_bc.or(parent_bc),
|
||||
};
|
||||
},
|
||||
Some(CriterionResult { query_tree: None, candidates, bucket_candidates }) => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates: None,
|
||||
candidates,
|
||||
bucket_candidates,
|
||||
}));
|
||||
},
|
||||
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
|
||||
self.query_trees = query_tree.map(explode_query_tree).unwrap_or_default();
|
||||
self.candidates = candidates;
|
||||
self.bucket_candidates.union_with(&bucket_candidates);
|
||||
},
|
||||
None => return Ok(None),
|
||||
}
|
||||
},
|
||||
|
Loading…
x
Reference in New Issue
Block a user