mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-25 20:57:35 +01:00
Make bucket candidates optionals
This commit is contained in:
parent
c620626515
commit
e923d51b8f
@ -94,7 +94,6 @@ impl<'t> Criterion for AscDesc<'t> {
|
|||||||
None => {
|
None => {
|
||||||
match self.parent.next(params)? {
|
match self.parent.next(params)? {
|
||||||
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
|
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
|
||||||
let candidates_is_some = candidates.is_some();
|
|
||||||
self.query_tree = query_tree;
|
self.query_tree = query_tree;
|
||||||
let candidates = match (&self.query_tree, candidates) {
|
let candidates = match (&self.query_tree, candidates) {
|
||||||
(_, Some(mut candidates)) => {
|
(_, Some(mut candidates)) => {
|
||||||
@ -103,7 +102,7 @@ impl<'t> Criterion for AscDesc<'t> {
|
|||||||
},
|
},
|
||||||
(Some(qt), None) => {
|
(Some(qt), None) => {
|
||||||
let context = CriteriaBuilder::new(&self.rtxn, &self.index)?;
|
let context = CriteriaBuilder::new(&self.rtxn, &self.index)?;
|
||||||
let mut candidates = resolve_query_tree(&context, qt, &mut HashMap::new(), params.wdcache)?;
|
let mut candidates = resolve_query_tree(&context, qt, params.wdcache)?;
|
||||||
candidates -= params.excluded_candidates;
|
candidates -= params.excluded_candidates;
|
||||||
candidates.intersect_with(&self.faceted_candidates);
|
candidates.intersect_with(&self.faceted_candidates);
|
||||||
candidates
|
candidates
|
||||||
@ -111,15 +110,9 @@ impl<'t> Criterion for AscDesc<'t> {
|
|||||||
(None, None) => take(&mut self.faceted_candidates),
|
(None, None) => take(&mut self.faceted_candidates),
|
||||||
};
|
};
|
||||||
|
|
||||||
// If our parent returns candidates it means that the bucket
|
match bucket_candidates {
|
||||||
// candidates were already computed before and we can use them.
|
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
|
||||||
//
|
None => self.bucket_candidates |= &candidates,
|
||||||
// If not, we must use the just computed candidates as our bucket
|
|
||||||
// candidates.
|
|
||||||
if candidates_is_some {
|
|
||||||
self.bucket_candidates.union_with(&bucket_candidates);
|
|
||||||
} else {
|
|
||||||
self.bucket_candidates.union_with(&candidates);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if candidates.is_empty() {
|
if candidates.is_empty() {
|
||||||
@ -143,7 +136,7 @@ impl<'t> Criterion for AscDesc<'t> {
|
|||||||
return Ok(Some(CriterionResult {
|
return Ok(Some(CriterionResult {
|
||||||
query_tree: self.query_tree.clone(),
|
query_tree: self.query_tree.clone(),
|
||||||
candidates: Some(candidates),
|
candidates: Some(candidates),
|
||||||
bucket_candidates: take(&mut self.bucket_candidates),
|
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
||||||
}));
|
}));
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
|
@ -24,13 +24,12 @@ const LEVEL_EXPONENTIATION_BASE: u32 = 4;
|
|||||||
/// the system to choose between one algorithm or another.
|
/// the system to choose between one algorithm or another.
|
||||||
const CANDIDATES_THRESHOLD: u64 = 1000;
|
const CANDIDATES_THRESHOLD: u64 = 1000;
|
||||||
|
|
||||||
|
type FlattenedQueryTree = Vec<Vec<Vec<Query>>>;
|
||||||
pub struct Attribute<'t> {
|
pub struct Attribute<'t> {
|
||||||
ctx: &'t dyn Context<'t>,
|
ctx: &'t dyn Context<'t>,
|
||||||
query_tree: Option<Operation>,
|
state: Option<(Operation, FlattenedQueryTree, RoaringBitmap)>,
|
||||||
candidates: Option<RoaringBitmap>,
|
|
||||||
bucket_candidates: RoaringBitmap,
|
bucket_candidates: RoaringBitmap,
|
||||||
parent: Box<dyn Criterion + 't>,
|
parent: Box<dyn Criterion + 't>,
|
||||||
flattened_query_tree: Option<Vec<Vec<Vec<Query>>>>,
|
|
||||||
current_buckets: Option<btree_map::IntoIter<u64, RoaringBitmap>>,
|
current_buckets: Option<btree_map::IntoIter<u64, RoaringBitmap>>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -38,11 +37,9 @@ impl<'t> Attribute<'t> {
|
|||||||
pub fn new(ctx: &'t dyn Context<'t>, parent: Box<dyn Criterion + 't>) -> Self {
|
pub fn new(ctx: &'t dyn Context<'t>, parent: Box<dyn Criterion + 't>) -> Self {
|
||||||
Attribute {
|
Attribute {
|
||||||
ctx,
|
ctx,
|
||||||
query_tree: None,
|
state: None,
|
||||||
candidates: None,
|
|
||||||
bucket_candidates: RoaringBitmap::new(),
|
bucket_candidates: RoaringBitmap::new(),
|
||||||
parent,
|
parent,
|
||||||
flattened_query_tree: None,
|
|
||||||
current_buckets: None,
|
current_buckets: None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -52,29 +49,25 @@ impl<'t> Criterion for Attribute<'t> {
|
|||||||
#[logging_timer::time("Attribute::{}")]
|
#[logging_timer::time("Attribute::{}")]
|
||||||
fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> {
|
fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> {
|
||||||
// remove excluded candidates when next is called, instead of doing it in the loop.
|
// remove excluded candidates when next is called, instead of doing it in the loop.
|
||||||
if let Some(candidates) = self.candidates.as_mut() {
|
if let Some((_, _, allowed_candidates)) = self.state.as_mut() {
|
||||||
*candidates -= params.excluded_candidates;
|
*allowed_candidates -= params.excluded_candidates;
|
||||||
}
|
}
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
match (&self.query_tree, &mut self.candidates) {
|
match self.state.take() {
|
||||||
(_, Some(candidates)) if candidates.is_empty() => {
|
Some((query_tree, _, allowed_candidates)) if allowed_candidates.is_empty() => {
|
||||||
return Ok(Some(CriterionResult {
|
return Ok(Some(CriterionResult {
|
||||||
query_tree: self.query_tree.take(),
|
query_tree: Some(query_tree),
|
||||||
candidates: self.candidates.take(),
|
candidates: Some(RoaringBitmap::new()),
|
||||||
bucket_candidates: take(&mut self.bucket_candidates),
|
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
||||||
}));
|
}));
|
||||||
},
|
},
|
||||||
(Some(qt), Some(candidates)) => {
|
Some((query_tree, flattened_query_tree, mut allowed_candidates)) => {
|
||||||
let flattened_query_tree = self.flattened_query_tree.get_or_insert_with(|| {
|
let found_candidates = if allowed_candidates.len() < CANDIDATES_THRESHOLD {
|
||||||
flatten_query_tree(&qt)
|
|
||||||
});
|
|
||||||
|
|
||||||
let found_candidates = if candidates.len() < CANDIDATES_THRESHOLD {
|
|
||||||
let current_buckets = match self.current_buckets.as_mut() {
|
let current_buckets = match self.current_buckets.as_mut() {
|
||||||
Some(current_buckets) => current_buckets,
|
Some(current_buckets) => current_buckets,
|
||||||
None => {
|
None => {
|
||||||
let new_buckets = linear_compute_candidates(self.ctx, flattened_query_tree, candidates)?;
|
let new_buckets = linear_compute_candidates(self.ctx, &flattened_query_tree, &allowed_candidates)?;
|
||||||
self.current_buckets.get_or_insert(new_buckets.into_iter())
|
self.current_buckets.get_or_insert(new_buckets.into_iter())
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
@ -83,62 +76,60 @@ impl<'t> Criterion for Attribute<'t> {
|
|||||||
Some((_score, candidates)) => candidates,
|
Some((_score, candidates)) => candidates,
|
||||||
None => {
|
None => {
|
||||||
return Ok(Some(CriterionResult {
|
return Ok(Some(CriterionResult {
|
||||||
query_tree: self.query_tree.take(),
|
query_tree: Some(query_tree),
|
||||||
candidates: self.candidates.take(),
|
candidates: Some(RoaringBitmap::new()),
|
||||||
bucket_candidates: take(&mut self.bucket_candidates),
|
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
||||||
}));
|
}));
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
match set_compute_candidates(self.ctx, flattened_query_tree, candidates, params.wdcache)? {
|
match set_compute_candidates(self.ctx, &flattened_query_tree, &allowed_candidates, params.wdcache)? {
|
||||||
Some(candidates) => candidates,
|
Some(candidates) => candidates,
|
||||||
None => {
|
None => {
|
||||||
return Ok(Some(CriterionResult {
|
return Ok(Some(CriterionResult {
|
||||||
query_tree: self.query_tree.take(),
|
query_tree: Some(query_tree),
|
||||||
candidates: self.candidates.take(),
|
candidates: Some(RoaringBitmap::new()),
|
||||||
bucket_candidates: take(&mut self.bucket_candidates),
|
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
||||||
}));
|
}));
|
||||||
},
|
},
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
candidates.difference_with(&found_candidates);
|
allowed_candidates -= &found_candidates;
|
||||||
|
|
||||||
|
self.state = Some((query_tree.clone(), flattened_query_tree, allowed_candidates));
|
||||||
|
|
||||||
return Ok(Some(CriterionResult {
|
return Ok(Some(CriterionResult {
|
||||||
query_tree: self.query_tree.clone(),
|
query_tree: Some(query_tree),
|
||||||
candidates: Some(found_candidates),
|
candidates: Some(found_candidates),
|
||||||
bucket_candidates: take(&mut self.bucket_candidates),
|
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
||||||
}));
|
}));
|
||||||
},
|
},
|
||||||
(Some(qt), None) => {
|
None => {
|
||||||
let mut query_tree_candidates = resolve_query_tree(self.ctx, &qt, &mut HashMap::new(), params.wdcache)?;
|
|
||||||
query_tree_candidates -= params.excluded_candidates;
|
|
||||||
self.bucket_candidates |= &query_tree_candidates;
|
|
||||||
self.candidates = Some(query_tree_candidates);
|
|
||||||
},
|
|
||||||
(None, Some(_)) => {
|
|
||||||
return Ok(Some(CriterionResult {
|
|
||||||
query_tree: self.query_tree.take(),
|
|
||||||
candidates: self.candidates.take(),
|
|
||||||
bucket_candidates: take(&mut self.bucket_candidates),
|
|
||||||
}));
|
|
||||||
},
|
|
||||||
(None, None) => {
|
|
||||||
match self.parent.next(params)? {
|
match self.parent.next(params)? {
|
||||||
Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => {
|
Some(CriterionResult { query_tree: Some(query_tree), candidates, bucket_candidates }) => {
|
||||||
|
let candidates = match candidates {
|
||||||
|
Some(candidates) => candidates,
|
||||||
|
None => resolve_query_tree(self.ctx, &query_tree, params.wdcache)? - params.excluded_candidates,
|
||||||
|
};
|
||||||
|
|
||||||
|
let flattened_query_tree = flatten_query_tree(&query_tree);
|
||||||
|
|
||||||
|
match bucket_candidates {
|
||||||
|
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
|
||||||
|
None => self.bucket_candidates |= &candidates,
|
||||||
|
}
|
||||||
|
|
||||||
|
self.state = Some((query_tree, flattened_query_tree, candidates));
|
||||||
|
self.current_buckets = None;
|
||||||
|
},
|
||||||
|
Some(CriterionResult { query_tree: None, candidates, bucket_candidates }) => {
|
||||||
return Ok(Some(CriterionResult {
|
return Ok(Some(CriterionResult {
|
||||||
query_tree: None,
|
query_tree: None,
|
||||||
candidates: None,
|
candidates,
|
||||||
bucket_candidates,
|
bucket_candidates,
|
||||||
}));
|
}));
|
||||||
},
|
},
|
||||||
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
|
|
||||||
self.query_tree = query_tree;
|
|
||||||
self.candidates = candidates;
|
|
||||||
self.bucket_candidates |= bucket_candidates;
|
|
||||||
self.flattened_query_tree = None;
|
|
||||||
self.current_buckets = None;
|
|
||||||
},
|
|
||||||
None => return Ok(None),
|
None => return Ok(None),
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@ -467,7 +458,7 @@ impl<'t, 'q> Eq for Branch<'t, 'q> {}
|
|||||||
|
|
||||||
fn initialize_query_level_iterators<'t, 'q>(
|
fn initialize_query_level_iterators<'t, 'q>(
|
||||||
ctx: &'t dyn Context<'t>,
|
ctx: &'t dyn Context<'t>,
|
||||||
branches: &'q Vec<Vec<Vec<Query>>>,
|
branches: &'q FlattenedQueryTree,
|
||||||
allowed_candidates: &RoaringBitmap,
|
allowed_candidates: &RoaringBitmap,
|
||||||
wdcache: &mut WordDerivationsCache,
|
wdcache: &mut WordDerivationsCache,
|
||||||
) -> anyhow::Result<BinaryHeap<Branch<'t, 'q>>> {
|
) -> anyhow::Result<BinaryHeap<Branch<'t, 'q>>> {
|
||||||
@ -517,7 +508,7 @@ fn initialize_query_level_iterators<'t, 'q>(
|
|||||||
|
|
||||||
fn set_compute_candidates<'t>(
|
fn set_compute_candidates<'t>(
|
||||||
ctx: &'t dyn Context<'t>,
|
ctx: &'t dyn Context<'t>,
|
||||||
branches: &Vec<Vec<Vec<Query>>>,
|
branches: &FlattenedQueryTree,
|
||||||
allowed_candidates: &RoaringBitmap,
|
allowed_candidates: &RoaringBitmap,
|
||||||
wdcache: &mut WordDerivationsCache,
|
wdcache: &mut WordDerivationsCache,
|
||||||
) -> anyhow::Result<Option<RoaringBitmap>>
|
) -> anyhow::Result<Option<RoaringBitmap>>
|
||||||
@ -570,11 +561,11 @@ fn set_compute_candidates<'t>(
|
|||||||
|
|
||||||
fn linear_compute_candidates(
|
fn linear_compute_candidates(
|
||||||
ctx: &dyn Context,
|
ctx: &dyn Context,
|
||||||
branches: &Vec<Vec<Vec<Query>>>,
|
branches: &FlattenedQueryTree,
|
||||||
allowed_candidates: &RoaringBitmap,
|
allowed_candidates: &RoaringBitmap,
|
||||||
) -> anyhow::Result<BTreeMap<u64, RoaringBitmap>>
|
) -> anyhow::Result<BTreeMap<u64, RoaringBitmap>>
|
||||||
{
|
{
|
||||||
fn compute_candidate_rank(branches: &Vec<Vec<Vec<Query>>>, words_positions: HashMap<String, RoaringBitmap>) -> u64 {
|
fn compute_candidate_rank(branches: &FlattenedQueryTree, words_positions: HashMap<String, RoaringBitmap>) -> u64 {
|
||||||
let mut min_rank = u64::max_value();
|
let mut min_rank = u64::max_value();
|
||||||
for branch in branches {
|
for branch in branches {
|
||||||
|
|
||||||
@ -659,10 +650,10 @@ fn linear_compute_candidates(
|
|||||||
}
|
}
|
||||||
|
|
||||||
// TODO can we keep refs of Query
|
// TODO can we keep refs of Query
|
||||||
fn flatten_query_tree(query_tree: &Operation) -> Vec<Vec<Vec<Query>>> {
|
fn flatten_query_tree(query_tree: &Operation) -> FlattenedQueryTree {
|
||||||
use crate::search::criteria::Operation::{And, Or, Consecutive};
|
use crate::search::criteria::Operation::{And, Or, Consecutive};
|
||||||
|
|
||||||
fn and_recurse(head: &Operation, tail: &[Operation]) -> Vec<Vec<Vec<Query>>> {
|
fn and_recurse(head: &Operation, tail: &[Operation]) -> FlattenedQueryTree {
|
||||||
match tail.split_first() {
|
match tail.split_first() {
|
||||||
Some((thead, tail)) => {
|
Some((thead, tail)) => {
|
||||||
let tail = and_recurse(thead, tail);
|
let tail = and_recurse(thead, tail);
|
||||||
@ -680,7 +671,7 @@ fn flatten_query_tree(query_tree: &Operation) -> Vec<Vec<Vec<Query>>> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn recurse(op: &Operation) -> Vec<Vec<Vec<Query>>> {
|
fn recurse(op: &Operation) -> FlattenedQueryTree {
|
||||||
match op {
|
match op {
|
||||||
And(ops) | Consecutive(ops) => {
|
And(ops) | Consecutive(ops) => {
|
||||||
ops.split_first().map_or_else(Vec::new, |(h, t)| and_recurse(h, t))
|
ops.split_first().map_or_else(Vec::new, |(h, t)| and_recurse(h, t))
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
use std::{collections::HashMap, mem};
|
use std::mem::take;
|
||||||
|
|
||||||
use log::debug;
|
use log::debug;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
@ -60,13 +60,13 @@ impl<'t> Criterion for Exactness<'t> {
|
|||||||
self.query_tree = None;
|
self.query_tree = None;
|
||||||
},
|
},
|
||||||
Some(state) => {
|
Some(state) => {
|
||||||
let (candidates, state) = resolve_state(self.ctx, mem::take(state), &self.query)?;
|
let (candidates, state) = resolve_state(self.ctx, take(state), &self.query)?;
|
||||||
self.state = state;
|
self.state = state;
|
||||||
|
|
||||||
return Ok(Some(CriterionResult {
|
return Ok(Some(CriterionResult {
|
||||||
query_tree: self.query_tree.clone(),
|
query_tree: self.query_tree.clone(),
|
||||||
candidates: Some(candidates),
|
candidates: Some(candidates),
|
||||||
bucket_candidates: mem::take(&mut self.bucket_candidates),
|
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
||||||
}));
|
}));
|
||||||
},
|
},
|
||||||
None => {
|
None => {
|
||||||
@ -74,11 +74,16 @@ impl<'t> Criterion for Exactness<'t> {
|
|||||||
Some(CriterionResult { query_tree: Some(query_tree), candidates, bucket_candidates }) => {
|
Some(CriterionResult { query_tree: Some(query_tree), candidates, bucket_candidates }) => {
|
||||||
let candidates = match candidates {
|
let candidates = match candidates {
|
||||||
Some(candidates) => candidates,
|
Some(candidates) => candidates,
|
||||||
None => resolve_query_tree(self.ctx, &query_tree, &mut HashMap::new(), params.wdcache)?,
|
None => resolve_query_tree(self.ctx, &query_tree, params.wdcache)?,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
match bucket_candidates {
|
||||||
|
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
|
||||||
|
None => self.bucket_candidates |= &candidates,
|
||||||
|
}
|
||||||
|
|
||||||
self.state = Some(State::new(candidates));
|
self.state = Some(State::new(candidates));
|
||||||
self.query_tree = Some(query_tree);
|
self.query_tree = Some(query_tree);
|
||||||
self.bucket_candidates |= bucket_candidates;
|
|
||||||
},
|
},
|
||||||
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
|
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
|
||||||
return Ok(Some(CriterionResult {
|
return Ok(Some(CriterionResult {
|
||||||
|
@ -1,5 +1,3 @@
|
|||||||
use std::collections::HashMap;
|
|
||||||
|
|
||||||
use log::debug;
|
use log::debug;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
@ -41,19 +39,15 @@ impl<'t> Final<'t> {
|
|||||||
};
|
};
|
||||||
|
|
||||||
match self.parent.next(&mut criterion_parameters)? {
|
match self.parent.next(&mut criterion_parameters)? {
|
||||||
Some(CriterionResult { query_tree, candidates, mut bucket_candidates }) => {
|
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
|
||||||
let candidates = match candidates {
|
let candidates = match (candidates, query_tree.as_ref()) {
|
||||||
Some(candidates) => candidates,
|
(Some(candidates), _) => candidates,
|
||||||
None => {
|
(None, Some(qt)) => resolve_query_tree(self.ctx, qt, &mut self.wdcache)?,
|
||||||
let candidates = match query_tree.as_ref() {
|
(None, None) => self.ctx.documents_ids()?,
|
||||||
Some(qt) => resolve_query_tree(self.ctx, qt, &mut HashMap::new(), &mut self.wdcache)?,
|
|
||||||
None => self.ctx.documents_ids()?,
|
|
||||||
};
|
|
||||||
bucket_candidates |= &candidates;
|
|
||||||
candidates
|
|
||||||
}
|
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let bucket_candidates = bucket_candidates.unwrap_or_else(|| candidates.clone());
|
||||||
|
|
||||||
self.returned_candidates |= &candidates;
|
self.returned_candidates |= &candidates;
|
||||||
|
|
||||||
return Ok(Some(FinalResult { query_tree, candidates, bucket_candidates }));
|
return Ok(Some(FinalResult { query_tree, candidates, bucket_candidates }));
|
||||||
|
@ -12,8 +12,8 @@ impl Initial {
|
|||||||
pub fn new(query_tree: Option<Operation>, mut candidates: Option<RoaringBitmap>) -> Initial {
|
pub fn new(query_tree: Option<Operation>, mut candidates: Option<RoaringBitmap>) -> Initial {
|
||||||
let answer = CriterionResult {
|
let answer = CriterionResult {
|
||||||
query_tree,
|
query_tree,
|
||||||
candidates: candidates.clone(),
|
candidates: candidates.take(),
|
||||||
bucket_candidates: candidates.take().unwrap_or_default(),
|
bucket_candidates: None,
|
||||||
};
|
};
|
||||||
Initial { answer: Some(answer) }
|
Initial { answer: Some(answer) }
|
||||||
}
|
}
|
||||||
|
@ -39,7 +39,7 @@ pub struct CriterionResult {
|
|||||||
/// if None, it is up to the child to compute the candidates itself.
|
/// if None, it is up to the child to compute the candidates itself.
|
||||||
candidates: Option<RoaringBitmap>,
|
candidates: Option<RoaringBitmap>,
|
||||||
/// Candidates that comes from the current bucket of the initial criterion.
|
/// Candidates that comes from the current bucket of the initial criterion.
|
||||||
bucket_candidates: RoaringBitmap,
|
bucket_candidates: Option<RoaringBitmap>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
@ -57,15 +57,6 @@ enum Candidates {
|
|||||||
Forbidden(RoaringBitmap)
|
Forbidden(RoaringBitmap)
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Candidates {
|
|
||||||
fn into_inner(self) -> RoaringBitmap {
|
|
||||||
match self {
|
|
||||||
Self::Allowed(inner) => inner,
|
|
||||||
Self::Forbidden(inner) => inner,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl Default for Candidates {
|
impl Default for Candidates {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
Self::Forbidden(RoaringBitmap::new())
|
Self::Forbidden(RoaringBitmap::new())
|
||||||
@ -236,14 +227,12 @@ impl<'t> CriteriaBuilder<'t> {
|
|||||||
pub fn resolve_query_tree<'t>(
|
pub fn resolve_query_tree<'t>(
|
||||||
ctx: &'t dyn Context,
|
ctx: &'t dyn Context,
|
||||||
query_tree: &Operation,
|
query_tree: &Operation,
|
||||||
cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
|
|
||||||
wdcache: &mut WordDerivationsCache,
|
wdcache: &mut WordDerivationsCache,
|
||||||
) -> anyhow::Result<RoaringBitmap>
|
) -> anyhow::Result<RoaringBitmap>
|
||||||
{
|
{
|
||||||
fn resolve_operation<'t>(
|
fn resolve_operation<'t>(
|
||||||
ctx: &'t dyn Context,
|
ctx: &'t dyn Context,
|
||||||
query_tree: &Operation,
|
query_tree: &Operation,
|
||||||
cache: &mut HashMap<(Operation, u8), RoaringBitmap>,
|
|
||||||
wdcache: &mut WordDerivationsCache,
|
wdcache: &mut WordDerivationsCache,
|
||||||
) -> anyhow::Result<RoaringBitmap>
|
) -> anyhow::Result<RoaringBitmap>
|
||||||
{
|
{
|
||||||
@ -252,7 +241,7 @@ pub fn resolve_query_tree<'t>(
|
|||||||
match query_tree {
|
match query_tree {
|
||||||
And(ops) => {
|
And(ops) => {
|
||||||
let mut ops = ops.iter().map(|op| {
|
let mut ops = ops.iter().map(|op| {
|
||||||
resolve_operation(ctx, op, cache, wdcache)
|
resolve_operation(ctx, op, wdcache)
|
||||||
}).collect::<anyhow::Result<Vec<_>>>()?;
|
}).collect::<anyhow::Result<Vec<_>>>()?;
|
||||||
|
|
||||||
ops.sort_unstable_by_key(|cds| cds.len());
|
ops.sort_unstable_by_key(|cds| cds.len());
|
||||||
@ -296,7 +285,7 @@ pub fn resolve_query_tree<'t>(
|
|||||||
Or(_, ops) => {
|
Or(_, ops) => {
|
||||||
let mut candidates = RoaringBitmap::new();
|
let mut candidates = RoaringBitmap::new();
|
||||||
for op in ops {
|
for op in ops {
|
||||||
let docids = resolve_operation(ctx, op, cache, wdcache)?;
|
let docids = resolve_operation(ctx, op, wdcache)?;
|
||||||
candidates.union_with(&docids);
|
candidates.union_with(&docids);
|
||||||
}
|
}
|
||||||
Ok(candidates)
|
Ok(candidates)
|
||||||
@ -305,7 +294,7 @@ pub fn resolve_query_tree<'t>(
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
resolve_operation(ctx, query_tree, cache, wdcache)
|
resolve_operation(ctx, query_tree, wdcache)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
@ -30,8 +30,8 @@ const PROXIMITY_THRESHOLD: u8 = 0;
|
|||||||
|
|
||||||
pub struct Proximity<'t> {
|
pub struct Proximity<'t> {
|
||||||
ctx: &'t dyn Context<'t>,
|
ctx: &'t dyn Context<'t>,
|
||||||
/// ((max_proximity, query_tree), allowed_candidates)
|
/// (max_proximity, query_tree, allowed_candidates)
|
||||||
state: Option<(Option<(usize, Operation)>, RoaringBitmap)>,
|
state: Option<(u8, Operation, RoaringBitmap)>,
|
||||||
proximity: u8,
|
proximity: u8,
|
||||||
bucket_candidates: RoaringBitmap,
|
bucket_candidates: RoaringBitmap,
|
||||||
parent: Box<dyn Criterion + 't>,
|
parent: Box<dyn Criterion + 't>,
|
||||||
@ -57,114 +57,90 @@ impl<'t> Criterion for Proximity<'t> {
|
|||||||
#[logging_timer::time("Proximity::{}")]
|
#[logging_timer::time("Proximity::{}")]
|
||||||
fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> {
|
fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> {
|
||||||
// remove excluded candidates when next is called, instead of doing it in the loop.
|
// remove excluded candidates when next is called, instead of doing it in the loop.
|
||||||
if let Some((_, candidates)) = self.state.as_mut() {
|
if let Some((_, _, allowed_candidates)) = self.state.as_mut() {
|
||||||
*candidates -= params.excluded_candidates;
|
*allowed_candidates -= params.excluded_candidates;
|
||||||
}
|
}
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
debug!("Proximity at iteration {} (max prox {:?}) ({:?})",
|
debug!("Proximity at iteration {} (max prox {:?}) ({:?})",
|
||||||
self.proximity,
|
self.proximity,
|
||||||
self.state.as_ref().map(|(qt, _)| qt.as_ref().map(|(mp, _)| mp)),
|
self.state.as_ref().map(|(mp, _, _)| mp),
|
||||||
self.state.as_ref().map(|(_, cd)| cd),
|
self.state.as_ref().map(|(_, _, cd)| cd),
|
||||||
);
|
);
|
||||||
|
|
||||||
match &mut self.state {
|
match &mut self.state {
|
||||||
Some((_, candidates)) if candidates.is_empty() => {
|
Some((max_prox, _, allowed_candidates)) if allowed_candidates.is_empty() || self.proximity > *max_prox => {
|
||||||
self.state = None; // reset state
|
self.state = None; // reset state
|
||||||
},
|
},
|
||||||
Some((Some((max_prox, query_tree)), candidates)) => {
|
Some((_, query_tree, allowed_candidates)) => {
|
||||||
if self.proximity as usize > *max_prox {
|
let mut new_candidates = if allowed_candidates.len() <= CANDIDATES_THRESHOLD && self.proximity > PROXIMITY_THRESHOLD {
|
||||||
self.state = None; // reset state
|
if let Some(cache) = self.plane_sweep_cache.as_mut() {
|
||||||
} else {
|
match cache.next() {
|
||||||
let mut new_candidates = if candidates.len() <= CANDIDATES_THRESHOLD && self.proximity > PROXIMITY_THRESHOLD {
|
Some((p, candidates)) => {
|
||||||
if let Some(cache) = self.plane_sweep_cache.as_mut() {
|
self.proximity = p;
|
||||||
match cache.next() {
|
candidates
|
||||||
Some((p, candidates)) => {
|
},
|
||||||
self.proximity = p;
|
None => {
|
||||||
candidates
|
self.state = None; // reset state
|
||||||
},
|
continue
|
||||||
None => {
|
},
|
||||||
self.state = None; // reset state
|
|
||||||
continue
|
|
||||||
},
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
let cache = resolve_plane_sweep_candidates(
|
|
||||||
self.ctx,
|
|
||||||
query_tree,
|
|
||||||
candidates,
|
|
||||||
params.wdcache,
|
|
||||||
)?;
|
|
||||||
self.plane_sweep_cache = Some(cache.into_iter());
|
|
||||||
|
|
||||||
continue
|
|
||||||
}
|
}
|
||||||
} else { // use set theory based algorithm
|
} else {
|
||||||
resolve_candidates(
|
let cache = resolve_plane_sweep_candidates(
|
||||||
self.ctx,
|
self.ctx,
|
||||||
&query_tree,
|
query_tree,
|
||||||
self.proximity,
|
allowed_candidates,
|
||||||
&mut self.candidates_cache,
|
params.wdcache,
|
||||||
params.wdcache,
|
)?;
|
||||||
)?
|
self.plane_sweep_cache = Some(cache.into_iter());
|
||||||
};
|
|
||||||
|
|
||||||
new_candidates.intersect_with(&candidates);
|
continue
|
||||||
candidates.difference_with(&new_candidates);
|
}
|
||||||
self.proximity += 1;
|
} else { // use set theory based algorithm
|
||||||
|
resolve_candidates(
|
||||||
|
self.ctx,
|
||||||
|
&query_tree,
|
||||||
|
self.proximity,
|
||||||
|
&mut self.candidates_cache,
|
||||||
|
params.wdcache,
|
||||||
|
)?
|
||||||
|
};
|
||||||
|
|
||||||
|
new_candidates &= &*allowed_candidates;
|
||||||
|
*allowed_candidates -= &new_candidates;
|
||||||
|
self.proximity += 1;
|
||||||
|
|
||||||
return Ok(Some(CriterionResult {
|
|
||||||
query_tree: Some(query_tree.clone()),
|
|
||||||
candidates: Some(new_candidates),
|
|
||||||
bucket_candidates: take(&mut self.bucket_candidates),
|
|
||||||
}));
|
|
||||||
}
|
|
||||||
},
|
|
||||||
Some((None, candidates)) => {
|
|
||||||
let candidates = take(candidates);
|
|
||||||
self.state = None; // reset state
|
|
||||||
return Ok(Some(CriterionResult {
|
return Ok(Some(CriterionResult {
|
||||||
query_tree: None,
|
query_tree: Some(query_tree.clone()),
|
||||||
candidates: Some(candidates.clone()),
|
candidates: Some(new_candidates),
|
||||||
bucket_candidates: candidates,
|
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
||||||
}));
|
}));
|
||||||
},
|
},
|
||||||
None => {
|
None => {
|
||||||
match self.parent.next(params)? {
|
match self.parent.next(params)? {
|
||||||
Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => {
|
Some(CriterionResult { query_tree: Some(query_tree), candidates, bucket_candidates }) => {
|
||||||
return Ok(Some(CriterionResult {
|
let candidates = match candidates {
|
||||||
query_tree: None,
|
Some(candidates) => candidates,
|
||||||
candidates: None,
|
None => resolve_query_tree(self.ctx, &query_tree, params.wdcache)? - params.excluded_candidates,
|
||||||
bucket_candidates,
|
|
||||||
}));
|
|
||||||
},
|
|
||||||
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
|
|
||||||
let candidates_is_some = candidates.is_some();
|
|
||||||
let candidates = match (&query_tree, candidates) {
|
|
||||||
(_, Some(candidates)) => candidates,
|
|
||||||
(Some(qt), None) => {
|
|
||||||
let candidates = resolve_query_tree(self.ctx, qt, &mut HashMap::new(), params.wdcache)?;
|
|
||||||
candidates - params.excluded_candidates
|
|
||||||
},
|
|
||||||
(None, None) => RoaringBitmap::new(),
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// If our parent returns candidates it means that the bucket
|
match bucket_candidates {
|
||||||
// candidates were already computed before and we can use them.
|
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
|
||||||
//
|
None => self.bucket_candidates |= &candidates,
|
||||||
// If not, we must use the just computed candidates as our bucket
|
|
||||||
// candidates.
|
|
||||||
if candidates_is_some {
|
|
||||||
self.bucket_candidates.union_with(&bucket_candidates);
|
|
||||||
} else {
|
|
||||||
self.bucket_candidates.union_with(&candidates);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
let query_tree = query_tree.map(|op| (maximum_proximity(&op), op));
|
let maximum_proximity = maximum_proximity(&query_tree);
|
||||||
self.state = Some((query_tree, candidates));
|
self.state = Some((maximum_proximity as u8, query_tree, candidates));
|
||||||
self.proximity = 0;
|
self.proximity = 0;
|
||||||
self.plane_sweep_cache = None;
|
self.plane_sweep_cache = None;
|
||||||
},
|
},
|
||||||
|
Some(CriterionResult { query_tree: None, candidates, bucket_candidates }) => {
|
||||||
|
return Ok(Some(CriterionResult {
|
||||||
|
query_tree: None,
|
||||||
|
candidates,
|
||||||
|
bucket_candidates,
|
||||||
|
}));
|
||||||
|
},
|
||||||
None => return Ok(None),
|
None => return Ok(None),
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
@ -13,15 +13,19 @@ use super::{
|
|||||||
CriterionParameters,
|
CriterionParameters,
|
||||||
CriterionResult,
|
CriterionResult,
|
||||||
query_docids,
|
query_docids,
|
||||||
query_pair_proximity_docids
|
query_pair_proximity_docids,
|
||||||
|
resolve_query_tree,
|
||||||
};
|
};
|
||||||
|
|
||||||
|
/// Maximum number of typo for a word of any length.
|
||||||
|
const MAX_TYPOS_PER_WORD: u8 = 2;
|
||||||
|
|
||||||
pub struct Typo<'t> {
|
pub struct Typo<'t> {
|
||||||
ctx: &'t dyn Context<'t>,
|
ctx: &'t dyn Context<'t>,
|
||||||
query_tree: Option<(usize, Operation)>,
|
/// (max_typos, query_tree, candidates)
|
||||||
number_typos: u8,
|
state: Option<(u8, Operation, Candidates)>,
|
||||||
candidates: Candidates,
|
typos: u8,
|
||||||
bucket_candidates: RoaringBitmap,
|
bucket_candidates: Option<RoaringBitmap>,
|
||||||
parent: Box<dyn Criterion + 't>,
|
parent: Box<dyn Criterion + 't>,
|
||||||
candidates_cache: HashMap<(Operation, u8), RoaringBitmap>,
|
candidates_cache: HashMap<(Operation, u8), RoaringBitmap>,
|
||||||
}
|
}
|
||||||
@ -30,10 +34,9 @@ impl<'t> Typo<'t> {
|
|||||||
pub fn new(ctx: &'t dyn Context<'t>, parent: Box<dyn Criterion + 't>) -> Self {
|
pub fn new(ctx: &'t dyn Context<'t>, parent: Box<dyn Criterion + 't>) -> Self {
|
||||||
Typo {
|
Typo {
|
||||||
ctx,
|
ctx,
|
||||||
query_tree: None,
|
state: None,
|
||||||
number_typos: 0,
|
typos: 0,
|
||||||
candidates: Candidates::default(),
|
bucket_candidates: None,
|
||||||
bucket_candidates: RoaringBitmap::new(),
|
|
||||||
parent,
|
parent,
|
||||||
candidates_cache: HashMap::new(),
|
candidates_cache: HashMap::new(),
|
||||||
}
|
}
|
||||||
@ -45,113 +48,101 @@ impl<'t> Criterion for Typo<'t> {
|
|||||||
fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> {
|
fn next(&mut self, params: &mut CriterionParameters) -> anyhow::Result<Option<CriterionResult>> {
|
||||||
use Candidates::{Allowed, Forbidden};
|
use Candidates::{Allowed, Forbidden};
|
||||||
// remove excluded candidates when next is called, instead of doing it in the loop.
|
// remove excluded candidates when next is called, instead of doing it in the loop.
|
||||||
match &mut self.candidates {
|
match self.state.as_mut() {
|
||||||
Allowed(candidates) => *candidates -= params.excluded_candidates,
|
Some((_, _, Allowed(candidates))) => *candidates -= params.excluded_candidates,
|
||||||
Forbidden(candidates) => *candidates |= params.excluded_candidates,
|
Some((_, _, Forbidden(candidates))) => *candidates |= params.excluded_candidates,
|
||||||
|
None => (),
|
||||||
}
|
}
|
||||||
|
|
||||||
loop {
|
loop {
|
||||||
debug!("Typo at iteration {} ({:?})", self.number_typos, self.candidates);
|
debug!("Typo at iteration {} (max typos {:?}) ({:?})",
|
||||||
|
self.typos,
|
||||||
|
self.state.as_ref().map(|(mt, _, _)| mt),
|
||||||
|
self.state.as_ref().map(|(_, _, cd)| cd),
|
||||||
|
);
|
||||||
|
|
||||||
match (&mut self.query_tree, &mut self.candidates) {
|
match self.state.as_mut() {
|
||||||
(_, Allowed(candidates)) if candidates.is_empty() => {
|
Some((max_typos, _, _)) if self.typos > *max_typos => {
|
||||||
return Ok(Some(CriterionResult {
|
self.state = None; // reset state
|
||||||
query_tree: self.query_tree.take().map(|(_, qt)| qt),
|
|
||||||
candidates: Some(take(&mut self.candidates).into_inner()),
|
|
||||||
bucket_candidates: take(&mut self.bucket_candidates),
|
|
||||||
}));
|
|
||||||
},
|
},
|
||||||
(Some((max_typos, query_tree)), Allowed(candidates)) => {
|
Some((_, _, Allowed(allowed_candidates))) if allowed_candidates.is_empty() => {
|
||||||
if self.number_typos as usize > *max_typos {
|
self.state = None; // reset state
|
||||||
self.query_tree = None;
|
},
|
||||||
self.candidates = Candidates::default();
|
Some((_, query_tree, candidates_authorization)) => {
|
||||||
} else {
|
let fst = self.ctx.words_fst();
|
||||||
let fst = self.ctx.words_fst();
|
let new_query_tree = if self.typos < MAX_TYPOS_PER_WORD {
|
||||||
let new_query_tree = if self.number_typos < 2 {
|
alterate_query_tree(&fst, query_tree.clone(), self.typos, params.wdcache)?
|
||||||
alterate_query_tree(&fst, query_tree.clone(), self.number_typos, params.wdcache)?
|
} else if self.typos == MAX_TYPOS_PER_WORD {
|
||||||
} else if self.number_typos == 2 {
|
// When typos >= MAX_TYPOS_PER_WORD, no more alteration of the query tree is possible,
|
||||||
*query_tree = alterate_query_tree(&fst, query_tree.clone(), self.number_typos, params.wdcache)?;
|
// we keep the altered query tree
|
||||||
query_tree.clone()
|
*query_tree = alterate_query_tree(&fst, query_tree.clone(), self.typos, params.wdcache)?;
|
||||||
} else {
|
// we compute the allowed candidates
|
||||||
query_tree.clone()
|
let query_tree_allowed_candidates = resolve_query_tree(self.ctx, query_tree, params.wdcache)?;
|
||||||
|
// we assign the allowed candidates to the candidates authorization.
|
||||||
|
*candidates_authorization = match take(candidates_authorization) {
|
||||||
|
Allowed(allowed_candidates) => Allowed(query_tree_allowed_candidates & allowed_candidates),
|
||||||
|
Forbidden(forbidden_candidates) => Allowed(query_tree_allowed_candidates - forbidden_candidates),
|
||||||
};
|
};
|
||||||
|
query_tree.clone()
|
||||||
let mut new_candidates = resolve_candidates(
|
|
||||||
self.ctx,
|
|
||||||
&new_query_tree,
|
|
||||||
self.number_typos,
|
|
||||||
&mut self.candidates_cache,
|
|
||||||
params.wdcache,
|
|
||||||
)?;
|
|
||||||
new_candidates.intersect_with(&candidates);
|
|
||||||
candidates.difference_with(&new_candidates);
|
|
||||||
self.number_typos += 1;
|
|
||||||
|
|
||||||
return Ok(Some(CriterionResult {
|
|
||||||
query_tree: Some(new_query_tree),
|
|
||||||
candidates: Some(new_candidates),
|
|
||||||
bucket_candidates: take(&mut self.bucket_candidates),
|
|
||||||
}));
|
|
||||||
}
|
|
||||||
},
|
|
||||||
(Some((max_typos, query_tree)), Forbidden(candidates)) => {
|
|
||||||
if self.number_typos as usize > *max_typos {
|
|
||||||
self.query_tree = None;
|
|
||||||
self.candidates = Candidates::default();
|
|
||||||
} else {
|
} else {
|
||||||
let fst = self.ctx.words_fst();
|
query_tree.clone()
|
||||||
let new_query_tree = if self.number_typos < 2 {
|
};
|
||||||
alterate_query_tree(&fst, query_tree.clone(), self.number_typos, params.wdcache)?
|
|
||||||
} else if self.number_typos == 2 {
|
|
||||||
*query_tree = alterate_query_tree(&fst, query_tree.clone(), self.number_typos, params.wdcache)?;
|
|
||||||
query_tree.clone()
|
|
||||||
} else {
|
|
||||||
query_tree.clone()
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut new_candidates = resolve_candidates(
|
let mut candidates = resolve_candidates(
|
||||||
self.ctx,
|
self.ctx,
|
||||||
&new_query_tree,
|
&new_query_tree,
|
||||||
self.number_typos,
|
self.typos,
|
||||||
&mut self.candidates_cache,
|
&mut self.candidates_cache,
|
||||||
params.wdcache,
|
params.wdcache,
|
||||||
)?;
|
)?;
|
||||||
new_candidates.difference_with(&candidates);
|
|
||||||
candidates.union_with(&new_candidates);
|
|
||||||
self.number_typos += 1;
|
|
||||||
self.bucket_candidates.union_with(&new_candidates);
|
|
||||||
|
|
||||||
return Ok(Some(CriterionResult {
|
match candidates_authorization {
|
||||||
query_tree: Some(new_query_tree),
|
Allowed(allowed_candidates) => {
|
||||||
candidates: Some(new_candidates),
|
candidates &= &*allowed_candidates;
|
||||||
bucket_candidates: take(&mut self.bucket_candidates),
|
*allowed_candidates -= &candidates;
|
||||||
}));
|
|
||||||
}
|
|
||||||
},
|
|
||||||
(None, Allowed(_)) => {
|
|
||||||
let candidates = take(&mut self.candidates).into_inner();
|
|
||||||
return Ok(Some(CriterionResult {
|
|
||||||
query_tree: None,
|
|
||||||
candidates: Some(candidates.clone()),
|
|
||||||
bucket_candidates: candidates,
|
|
||||||
}));
|
|
||||||
},
|
|
||||||
(None, Forbidden(_)) => {
|
|
||||||
match self.parent.next(params)? {
|
|
||||||
Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => {
|
|
||||||
return Ok(Some(CriterionResult {
|
|
||||||
query_tree: None,
|
|
||||||
candidates: None,
|
|
||||||
bucket_candidates,
|
|
||||||
}));
|
|
||||||
},
|
},
|
||||||
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
|
Forbidden(forbidden_candidates) => {
|
||||||
self.query_tree = query_tree.map(|op| (maximum_typo(&op), op));
|
candidates -= &*forbidden_candidates;
|
||||||
self.number_typos = 0;
|
*forbidden_candidates |= &candidates;
|
||||||
self.candidates = candidates.map_or_else(|| {
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
let bucket_candidates = match self.bucket_candidates.as_mut() {
|
||||||
|
Some(bucket_candidates) => take(bucket_candidates),
|
||||||
|
None => candidates.clone(),
|
||||||
|
};
|
||||||
|
|
||||||
|
self.typos += 1;
|
||||||
|
|
||||||
|
return Ok(Some(CriterionResult {
|
||||||
|
query_tree: Some(new_query_tree),
|
||||||
|
candidates: Some(candidates),
|
||||||
|
bucket_candidates: Some(bucket_candidates),
|
||||||
|
}));
|
||||||
|
},
|
||||||
|
None => {
|
||||||
|
match self.parent.next(params)? {
|
||||||
|
Some(CriterionResult { query_tree: Some(query_tree), candidates, bucket_candidates }) => {
|
||||||
|
self.bucket_candidates = match (self.bucket_candidates.take(), bucket_candidates) {
|
||||||
|
(Some(self_bc), Some(parent_bc)) => Some(self_bc | parent_bc),
|
||||||
|
(self_bc, parent_bc) => self_bc.or(parent_bc),
|
||||||
|
};
|
||||||
|
|
||||||
|
let candidates = candidates.map_or_else(|| {
|
||||||
Candidates::Forbidden(params.excluded_candidates.clone())
|
Candidates::Forbidden(params.excluded_candidates.clone())
|
||||||
}, Candidates::Allowed);
|
}, Candidates::Allowed);
|
||||||
self.bucket_candidates.union_with(&bucket_candidates);
|
|
||||||
|
let maximum_typos = maximum_typo(&query_tree) as u8;
|
||||||
|
self.state = Some((maximum_typos, query_tree, candidates));
|
||||||
|
self.typos = 0;
|
||||||
|
|
||||||
|
},
|
||||||
|
Some(CriterionResult { query_tree: None, candidates, bucket_candidates }) => {
|
||||||
|
return Ok(Some(CriterionResult {
|
||||||
|
query_tree: None,
|
||||||
|
candidates,
|
||||||
|
bucket_candidates,
|
||||||
|
}));
|
||||||
},
|
},
|
||||||
None => return Ok(None),
|
None => return Ok(None),
|
||||||
}
|
}
|
||||||
@ -185,7 +176,6 @@ fn alterate_query_tree(
|
|||||||
ops.iter_mut().try_for_each(|op| recurse(words_fst, op, number_typos, wdcache))
|
ops.iter_mut().try_for_each(|op| recurse(words_fst, op, number_typos, wdcache))
|
||||||
},
|
},
|
||||||
Operation::Query(q) => {
|
Operation::Query(q) => {
|
||||||
// TODO may be optimized when number_typos == 0
|
|
||||||
if let QueryKind::Tolerant { typo, word } = &q.kind {
|
if let QueryKind::Tolerant { typo, word } = &q.kind {
|
||||||
// if no typo is allowed we don't call word_derivations function,
|
// if no typo is allowed we don't call word_derivations function,
|
||||||
// and directly create an Exact query
|
// and directly create an Exact query
|
||||||
@ -384,7 +374,7 @@ mod test {
|
|||||||
]),
|
]),
|
||||||
])),
|
])),
|
||||||
candidates: Some(candidates_1.clone()),
|
candidates: Some(candidates_1.clone()),
|
||||||
bucket_candidates: candidates_1,
|
bucket_candidates: Some(candidates_1),
|
||||||
};
|
};
|
||||||
|
|
||||||
assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_1));
|
assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_1));
|
||||||
@ -406,7 +396,7 @@ mod test {
|
|||||||
]),
|
]),
|
||||||
])),
|
])),
|
||||||
candidates: Some(candidates_2.clone()),
|
candidates: Some(candidates_2.clone()),
|
||||||
bucket_candidates: candidates_2,
|
bucket_candidates: Some(candidates_2),
|
||||||
};
|
};
|
||||||
|
|
||||||
assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_2));
|
assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_2));
|
||||||
@ -428,7 +418,7 @@ mod test {
|
|||||||
let expected = CriterionResult {
|
let expected = CriterionResult {
|
||||||
query_tree: None,
|
query_tree: None,
|
||||||
candidates: Some(facet_candidates.clone()),
|
candidates: Some(facet_candidates.clone()),
|
||||||
bucket_candidates: facet_candidates,
|
bucket_candidates: None,
|
||||||
};
|
};
|
||||||
|
|
||||||
// first iteration, returns the facet candidates
|
// first iteration, returns the facet candidates
|
||||||
@ -471,7 +461,7 @@ mod test {
|
|||||||
]),
|
]),
|
||||||
])),
|
])),
|
||||||
candidates: Some(&candidates_1 & &facet_candidates),
|
candidates: Some(&candidates_1 & &facet_candidates),
|
||||||
bucket_candidates: facet_candidates.clone(),
|
bucket_candidates: Some(&candidates_1 & &facet_candidates),
|
||||||
};
|
};
|
||||||
|
|
||||||
assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_1));
|
assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_1));
|
||||||
@ -493,7 +483,7 @@ mod test {
|
|||||||
]),
|
]),
|
||||||
])),
|
])),
|
||||||
candidates: Some(&candidates_2 & &facet_candidates),
|
candidates: Some(&candidates_2 & &facet_candidates),
|
||||||
bucket_candidates: RoaringBitmap::new(),
|
bucket_candidates: Some(&candidates_2 & &facet_candidates),
|
||||||
};
|
};
|
||||||
|
|
||||||
assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_2));
|
assert_eq!(criteria.next(&mut criterion_parameters).unwrap(), Some(expected_2));
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
use std::collections::HashMap;
|
|
||||||
use std::mem::take;
|
use std::mem::take;
|
||||||
|
|
||||||
use log::debug;
|
use log::debug;
|
||||||
@ -11,9 +10,9 @@ pub struct Words<'t> {
|
|||||||
ctx: &'t dyn Context<'t>,
|
ctx: &'t dyn Context<'t>,
|
||||||
query_trees: Vec<Operation>,
|
query_trees: Vec<Operation>,
|
||||||
candidates: Option<RoaringBitmap>,
|
candidates: Option<RoaringBitmap>,
|
||||||
bucket_candidates: RoaringBitmap,
|
bucket_candidates: Option<RoaringBitmap>,
|
||||||
parent: Box<dyn Criterion + 't>,
|
parent: Box<dyn Criterion + 't>,
|
||||||
candidates_cache: HashMap<(Operation, u8), RoaringBitmap>,
|
compute_candidates: bool,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'t> Words<'t> {
|
impl<'t> Words<'t> {
|
||||||
@ -22,9 +21,9 @@ impl<'t> Words<'t> {
|
|||||||
ctx,
|
ctx,
|
||||||
query_trees: Vec::default(),
|
query_trees: Vec::default(),
|
||||||
candidates: None,
|
candidates: None,
|
||||||
bucket_candidates: RoaringBitmap::new(),
|
bucket_candidates: None,
|
||||||
parent,
|
parent,
|
||||||
candidates_cache: HashMap::default(),
|
compute_candidates: false,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -40,55 +39,48 @@ impl<'t> Criterion for Words<'t> {
|
|||||||
loop {
|
loop {
|
||||||
debug!("Words at iteration {} ({:?})", self.query_trees.len(), self.candidates);
|
debug!("Words at iteration {} ({:?})", self.query_trees.len(), self.candidates);
|
||||||
|
|
||||||
match (self.query_trees.pop(), &mut self.candidates) {
|
match self.query_trees.pop() {
|
||||||
(query_tree, Some(candidates)) if candidates.is_empty() => {
|
Some(query_tree) => {
|
||||||
self.query_trees = Vec::new();
|
let candidates = match self.candidates.as_mut() {
|
||||||
return Ok(Some(CriterionResult {
|
Some(allowed_candidates) if self.compute_candidates => {
|
||||||
query_tree,
|
let mut candidates = resolve_query_tree(self.ctx, &query_tree, params.wdcache)?;
|
||||||
candidates: self.candidates.take(),
|
candidates &= &*allowed_candidates;
|
||||||
bucket_candidates: take(&mut self.bucket_candidates),
|
*allowed_candidates -= &candidates;
|
||||||
}));
|
Some(candidates)
|
||||||
},
|
},
|
||||||
(Some(qt), Some(candidates)) => {
|
candidates => candidates.cloned(),
|
||||||
let mut found_candidates = resolve_query_tree(self.ctx, &qt, &mut self.candidates_cache, params.wdcache)?;
|
};
|
||||||
found_candidates.intersect_with(&candidates);
|
|
||||||
candidates.difference_with(&found_candidates);
|
let bucket_candidates = match self.bucket_candidates.as_mut() {
|
||||||
|
Some(bucket_candidates) => Some(take(bucket_candidates)),
|
||||||
|
None => None,
|
||||||
|
};
|
||||||
|
|
||||||
return Ok(Some(CriterionResult {
|
return Ok(Some(CriterionResult {
|
||||||
query_tree: Some(qt),
|
query_tree: Some(query_tree),
|
||||||
candidates: Some(found_candidates),
|
candidates,
|
||||||
bucket_candidates: take(&mut self.bucket_candidates),
|
bucket_candidates,
|
||||||
}));
|
}));
|
||||||
},
|
},
|
||||||
(Some(qt), None) => {
|
None => {
|
||||||
return Ok(Some(CriterionResult {
|
|
||||||
query_tree: Some(qt),
|
|
||||||
candidates: None,
|
|
||||||
bucket_candidates: take(&mut self.bucket_candidates),
|
|
||||||
}));
|
|
||||||
},
|
|
||||||
(None, Some(_)) => {
|
|
||||||
let candidates = self.candidates.take();
|
|
||||||
return Ok(Some(CriterionResult {
|
|
||||||
query_tree: None,
|
|
||||||
candidates: candidates.clone(),
|
|
||||||
bucket_candidates: candidates.unwrap_or_default(),
|
|
||||||
}));
|
|
||||||
},
|
|
||||||
(None, None) => {
|
|
||||||
match self.parent.next(params)? {
|
match self.parent.next(params)? {
|
||||||
Some(CriterionResult { query_tree: None, candidates: None, bucket_candidates }) => {
|
Some(CriterionResult { query_tree: Some(query_tree), candidates, bucket_candidates }) => {
|
||||||
|
self.query_trees = explode_query_tree(query_tree);
|
||||||
|
self.candidates = candidates;
|
||||||
|
self.compute_candidates = bucket_candidates.is_some();
|
||||||
|
|
||||||
|
self.bucket_candidates = match (self.bucket_candidates.take(), bucket_candidates) {
|
||||||
|
(Some(self_bc), Some(parent_bc)) => Some(self_bc | parent_bc),
|
||||||
|
(self_bc, parent_bc) => self_bc.or(parent_bc),
|
||||||
|
};
|
||||||
|
},
|
||||||
|
Some(CriterionResult { query_tree: None, candidates, bucket_candidates }) => {
|
||||||
return Ok(Some(CriterionResult {
|
return Ok(Some(CriterionResult {
|
||||||
query_tree: None,
|
query_tree: None,
|
||||||
candidates: None,
|
candidates,
|
||||||
bucket_candidates,
|
bucket_candidates,
|
||||||
}));
|
}));
|
||||||
},
|
},
|
||||||
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
|
|
||||||
self.query_trees = query_tree.map(explode_query_tree).unwrap_or_default();
|
|
||||||
self.candidates = candidates;
|
|
||||||
self.bucket_candidates.union_with(&bucket_candidates);
|
|
||||||
},
|
|
||||||
None => return Ok(None),
|
None => return Ok(None),
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
|
Loading…
x
Reference in New Issue
Block a user