mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-25 14:10:06 +01:00
Optimize Attribute criterion on big requests
This commit is contained in:
parent
716c8e22b0
commit
e77291a6f3
@ -101,7 +101,7 @@ impl<'t> Criterion for Attribute<'t> {
|
|||||||
},
|
},
|
||||||
(Some(qt), None) => {
|
(Some(qt), None) => {
|
||||||
let query_tree_candidates = resolve_query_tree(self.ctx, &qt, &mut HashMap::new(), wdcache)?;
|
let query_tree_candidates = resolve_query_tree(self.ctx, &qt, &mut HashMap::new(), wdcache)?;
|
||||||
self.bucket_candidates.union_with(&query_tree_candidates);
|
self.bucket_candidates |= &query_tree_candidates;
|
||||||
self.candidates = Some(query_tree_candidates);
|
self.candidates = Some(query_tree_candidates);
|
||||||
},
|
},
|
||||||
(None, Some(_)) => {
|
(None, Some(_)) => {
|
||||||
@ -123,7 +123,7 @@ impl<'t> Criterion for Attribute<'t> {
|
|||||||
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
|
Some(CriterionResult { query_tree, candidates, bucket_candidates }) => {
|
||||||
self.query_tree = query_tree;
|
self.query_tree = query_tree;
|
||||||
self.candidates = candidates;
|
self.candidates = candidates;
|
||||||
self.bucket_candidates.union_with(&bucket_candidates);
|
self.bucket_candidates |= bucket_candidates;
|
||||||
self.flattened_query_tree = None;
|
self.flattened_query_tree = None;
|
||||||
self.current_buckets = None;
|
self.current_buckets = None;
|
||||||
},
|
},
|
||||||
@ -160,14 +160,12 @@ impl<'t, 'q> WordLevelIterator<'t, 'q> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn dig(&self, ctx: &'t dyn Context<'t>, level: &TreeLevel) -> heed::Result<Self> {
|
fn dig(&self, ctx: &'t dyn Context<'t>, level: &TreeLevel, left_interval: Option<u32>) -> heed::Result<Self> {
|
||||||
let level = level.min(&self.level).clone();
|
let level = level.min(&self.level).clone();
|
||||||
let interval_size = 4u32.pow(Into::<u8>::into(level.clone()) as u32);
|
let interval_size = 4u32.pow(Into::<u8>::into(level.clone()) as u32);
|
||||||
let word = self.word.clone();
|
let word = self.word.clone();
|
||||||
let in_prefix_cache = self.in_prefix_cache;
|
let in_prefix_cache = self.in_prefix_cache;
|
||||||
// TODO try to dig starting from the current interval
|
let inner = ctx.word_position_iterator(&word, level, in_prefix_cache, left_interval, None)?;
|
||||||
// let left = self.current_interval.map(|(left, _)| left);
|
|
||||||
let inner = ctx.word_position_iterator(&word, level, in_prefix_cache, None, None)?;
|
|
||||||
|
|
||||||
Ok(Self {inner, level, interval_size, word, in_prefix_cache, inner_next: None, current_interval: None})
|
Ok(Self {inner, level, interval_size, word, in_prefix_cache, inner_next: None, current_interval: None})
|
||||||
}
|
}
|
||||||
@ -209,6 +207,7 @@ struct QueryLevelIterator<'t, 'q> {
|
|||||||
level: TreeLevel,
|
level: TreeLevel,
|
||||||
accumulator: Vec<Option<(u32, u32, RoaringBitmap)>>,
|
accumulator: Vec<Option<(u32, u32, RoaringBitmap)>>,
|
||||||
parent_accumulator: Vec<Option<(u32, u32, RoaringBitmap)>>,
|
parent_accumulator: Vec<Option<(u32, u32, RoaringBitmap)>>,
|
||||||
|
interval_to_skip: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'t, 'q> QueryLevelIterator<'t, 'q> {
|
impl<'t, 'q> QueryLevelIterator<'t, 'q> {
|
||||||
@ -250,6 +249,7 @@ impl<'t, 'q> QueryLevelIterator<'t, 'q> {
|
|||||||
level,
|
level,
|
||||||
accumulator: vec![],
|
accumulator: vec![],
|
||||||
parent_accumulator: vec![],
|
parent_accumulator: vec![],
|
||||||
|
interval_to_skip: 0,
|
||||||
})),
|
})),
|
||||||
None => Ok(None),
|
None => Ok(None),
|
||||||
}
|
}
|
||||||
@ -270,16 +270,15 @@ impl<'t, 'q> QueryLevelIterator<'t, 'q> {
|
|||||||
None => (self.level.saturating_sub(1), None),
|
None => (self.level.saturating_sub(1), None),
|
||||||
};
|
};
|
||||||
|
|
||||||
|
let left_interval = self.accumulator.get(self.interval_to_skip).map(|opt| opt.as_ref().map(|(left, _, _)| *left)).flatten();
|
||||||
let mut inner = Vec::with_capacity(self.inner.len());
|
let mut inner = Vec::with_capacity(self.inner.len());
|
||||||
for word_level_iterator in self.inner.iter() {
|
for word_level_iterator in self.inner.iter() {
|
||||||
inner.push(word_level_iterator.dig(ctx, &level)?);
|
inner.push(word_level_iterator.dig(ctx, &level, left_interval)?);
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(Self {parent, inner, level, accumulator: vec![], parent_accumulator: vec![]})
|
Ok(Self {parent, inner, level, accumulator: vec![], parent_accumulator: vec![], interval_to_skip: 0})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
fn inner_next(&mut self, level: TreeLevel) -> heed::Result<Option<(u32, u32, RoaringBitmap)>> {
|
fn inner_next(&mut self, level: TreeLevel) -> heed::Result<Option<(u32, u32, RoaringBitmap)>> {
|
||||||
let mut accumulated: Option<(u32, u32, RoaringBitmap)> = None;
|
let mut accumulated: Option<(u32, u32, RoaringBitmap)> = None;
|
||||||
let u8_level = Into::<u8>::into(level);
|
let u8_level = Into::<u8>::into(level);
|
||||||
@ -289,12 +288,13 @@ impl<'t, 'q> QueryLevelIterator<'t, 'q> {
|
|||||||
let accumulated_count = 4u32.pow((u8_level - wli_u8_level) as u32);
|
let accumulated_count = 4u32.pow((u8_level - wli_u8_level) as u32);
|
||||||
for _ in 0..accumulated_count {
|
for _ in 0..accumulated_count {
|
||||||
if let Some((next_left, _, next_docids)) = wli.next()? {
|
if let Some((next_left, _, next_docids)) = wli.next()? {
|
||||||
accumulated = accumulated.take().map(
|
accumulated = match accumulated.take(){
|
||||||
|(acc_left, acc_right, mut acc_docids)| {
|
Some((acc_left, acc_right, mut acc_docids)) => {
|
||||||
acc_docids.union_with(&next_docids);
|
acc_docids |= next_docids;
|
||||||
(acc_left, acc_right, acc_docids)
|
Some((acc_left, acc_right, acc_docids))
|
||||||
}
|
},
|
||||||
).or_else(|| Some((next_left, next_left + interval_size, next_docids)));
|
None => Some((next_left, next_left + interval_size, next_docids)),
|
||||||
|
};
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -304,35 +304,59 @@ impl<'t, 'q> QueryLevelIterator<'t, 'q> {
|
|||||||
|
|
||||||
/// return the next meta-interval created from inner WordLevelIterators,
|
/// return the next meta-interval created from inner WordLevelIterators,
|
||||||
/// and from eventual chained QueryLevelIterator.
|
/// and from eventual chained QueryLevelIterator.
|
||||||
fn next(&mut self) -> heed::Result<(TreeLevel, Option<(u32, u32, RoaringBitmap)>)> {
|
fn next(&mut self, allowed_candidates: &RoaringBitmap, tree_level: TreeLevel) -> heed::Result<Option<(u32, u32, RoaringBitmap)>> {
|
||||||
let parent_result = match self.parent.as_mut() {
|
let parent_result = match self.parent.as_mut() {
|
||||||
Some(parent) => {
|
Some(parent) => {
|
||||||
Some(parent.next()?)
|
Some(parent.next(allowed_candidates, tree_level)?)
|
||||||
},
|
},
|
||||||
None => None,
|
None => None,
|
||||||
};
|
};
|
||||||
|
|
||||||
match parent_result {
|
match parent_result {
|
||||||
Some((parent_level, parent_next)) => {
|
Some(parent_next) => {
|
||||||
let inner_next = self.inner_next(parent_level)?;
|
let inner_next = self.inner_next(tree_level)?;
|
||||||
|
self.interval_to_skip += self.accumulator.iter().zip(self.parent_accumulator.iter()).skip(self.interval_to_skip).take_while(|current| {
|
||||||
|
match current {
|
||||||
|
(Some((_, _, inner)), Some((_, _, parent))) => {
|
||||||
|
inner.is_disjoint(allowed_candidates) && parent.is_empty()
|
||||||
|
},
|
||||||
|
(Some((_, _, inner)), None) => {
|
||||||
|
inner.is_disjoint(allowed_candidates)
|
||||||
|
},
|
||||||
|
(None, Some((_, _, parent))) => {
|
||||||
|
parent.is_empty()
|
||||||
|
},
|
||||||
|
(None, None) => true,
|
||||||
|
}
|
||||||
|
}).count();
|
||||||
self.accumulator.push(inner_next);
|
self.accumulator.push(inner_next);
|
||||||
self.parent_accumulator.push(parent_next);
|
self.parent_accumulator.push(parent_next);
|
||||||
// TODO @many clean firsts intervals of both accumulators when both RoaringBitmap are empty,
|
let mut merged_interval: Option<(u32, u32, RoaringBitmap)> = None;
|
||||||
// WARNING the cleaned intervals count needs to be kept to skip at the end
|
|
||||||
let mut merged_interval = None;
|
for current in self.accumulator.iter().rev().zip(self.parent_accumulator.iter()).skip(self.interval_to_skip) {
|
||||||
for current in self.accumulator.iter().rev().zip(self.parent_accumulator.iter()) {
|
|
||||||
if let (Some((left_a, right_a, a)), Some((left_b, right_b, b))) = current {
|
if let (Some((left_a, right_a, a)), Some((left_b, right_b, b))) = current {
|
||||||
let (_, _, merged_docids) = merged_interval.get_or_insert_with(|| (left_a + left_b, right_a + right_b, RoaringBitmap::new()));
|
match merged_interval.as_mut() {
|
||||||
merged_docids.union_with(&(a & b));
|
Some((_, _, merged_docids)) => *merged_docids |= a & b,
|
||||||
|
None => merged_interval = Some((left_a + left_b, right_a + right_b, a & b)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok((parent_level, merged_interval))
|
}
|
||||||
|
Ok(merged_interval)
|
||||||
},
|
},
|
||||||
None => {
|
None => {
|
||||||
let level = self.level.clone();
|
let level = self.level;
|
||||||
let next_interval = self.inner_next(level.clone())?;
|
match self.inner_next(level)? {
|
||||||
self.accumulator = vec![next_interval.clone()];
|
Some((left, right, mut candidates)) => {
|
||||||
Ok((level, next_interval))
|
self.accumulator = vec![Some((left, right, RoaringBitmap::new()))];
|
||||||
|
candidates &= allowed_candidates;
|
||||||
|
Ok(Some((left, right, candidates)))
|
||||||
|
|
||||||
|
},
|
||||||
|
None => {
|
||||||
|
self.accumulator = vec![None];
|
||||||
|
Ok(None)
|
||||||
|
},
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -346,17 +370,31 @@ struct Branch<'t, 'q> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<'t, 'q> Branch<'t, 'q> {
|
impl<'t, 'q> Branch<'t, 'q> {
|
||||||
fn next(&mut self) -> heed::Result<bool> {
|
fn next(&mut self, allowed_candidates: &RoaringBitmap) -> heed::Result<bool> {
|
||||||
match self.query_level_iterator.next()? {
|
let tree_level = self.query_level_iterator.level;
|
||||||
(tree_level, Some(last_result)) => {
|
match self.query_level_iterator.next(allowed_candidates, tree_level)? {
|
||||||
|
Some(last_result) => {
|
||||||
self.last_result = last_result;
|
self.last_result = last_result;
|
||||||
self.tree_level = tree_level;
|
self.tree_level = tree_level;
|
||||||
Ok(true)
|
Ok(true)
|
||||||
},
|
},
|
||||||
(_, None) => Ok(false),
|
None => Ok(false),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn dig(&mut self, ctx: &'t dyn Context<'t>) -> heed::Result<()> {
|
||||||
|
self.query_level_iterator = self.query_level_iterator.dig(ctx)?;
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
|
fn lazy_next(&mut self) {
|
||||||
|
let u8_level = Into::<u8>::into(self.tree_level.clone());
|
||||||
|
let interval_size = 4u32.pow(u8_level as u32);
|
||||||
|
let (left, right, _) = self.last_result;
|
||||||
|
|
||||||
|
self.last_result = (left + interval_size, right + interval_size, RoaringBitmap::new());
|
||||||
|
}
|
||||||
|
|
||||||
fn compute_rank(&self) -> u32 {
|
fn compute_rank(&self) -> u32 {
|
||||||
// we compute a rank from the left interval.
|
// we compute a rank from the left interval.
|
||||||
let (left, _, _) = self.last_result;
|
let (left, _, _) = self.last_result;
|
||||||
@ -367,11 +405,11 @@ impl<'t, 'q> Branch<'t, 'q> {
|
|||||||
let self_rank = self.compute_rank();
|
let self_rank = self.compute_rank();
|
||||||
let other_rank = other.compute_rank();
|
let other_rank = other.compute_rank();
|
||||||
let left_cmp = self_rank.cmp(&other_rank).reverse();
|
let left_cmp = self_rank.cmp(&other_rank).reverse();
|
||||||
// on level: higher is better,
|
// on level: lower is better,
|
||||||
// we want to reduce highest levels first.
|
// we want to dig faster into levels on interesting branches.
|
||||||
let level_cmp = self.tree_level.cmp(&other.tree_level);
|
let level_cmp = self.tree_level.cmp(&other.tree_level).reverse();
|
||||||
|
|
||||||
left_cmp.then(level_cmp)
|
left_cmp.then(level_cmp).then(self.last_result.2.len().cmp(&other.last_result.2.len()))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -398,6 +436,7 @@ impl<'t, 'q> Eq for Branch<'t, 'q> {}
|
|||||||
fn initialize_query_level_iterators<'t, 'q>(
|
fn initialize_query_level_iterators<'t, 'q>(
|
||||||
ctx: &'t dyn Context<'t>,
|
ctx: &'t dyn Context<'t>,
|
||||||
branches: &'q Vec<Vec<Vec<Query>>>,
|
branches: &'q Vec<Vec<Vec<Query>>>,
|
||||||
|
allowed_candidates: &RoaringBitmap,
|
||||||
wdcache: &mut WordDerivationsCache,
|
wdcache: &mut WordDerivationsCache,
|
||||||
) -> anyhow::Result<BinaryHeap<Branch<'t, 'q>>> {
|
) -> anyhow::Result<BinaryHeap<Branch<'t, 'q>>> {
|
||||||
|
|
||||||
@ -418,7 +457,6 @@ fn initialize_query_level_iterators<'t, 'q>(
|
|||||||
branch_positions.sort_unstable_by_key(|qli| qli.level);
|
branch_positions.sort_unstable_by_key(|qli| qli.level);
|
||||||
let folded_query_level_iterators = branch_positions
|
let folded_query_level_iterators = branch_positions
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.rev()
|
|
||||||
.fold(None, |fold: Option<QueryLevelIterator>, mut qli| match fold {
|
.fold(None, |fold: Option<QueryLevelIterator>, mut qli| match fold {
|
||||||
Some(fold) => {
|
Some(fold) => {
|
||||||
qli.parent(fold);
|
qli.parent(fold);
|
||||||
@ -428,7 +466,8 @@ fn initialize_query_level_iterators<'t, 'q>(
|
|||||||
});
|
});
|
||||||
|
|
||||||
if let Some(mut folded_query_level_iterators) = folded_query_level_iterators {
|
if let Some(mut folded_query_level_iterators) = folded_query_level_iterators {
|
||||||
let (tree_level, last_result) = folded_query_level_iterators.next()?;
|
let tree_level = folded_query_level_iterators.level;
|
||||||
|
let last_result = folded_query_level_iterators.next(allowed_candidates, tree_level)?;
|
||||||
if let Some(last_result) = last_result {
|
if let Some(last_result) = last_result {
|
||||||
let branch = Branch {
|
let branch = Branch {
|
||||||
last_result,
|
last_result,
|
||||||
@ -451,48 +490,43 @@ fn set_compute_candidates<'t>(
|
|||||||
wdcache: &mut WordDerivationsCache,
|
wdcache: &mut WordDerivationsCache,
|
||||||
) -> anyhow::Result<Option<RoaringBitmap>>
|
) -> anyhow::Result<Option<RoaringBitmap>>
|
||||||
{
|
{
|
||||||
let mut branches_heap = initialize_query_level_iterators(ctx, branches, wdcache)?;
|
let mut branches_heap = initialize_query_level_iterators(ctx, branches, allowed_candidates, wdcache)?;
|
||||||
let lowest_level = TreeLevel::min_value();
|
let lowest_level = TreeLevel::min_value();
|
||||||
let mut final_candidates: Option<(u32, RoaringBitmap)> = None;
|
let mut final_candidates: Option<(u32, RoaringBitmap)> = None;
|
||||||
|
let mut allowed_candidates = allowed_candidates.clone();
|
||||||
|
|
||||||
while let Some(mut branch) = branches_heap.peek_mut() {
|
while let Some(mut branch) = branches_heap.peek_mut() {
|
||||||
let is_lowest_level = branch.tree_level == lowest_level;
|
let is_lowest_level = branch.tree_level == lowest_level;
|
||||||
let branch_rank = branch.compute_rank();
|
let branch_rank = branch.compute_rank();
|
||||||
let (_, _, candidates) = &mut branch.last_result;
|
|
||||||
candidates.intersect_with(&allowed_candidates);
|
|
||||||
if candidates.is_empty() {
|
|
||||||
// we don't have candidates, get next interval.
|
|
||||||
if !branch.next()? { PeekMut::pop(branch); }
|
|
||||||
}
|
|
||||||
else if is_lowest_level {
|
|
||||||
// we have candidates, but we can't dig deeper, return candidates.
|
|
||||||
final_candidates = match final_candidates.take() {
|
|
||||||
Some((best_rank, mut best_candidates)) => {
|
|
||||||
// if current is worse than best we break to return
|
// if current is worse than best we break to return
|
||||||
// candidates that correspond to the best rank
|
// candidates that correspond to the best rank
|
||||||
if branch_rank > best_rank {
|
if let Some((best_rank, _)) = final_candidates { if branch_rank > best_rank { break; } }
|
||||||
final_candidates = Some((best_rank, best_candidates));
|
let _left = branch.last_result.0;
|
||||||
break;
|
let candidates = take(&mut branch.last_result.2);
|
||||||
// else we add current candidates to best candidates
|
if candidates.is_empty() {
|
||||||
// and we fetch the next page
|
// we don't have candidates, get next interval.
|
||||||
} else {
|
if !branch.next(&allowed_candidates)? { PeekMut::pop(branch); }
|
||||||
best_candidates.union_with(candidates);
|
|
||||||
if !branch.next()? { PeekMut::pop(branch); }
|
|
||||||
Some((best_rank, best_candidates))
|
|
||||||
}
|
}
|
||||||
|
else if is_lowest_level {
|
||||||
|
// we have candidates, but we can't dig deeper.
|
||||||
|
allowed_candidates -= &candidates;
|
||||||
|
final_candidates = match final_candidates.take() {
|
||||||
|
// we add current candidates to best candidates
|
||||||
|
Some((best_rank, mut best_candidates)) => {
|
||||||
|
best_candidates |= candidates;
|
||||||
|
branch.lazy_next();
|
||||||
|
Some((best_rank, best_candidates))
|
||||||
},
|
},
|
||||||
// we take current candidates as best candidates
|
// we take current candidates as best candidates
|
||||||
// and we fetch the next page
|
|
||||||
None => {
|
None => {
|
||||||
let candidates = take(candidates);
|
branch.lazy_next();
|
||||||
if !branch.next()? { PeekMut::pop(branch); }
|
|
||||||
Some((branch_rank, candidates))
|
Some((branch_rank, candidates))
|
||||||
},
|
},
|
||||||
};
|
};
|
||||||
} else {
|
} else {
|
||||||
// we have candidates, lets dig deeper in levels.
|
// we have candidates, lets dig deeper in levels.
|
||||||
branch.query_level_iterator = branch.query_level_iterator.dig(ctx)?;
|
branch.dig(ctx)?;
|
||||||
if !branch.next()? { PeekMut::pop(branch); }
|
if !branch.next(&allowed_candidates)? { PeekMut::pop(branch); }
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
Loading…
x
Reference in New Issue
Block a user