mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-23 11:47:28 +01:00
Improve the mDFS performance and return the proximity
This commit is contained in:
parent
bb15f16d8c
commit
e9e03259c1
49
src/mdfs.rs
49
src/mdfs.rs
@ -32,18 +32,18 @@ impl<'a> Mdfs<'a> {
|
||||
}
|
||||
|
||||
impl<'a> Iterator for Mdfs<'a> {
|
||||
type Item = anyhow::Result<RoaringBitmap>;
|
||||
type Item = anyhow::Result<(u32, RoaringBitmap)>;
|
||||
|
||||
fn next(&mut self) -> Option<Self::Item> {
|
||||
// If there is one word or fewer, the only
|
||||
// possible documents that we can return are the candidates.
|
||||
if self.words.len() <= 1 {
|
||||
if self.candidates.is_empty() { return None }
|
||||
return Some(Ok(mem::take(&mut self.candidates)));
|
||||
return Some(Ok((0, mem::take(&mut self.candidates))));
|
||||
}
|
||||
|
||||
let mut answer = RoaringBitmap::new();
|
||||
while self.mana <= self.max_mana {
|
||||
let mut answer = RoaringBitmap::new();
|
||||
let result = mdfs_step(
|
||||
&self.index,
|
||||
&self.rtxn,
|
||||
@ -52,24 +52,25 @@ impl<'a> Iterator for Mdfs<'a> {
|
||||
&self.candidates,
|
||||
&self.candidates,
|
||||
&mut self.union_cache,
|
||||
&mut answer,
|
||||
);
|
||||
|
||||
match result {
|
||||
Ok(Some(a)) => {
|
||||
// We remove the answered documents from the list of
|
||||
// candidates to be sure we don't search for them again.
|
||||
self.candidates.difference_with(&a);
|
||||
answer.union_with(&a);
|
||||
},
|
||||
Ok(None) => {
|
||||
// We found the last iteration for this amount of mana that gives nothing,
|
||||
// we can now store that the next mana to use for the loop is incremented.
|
||||
Ok(()) => {
|
||||
// We always increase the mana for the next loop.
|
||||
let proximity = self.mana;
|
||||
self.mana = self.mana + 1;
|
||||
// If the answer is empty it means that we found nothing for this amount
|
||||
// of mana therefore we continue with a bigger mana.
|
||||
|
||||
// If no documents were found we must not return and continue
|
||||
// the search with more mana.
|
||||
if !answer.is_empty() {
|
||||
// Otherwise we return the answer.
|
||||
return Some(Ok(answer));
|
||||
|
||||
// We remove the answered documents from the list of
|
||||
// candidates to be sure we don't search for them again.
|
||||
self.candidates.difference_with(&answer);
|
||||
|
||||
// We return the answer.
|
||||
return Some(Ok((proximity, answer)));
|
||||
}
|
||||
},
|
||||
Err(e) => return Some(Err(e)),
|
||||
@ -88,7 +89,8 @@ fn mdfs_step(
|
||||
candidates: &RoaringBitmap,
|
||||
parent_docids: &RoaringBitmap,
|
||||
union_cache: &mut HashMap<(usize, u8), RoaringBitmap>,
|
||||
) -> anyhow::Result<Option<RoaringBitmap>>
|
||||
answer: &mut RoaringBitmap,
|
||||
) -> anyhow::Result<()>
|
||||
{
|
||||
use std::cmp::{min, max};
|
||||
|
||||
@ -126,19 +128,22 @@ fn mdfs_step(
|
||||
}
|
||||
};
|
||||
|
||||
// We must be sure that we only return docids that are present in the candidates.
|
||||
docids.intersect_with(parent_docids);
|
||||
|
||||
if !docids.is_empty() {
|
||||
let mana = mana.checked_sub(proximity as u32).unwrap();
|
||||
// We are the last pair, we return without recursing as we don't have any child.
|
||||
if tail.len() < 2 { return Ok(Some(docids)) }
|
||||
if let Some(di) = mdfs_step(index, rtxn, mana, tail, candidates, &docids, union_cache)? {
|
||||
return Ok(Some(di))
|
||||
if tail.len() < 2 {
|
||||
// We are the last pair, we return without recursing as we don't have any child.
|
||||
answer.union_with(&docids);
|
||||
return Ok(());
|
||||
} else {
|
||||
return mdfs_step(index, rtxn, mana, tail, candidates, &docids, union_cache, answer);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(None)
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn words_pair_combinations<'h>(
|
||||
|
@ -164,8 +164,8 @@ impl<'a> Search<'a> {
|
||||
// We execute the Mdfs iterator until we find enough documents.
|
||||
while documents.iter().map(RoaringBitmap::len).sum::<u64>() < limit as u64 {
|
||||
match mdfs.next().transpose()? {
|
||||
Some(answer) => {
|
||||
debug!("answer: {:?}", answer);
|
||||
Some((proximity, answer)) => {
|
||||
debug!("answer with a proximity of {}: {:?}", proximity, answer);
|
||||
documents.push(answer);
|
||||
},
|
||||
None => break,
|
||||
|
@ -90,7 +90,7 @@ fn is_chinese(c: char) -> bool {
|
||||
/// length of the found key. Otherwise `None` is returned.
|
||||
///
|
||||
/// This can be used to e.g. build tokenizing functions.
|
||||
//
|
||||
// Copyright @llogiq
|
||||
// https://github.com/BurntSushi/fst/pull/104
|
||||
#[inline]
|
||||
fn find_longest_prefix(fst: &Fst<&[u8]>, value: &[u8]) -> Option<(u64, usize)> {
|
||||
|
Loading…
x
Reference in New Issue
Block a user