mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-25 20:57:35 +01:00
Search for documents with longer proximities until we find enough
This commit is contained in:
parent
f277ea134f
commit
54370e228a
@ -535,6 +535,7 @@ fn merge(key: &[u8], values: &[Vec<u8>]) -> Result<Vec<u8>, ()> {
|
|||||||
|
|
||||||
// TODO merge with the previous values
|
// TODO merge with the previous values
|
||||||
// TODO store the documents in a compressed MTBL
|
// TODO store the documents in a compressed MTBL
|
||||||
|
// TODO prefer using iter.append when possible; it is way faster (4x) at injecting ordered entries.
|
||||||
fn lmdb_writer(wtxn: &mut heed::RwTxn, index: &Index, key: &[u8], val: &[u8]) -> anyhow::Result<()> {
|
fn lmdb_writer(wtxn: &mut heed::RwTxn, index: &Index, key: &[u8], val: &[u8]) -> anyhow::Result<()> {
|
||||||
if key == WORDS_FST_KEY {
|
if key == WORDS_FST_KEY {
|
||||||
// Write the words fst
|
// Write the words fst
|
||||||
|
@ -176,7 +176,7 @@ impl<'a> Search<'a> {
|
|||||||
&self,
|
&self,
|
||||||
words: &[(HashMap<String, (u8, RoaringBitmap)>, RoaringBitmap)],
|
words: &[(HashMap<String, (u8, RoaringBitmap)>, RoaringBitmap)],
|
||||||
candidates: &RoaringBitmap,
|
candidates: &RoaringBitmap,
|
||||||
parent_docids: Option<&RoaringBitmap>,
|
parent_docids: &RoaringBitmap,
|
||||||
union_cache: &mut HashMap<(usize, u8), RoaringBitmap>,
|
union_cache: &mut HashMap<(usize, u8), RoaringBitmap>,
|
||||||
) -> anyhow::Result<Option<RoaringBitmap>>
|
) -> anyhow::Result<Option<RoaringBitmap>>
|
||||||
{
|
{
|
||||||
@ -202,15 +202,13 @@ impl<'a> Search<'a> {
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
if let Some(parent_docids) = &parent_docids {
|
|
||||||
docids.intersect_with(parent_docids);
|
docids.intersect_with(parent_docids);
|
||||||
}
|
|
||||||
|
|
||||||
if !docids.is_empty() {
|
if !docids.is_empty() {
|
||||||
let words = &words[1..];
|
let words = &words[1..];
|
||||||
// We are the last word.
|
// We are the last word.
|
||||||
if words.len() < 2 { return Ok(Some(docids)) }
|
if words.len() < 2 { return Ok(Some(docids)) }
|
||||||
if let Some(di) = self.depth_first_search(words, candidates, Some(&docids), union_cache)? {
|
if let Some(di) = self.depth_first_search(words, candidates, &docids, union_cache)? {
|
||||||
return Ok(Some(di))
|
return Ok(Some(di))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -250,9 +248,24 @@ impl<'a> Search<'a> {
|
|||||||
return Ok(SearchResult { found_words, documents_ids });
|
return Ok(SearchResult { found_words, documents_ids });
|
||||||
}
|
}
|
||||||
|
|
||||||
let mut union_cache = HashMap::new();
|
|
||||||
let mut documents = Vec::new();
|
let mut documents = Vec::new();
|
||||||
if let Some(answer) = answer {
|
let mut union_cache = HashMap::new();
|
||||||
|
|
||||||
|
// We execute the DFS until we find enough documents; we run it with the
|
||||||
|
// candidates list and remove the found documents from this list at each iteration.
|
||||||
|
while documents.iter().map(RoaringBitmap::len).sum::<u64>() < limit as u64 {
|
||||||
|
let answer = self.depth_first_search(&derived_words, &candidates, &candidates, &mut union_cache)?;
|
||||||
|
|
||||||
|
let answer = match answer {
|
||||||
|
Some(answer) if !answer.is_empty() => answer,
|
||||||
|
_ => break,
|
||||||
|
};
|
||||||
|
|
||||||
|
debug!("answer: {:?}", answer);
|
||||||
|
|
||||||
|
// We remove the answered documents from the list of
|
||||||
|
// candidates to be sure we don't search for them again.
|
||||||
|
candidates.difference_with(&answer);
|
||||||
documents.push(answer);
|
documents.push(answer);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Loading…
x
Reference in New Issue
Block a user