mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 20:37:15 +02:00
Always do the intersections with the universe
This commit is contained in:
parent
50a7393c55
commit
0ca1a4e805
9 changed files with 201 additions and 107 deletions
|
@ -19,11 +19,16 @@ pub struct PhraseDocIdsCache {
|
|||
}
|
||||
impl<'ctx> SearchContext<'ctx> {
|
||||
/// Get the document ids associated with the given phrase
|
||||
pub fn get_phrase_docids(&mut self, phrase: Interned<Phrase>) -> Result<&RoaringBitmap> {
|
||||
pub fn get_phrase_docids(
|
||||
&mut self,
|
||||
universe: Option<&RoaringBitmap>,
|
||||
phrase: Interned<Phrase>,
|
||||
) -> Result<&RoaringBitmap> {
|
||||
if self.phrase_docids.cache.contains_key(&phrase) {
|
||||
return Ok(&self.phrase_docids.cache[&phrase]);
|
||||
};
|
||||
let docids = compute_phrase_docids(self, phrase)?;
|
||||
let docids = compute_phrase_docids(self, universe, phrase)?;
|
||||
// TODO: can we improve that? The cache is keyed by phrase only, so the docids computed for this `universe` are kept and returned to later calls made with a different universe.
|
||||
let _ = self.phrase_docids.cache.insert(phrase, docids);
|
||||
let docids = &self.phrase_docids.cache[&phrase];
|
||||
Ok(docids)
|
||||
|
@ -35,17 +40,18 @@ pub fn compute_query_term_subset_docids(
|
|||
term: &QueryTermSubset,
|
||||
) -> Result<RoaringBitmap> {
|
||||
let mut docids = RoaringBitmap::new();
|
||||
// TODO use the MultiOps trait to do large intersections
|
||||
for word in term.all_single_words_except_prefix_db(ctx)? {
|
||||
if let Some(word_docids) = ctx.word_docids(word)? {
|
||||
if let Some(word_docids) = ctx.word_docids(universe, word)? {
|
||||
docids |= word_docids;
|
||||
}
|
||||
}
|
||||
for phrase in term.all_phrases(ctx)? {
|
||||
docids |= ctx.get_phrase_docids(phrase)?;
|
||||
docids |= ctx.get_phrase_docids(universe, phrase)?;
|
||||
}
|
||||
|
||||
if let Some(prefix) = term.use_prefix_db(ctx) {
|
||||
if let Some(prefix_docids) = ctx.word_prefix_docids(prefix)? {
|
||||
if let Some(prefix_docids) = ctx.word_prefix_docids(universe, prefix)? {
|
||||
docids |= prefix_docids;
|
||||
}
|
||||
}
|
||||
|
@ -58,12 +64,13 @@ pub fn compute_query_term_subset_docids(
|
|||
|
||||
pub fn compute_query_term_subset_docids_within_field_id(
|
||||
ctx: &mut SearchContext<'_>,
|
||||
universe: Option<&RoaringBitmap>,
|
||||
term: &QueryTermSubset,
|
||||
fid: u16,
|
||||
) -> Result<RoaringBitmap> {
|
||||
let mut docids = RoaringBitmap::new();
|
||||
for word in term.all_single_words_except_prefix_db(ctx)? {
|
||||
if let Some(word_fid_docids) = ctx.get_db_word_fid_docids(word.interned(), fid)? {
|
||||
if let Some(word_fid_docids) = ctx.get_db_word_fid_docids(universe, word.interned(), fid)? {
|
||||
docids |= word_fid_docids;
|
||||
}
|
||||
}
|
||||
|
@ -72,15 +79,15 @@ pub fn compute_query_term_subset_docids_within_field_id(
|
|||
// There may be false positives when resolving a phrase, so we're not
|
||||
// guaranteed that all of its words are within a single fid.
|
||||
if let Some(word) = phrase.words(ctx).iter().flatten().next() {
|
||||
if let Some(word_fid_docids) = ctx.get_db_word_fid_docids(*word, fid)? {
|
||||
docids |= ctx.get_phrase_docids(phrase)? & word_fid_docids;
|
||||
if let Some(word_fid_docids) = ctx.get_db_word_fid_docids(universe, *word, fid)? {
|
||||
docids |= ctx.get_phrase_docids(Some(&word_fid_docids), phrase)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(word_prefix) = term.use_prefix_db(ctx) {
|
||||
if let Some(word_fid_docids) =
|
||||
ctx.get_db_word_prefix_fid_docids(word_prefix.interned(), fid)?
|
||||
ctx.get_db_word_prefix_fid_docids(universe, word_prefix.interned(), fid)?
|
||||
{
|
||||
docids |= word_fid_docids;
|
||||
}
|
||||
|
@ -91,13 +98,14 @@ pub fn compute_query_term_subset_docids_within_field_id(
|
|||
|
||||
pub fn compute_query_term_subset_docids_within_position(
|
||||
ctx: &mut SearchContext<'_>,
|
||||
universe: Option<&RoaringBitmap>,
|
||||
term: &QueryTermSubset,
|
||||
position: u16,
|
||||
) -> Result<RoaringBitmap> {
|
||||
let mut docids = RoaringBitmap::new();
|
||||
for word in term.all_single_words_except_prefix_db(ctx)? {
|
||||
if let Some(word_position_docids) =
|
||||
ctx.get_db_word_position_docids(word.interned(), position)?
|
||||
ctx.get_db_word_position_docids(universe, word.interned(), position)?
|
||||
{
|
||||
docids |= word_position_docids;
|
||||
}
|
||||
|
@ -107,15 +115,17 @@ pub fn compute_query_term_subset_docids_within_position(
|
|||
// It's difficult to know the expected position of the words in the phrase,
|
||||
// so instead we just check the first one.
|
||||
if let Some(word) = phrase.words(ctx).iter().flatten().next() {
|
||||
if let Some(word_position_docids) = ctx.get_db_word_position_docids(*word, position)? {
|
||||
docids |= ctx.get_phrase_docids(phrase)? & word_position_docids
|
||||
if let Some(word_position_docids) =
|
||||
ctx.get_db_word_position_docids(universe, *word, position)?
|
||||
{
|
||||
docids |= ctx.get_phrase_docids(Some(&word_position_docids), phrase)?;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(word_prefix) = term.use_prefix_db(ctx) {
|
||||
if let Some(word_position_docids) =
|
||||
ctx.get_db_word_prefix_position_docids(word_prefix.interned(), position)?
|
||||
ctx.get_db_word_prefix_position_docids(universe, word_prefix.interned(), position)?
|
||||
{
|
||||
docids |= word_position_docids;
|
||||
}
|
||||
|
@ -180,6 +190,7 @@ pub fn compute_query_graph_docids(
|
|||
|
||||
pub fn compute_phrase_docids(
|
||||
ctx: &mut SearchContext<'_>,
|
||||
universe: Option<&RoaringBitmap>,
|
||||
phrase: Interned<Phrase>,
|
||||
) -> Result<RoaringBitmap> {
|
||||
let Phrase { words } = ctx.phrase_interner.get(phrase).clone();
|
||||
|
@ -189,7 +200,7 @@ pub fn compute_phrase_docids(
|
|||
}
|
||||
let mut candidates = RoaringBitmap::new();
|
||||
for word in words.iter().flatten().copied() {
|
||||
if let Some(word_docids) = ctx.word_docids(Word::Original(word))? {
|
||||
if let Some(word_docids) = ctx.word_docids(universe, Word::Original(word))? {
|
||||
candidates |= word_docids;
|
||||
} else {
|
||||
return Ok(RoaringBitmap::new());
|
||||
|
@ -213,7 +224,7 @@ pub fn compute_phrase_docids(
|
|||
.filter_map(|(index, word)| word.as_ref().map(|word| (index, word)))
|
||||
{
|
||||
if dist == 0 {
|
||||
match ctx.get_db_word_pair_proximity_docids(s1, s2, 1)? {
|
||||
match ctx.get_db_word_pair_proximity_docids(universe, s1, s2, 1)? {
|
||||
Some(m) => bitmaps.push(m),
|
||||
// If there are no documents for this pair, there will be no
|
||||
// results for the phrase query.
|
||||
|
@ -223,7 +234,7 @@ pub fn compute_phrase_docids(
|
|||
let mut bitmap = RoaringBitmap::new();
|
||||
for dist in 0..=dist {
|
||||
if let Some(m) =
|
||||
ctx.get_db_word_pair_proximity_docids(s1, s2, dist as u8 + 1)?
|
||||
ctx.get_db_word_pair_proximity_docids(universe, s1, s2, dist as u8 + 1)?
|
||||
{
|
||||
bitmap |= m;
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue