mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-24 21:50:07 +01:00
Fix two bugs in proximity ranking rule
This commit is contained in:
parent
83e5b4ed0d
commit
384fdc2df4
@ -213,8 +213,6 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
|||||||
dead_ends_cache.forbid_condition(latest_condition);
|
dead_ends_cache.forbid_condition(latest_condition);
|
||||||
// 2. remove all the edges with this condition from the ranking rule graph
|
// 2. remove all the edges with this condition from the ranking rule graph
|
||||||
graph.remove_edges_with_condition(latest_condition);
|
graph.remove_edges_with_condition(latest_condition);
|
||||||
// 3. Also remove the entry from the condition_docids_cache, since we don't need it anymore
|
|
||||||
condition_docids_cache.cache.remove(&latest_condition);
|
|
||||||
return Ok(ControlFlow::Continue(()));
|
return Ok(ControlFlow::Continue(()));
|
||||||
}
|
}
|
||||||
path_docids &= condition_docids;
|
path_docids &= condition_docids;
|
||||||
|
@ -2,47 +2,37 @@
|
|||||||
|
|
||||||
use std::iter::FromIterator;
|
use std::iter::FromIterator;
|
||||||
|
|
||||||
use fxhash::FxHashSet;
|
|
||||||
use heed::RoTxn;
|
|
||||||
use roaring::RoaringBitmap;
|
|
||||||
|
|
||||||
use super::ProximityCondition;
|
use super::ProximityCondition;
|
||||||
use crate::search::new::db_cache::DatabaseCache;
|
use crate::search::new::db_cache::DatabaseCache;
|
||||||
use crate::search::new::interner::{DedupInterner, Interned};
|
use crate::search::new::interner::{DedupInterner, Interned};
|
||||||
use crate::search::new::query_term::{Phrase, QueryTerm};
|
use crate::search::new::query_term::{Phrase, QueryTerm};
|
||||||
|
use crate::search::new::resolve_query_graph::QueryTermDocIdsCache;
|
||||||
use crate::search::new::SearchContext;
|
use crate::search::new::SearchContext;
|
||||||
use crate::{CboRoaringBitmapCodec, Result};
|
use crate::{CboRoaringBitmapCodec, Index, Result};
|
||||||
|
use fxhash::FxHashSet;
|
||||||
|
use heed::RoTxn;
|
||||||
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
pub fn compute_docids<'ctx>(
|
pub fn compute_docids<'ctx>(
|
||||||
ctx: &mut SearchContext<'ctx>,
|
ctx: &mut SearchContext<'ctx>,
|
||||||
condition: &ProximityCondition,
|
condition: &ProximityCondition,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
) -> Result<(RoaringBitmap, FxHashSet<Interned<String>>, FxHashSet<Interned<Phrase>>)> {
|
) -> Result<(RoaringBitmap, FxHashSet<Interned<String>>, FxHashSet<Interned<Phrase>>)> {
|
||||||
let SearchContext {
|
|
||||||
index,
|
|
||||||
txn,
|
|
||||||
db_cache,
|
|
||||||
word_interner,
|
|
||||||
term_docids,
|
|
||||||
phrase_interner,
|
|
||||||
term_interner,
|
|
||||||
} = ctx;
|
|
||||||
|
|
||||||
let (left_term, right_term, right_term_ngram_len, cost) = match condition {
|
let (left_term, right_term, right_term_ngram_len, cost) = match condition {
|
||||||
ProximityCondition::Uninit { left_term, right_term, right_term_ngram_len, cost } => {
|
ProximityCondition::Uninit { left_term, right_term, right_term_ngram_len, cost } => {
|
||||||
(*left_term, *right_term, *right_term_ngram_len, *cost)
|
(*left_term, *right_term, *right_term_ngram_len, *cost)
|
||||||
}
|
}
|
||||||
ProximityCondition::Term { term } => {
|
ProximityCondition::Term { term } => {
|
||||||
let term_v = term_interner.get(*term);
|
let term_v = ctx.term_interner.get(*term);
|
||||||
return Ok((
|
return Ok((
|
||||||
term_docids
|
ctx.term_docids
|
||||||
.get_query_term_docids(
|
.get_query_term_docids(
|
||||||
index,
|
ctx.index,
|
||||||
txn,
|
ctx.txn,
|
||||||
db_cache,
|
&mut ctx.db_cache,
|
||||||
word_interner,
|
&ctx.word_interner,
|
||||||
term_interner,
|
&ctx.term_interner,
|
||||||
phrase_interner,
|
&ctx.phrase_interner,
|
||||||
*term,
|
*term,
|
||||||
)?
|
)?
|
||||||
.clone(),
|
.clone(),
|
||||||
@ -52,8 +42,8 @@ pub fn compute_docids<'ctx>(
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let left_term = term_interner.get(left_term);
|
let left_term = ctx.term_interner.get(left_term);
|
||||||
let right_term = term_interner.get(right_term);
|
let right_term = ctx.term_interner.get(right_term);
|
||||||
|
|
||||||
// e.g. for the simple words `sun .. flower`
|
// e.g. for the simple words `sun .. flower`
|
||||||
// the cost is 5
|
// the cost is 5
|
||||||
@ -73,12 +63,14 @@ pub fn compute_docids<'ctx>(
|
|||||||
let mut docids = RoaringBitmap::new();
|
let mut docids = RoaringBitmap::new();
|
||||||
|
|
||||||
if let Some(right_prefix) = right_term.use_prefix_db {
|
if let Some(right_prefix) = right_term.use_prefix_db {
|
||||||
for (left_phrase, left_word) in last_word_of_term_iter(left_term, phrase_interner) {
|
for (left_phrase, left_word) in last_word_of_term_iter(left_term, &ctx.phrase_interner) {
|
||||||
compute_prefix_edges(
|
compute_prefix_edges(
|
||||||
index,
|
ctx.index,
|
||||||
txn,
|
ctx.txn,
|
||||||
db_cache,
|
&mut ctx.db_cache,
|
||||||
word_interner,
|
&mut ctx.term_docids,
|
||||||
|
&ctx.word_interner,
|
||||||
|
&ctx.phrase_interner,
|
||||||
left_word,
|
left_word,
|
||||||
right_prefix,
|
right_prefix,
|
||||||
left_phrase,
|
left_phrase,
|
||||||
@ -99,13 +91,16 @@ pub fn compute_docids<'ctx>(
|
|||||||
// + one-typo/zero-typo, then one-typo/one-typo, then ... until an arbitrary limit has been
|
// + one-typo/zero-typo, then one-typo/one-typo, then ... until an arbitrary limit has been
|
||||||
// reached
|
// reached
|
||||||
|
|
||||||
for (left_phrase, left_word) in last_word_of_term_iter(left_term, phrase_interner) {
|
for (left_phrase, left_word) in last_word_of_term_iter(left_term, &ctx.phrase_interner) {
|
||||||
for (right_word, right_phrase) in first_word_of_term_iter(right_term, phrase_interner) {
|
for (right_word, right_phrase) in first_word_of_term_iter(right_term, &ctx.phrase_interner)
|
||||||
|
{
|
||||||
compute_non_prefix_edges(
|
compute_non_prefix_edges(
|
||||||
index,
|
ctx.index,
|
||||||
txn,
|
ctx.txn,
|
||||||
db_cache,
|
&mut ctx.db_cache,
|
||||||
word_interner,
|
&mut ctx.term_docids,
|
||||||
|
&ctx.word_interner,
|
||||||
|
&ctx.phrase_interner,
|
||||||
left_word,
|
left_word,
|
||||||
right_word,
|
right_word,
|
||||||
&[left_phrase, right_phrase].iter().copied().flatten().collect::<Vec<_>>(),
|
&[left_phrase, right_phrase].iter().copied().flatten().collect::<Vec<_>>(),
|
||||||
@ -123,10 +118,12 @@ pub fn compute_docids<'ctx>(
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn compute_prefix_edges<'ctx>(
|
fn compute_prefix_edges<'ctx>(
|
||||||
index: &mut &crate::Index,
|
index: &Index,
|
||||||
txn: &'ctx RoTxn,
|
txn: &'ctx RoTxn,
|
||||||
db_cache: &mut DatabaseCache<'ctx>,
|
db_cache: &mut DatabaseCache<'ctx>,
|
||||||
word_interner: &mut DedupInterner<String>,
|
term_docids: &mut QueryTermDocIdsCache,
|
||||||
|
word_interner: &DedupInterner<String>,
|
||||||
|
phrase_interner: &DedupInterner<Phrase>,
|
||||||
left_word: Interned<String>,
|
left_word: Interned<String>,
|
||||||
right_prefix: Interned<String>,
|
right_prefix: Interned<String>,
|
||||||
left_phrase: Option<Interned<Phrase>>,
|
left_phrase: Option<Interned<Phrase>>,
|
||||||
@ -137,10 +134,23 @@ fn compute_prefix_edges<'ctx>(
|
|||||||
used_words: &mut FxHashSet<Interned<String>>,
|
used_words: &mut FxHashSet<Interned<String>>,
|
||||||
used_phrases: &mut FxHashSet<Interned<Phrase>>,
|
used_phrases: &mut FxHashSet<Interned<Phrase>>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
|
let mut universe = universe.clone();
|
||||||
if let Some(phrase) = left_phrase {
|
if let Some(phrase) = left_phrase {
|
||||||
// TODO: compute the phrase, take the intersection between
|
let phrase_docids = term_docids.get_phrase_docids(
|
||||||
// the phrase and the docids
|
index,
|
||||||
used_phrases.insert(phrase); // This is not fully correct
|
txn,
|
||||||
|
db_cache,
|
||||||
|
word_interner,
|
||||||
|
phrase_interner,
|
||||||
|
phrase,
|
||||||
|
)?;
|
||||||
|
if !phrase_docids.is_empty() {
|
||||||
|
used_phrases.insert(phrase);
|
||||||
|
}
|
||||||
|
universe &= phrase_docids;
|
||||||
|
if universe.is_empty() {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
if let Some(new_docids) = db_cache.get_word_prefix_pair_proximity_docids(
|
if let Some(new_docids) = db_cache.get_word_prefix_pair_proximity_docids(
|
||||||
@ -151,7 +161,7 @@ fn compute_prefix_edges<'ctx>(
|
|||||||
right_prefix,
|
right_prefix,
|
||||||
forward_proximity,
|
forward_proximity,
|
||||||
)? {
|
)? {
|
||||||
let new_docids = universe & CboRoaringBitmapCodec::deserialize_from(new_docids)?;
|
let new_docids = &universe & CboRoaringBitmapCodec::deserialize_from(new_docids)?;
|
||||||
if !new_docids.is_empty() {
|
if !new_docids.is_empty() {
|
||||||
used_words.insert(left_word);
|
used_words.insert(left_word);
|
||||||
used_words.insert(right_prefix);
|
used_words.insert(right_prefix);
|
||||||
@ -169,7 +179,7 @@ fn compute_prefix_edges<'ctx>(
|
|||||||
left_word,
|
left_word,
|
||||||
backward_proximity,
|
backward_proximity,
|
||||||
)? {
|
)? {
|
||||||
let new_docids = universe & CboRoaringBitmapCodec::deserialize_from(new_docids)?;
|
let new_docids = &universe & CboRoaringBitmapCodec::deserialize_from(new_docids)?;
|
||||||
if !new_docids.is_empty() {
|
if !new_docids.is_empty() {
|
||||||
used_words.insert(left_word);
|
used_words.insert(left_word);
|
||||||
used_words.insert(right_prefix);
|
used_words.insert(right_prefix);
|
||||||
@ -182,10 +192,12 @@ fn compute_prefix_edges<'ctx>(
|
|||||||
}
|
}
|
||||||
|
|
||||||
fn compute_non_prefix_edges<'ctx>(
|
fn compute_non_prefix_edges<'ctx>(
|
||||||
index: &mut &crate::Index,
|
index: &Index,
|
||||||
txn: &'ctx RoTxn,
|
txn: &'ctx RoTxn,
|
||||||
db_cache: &mut DatabaseCache<'ctx>,
|
db_cache: &mut DatabaseCache<'ctx>,
|
||||||
word_interner: &mut DedupInterner<String>,
|
term_docids: &mut QueryTermDocIdsCache,
|
||||||
|
word_interner: &DedupInterner<String>,
|
||||||
|
phrase_interner: &DedupInterner<Phrase>,
|
||||||
word1: Interned<String>,
|
word1: Interned<String>,
|
||||||
word2: Interned<String>,
|
word2: Interned<String>,
|
||||||
phrases: &[Interned<Phrase>],
|
phrases: &[Interned<Phrase>],
|
||||||
@ -196,10 +208,23 @@ fn compute_non_prefix_edges<'ctx>(
|
|||||||
used_words: &mut FxHashSet<Interned<String>>,
|
used_words: &mut FxHashSet<Interned<String>>,
|
||||||
used_phrases: &mut FxHashSet<Interned<Phrase>>,
|
used_phrases: &mut FxHashSet<Interned<Phrase>>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
if !phrases.is_empty() {
|
let mut universe = universe.clone();
|
||||||
// TODO: compute the docids associated with these phrases
|
for phrase in phrases {
|
||||||
// take their intersection with the new docids
|
let phrase_docids = term_docids.get_phrase_docids(
|
||||||
used_phrases.extend(phrases); // This is not fully correct
|
index,
|
||||||
|
txn,
|
||||||
|
db_cache,
|
||||||
|
word_interner,
|
||||||
|
phrase_interner,
|
||||||
|
*phrase,
|
||||||
|
)?;
|
||||||
|
if !phrase_docids.is_empty() {
|
||||||
|
used_phrases.insert(*phrase);
|
||||||
|
}
|
||||||
|
universe &= phrase_docids;
|
||||||
|
if universe.is_empty() {
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
}
|
}
|
||||||
if let Some(new_docids) = db_cache.get_word_pair_proximity_docids(
|
if let Some(new_docids) = db_cache.get_word_pair_proximity_docids(
|
||||||
index,
|
index,
|
||||||
@ -209,7 +234,7 @@ fn compute_non_prefix_edges<'ctx>(
|
|||||||
word2,
|
word2,
|
||||||
forward_proximity,
|
forward_proximity,
|
||||||
)? {
|
)? {
|
||||||
let new_docids = universe & CboRoaringBitmapCodec::deserialize_from(new_docids)?;
|
let new_docids = &universe & CboRoaringBitmapCodec::deserialize_from(new_docids)?;
|
||||||
if !new_docids.is_empty() {
|
if !new_docids.is_empty() {
|
||||||
used_words.insert(word1);
|
used_words.insert(word1);
|
||||||
used_words.insert(word2);
|
used_words.insert(word2);
|
||||||
@ -228,7 +253,7 @@ fn compute_non_prefix_edges<'ctx>(
|
|||||||
word1,
|
word1,
|
||||||
backward_proximity,
|
backward_proximity,
|
||||||
)? {
|
)? {
|
||||||
let new_docids = universe & CboRoaringBitmapCodec::deserialize_from(new_docids)?;
|
let new_docids = &universe & CboRoaringBitmapCodec::deserialize_from(new_docids)?;
|
||||||
if !new_docids.is_empty() {
|
if !new_docids.is_empty() {
|
||||||
used_words.insert(word1);
|
used_words.insert(word1);
|
||||||
used_words.insert(word2);
|
used_words.insert(word2);
|
||||||
|
Loading…
x
Reference in New Issue
Block a user