mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-04 20:18:55 +01:00
Rewrite proximity ranking rule
This commit is contained in:
parent
ae6bb1ce17
commit
01e24dd630
@ -2,44 +2,26 @@
|
||||
|
||||
use super::ProximityCondition;
|
||||
use crate::search::new::interner::{DedupInterner, Interned};
|
||||
use crate::search::new::query_graph::QueryNodeData;
|
||||
use crate::search::new::query_term::LocatedQueryTerm;
|
||||
use crate::search::new::{QueryNode, SearchContext};
|
||||
use crate::search::new::query_term::LocatedQueryTermSubset;
|
||||
use crate::search::new::SearchContext;
|
||||
use crate::Result;
|
||||
|
||||
pub fn build_edges(
|
||||
_ctx: &mut SearchContext,
|
||||
conditions_interner: &mut DedupInterner<ProximityCondition>,
|
||||
from_node: &QueryNode,
|
||||
to_node: &QueryNode,
|
||||
) -> Result<Vec<(u8, Option<Interned<ProximityCondition>>)>> {
|
||||
let right_term = match &to_node.data {
|
||||
QueryNodeData::End => return Ok(vec![(0, None)]),
|
||||
QueryNodeData::Deleted | QueryNodeData::Start => return Ok(vec![]),
|
||||
QueryNodeData::Term(term) => term,
|
||||
left_term: Option<&LocatedQueryTermSubset>,
|
||||
right_term: &LocatedQueryTermSubset,
|
||||
) -> Result<Vec<(u32, Interned<ProximityCondition>)>> {
|
||||
let right_ngram_length = right_term.term_ids.len();
|
||||
|
||||
let Some(left_term) = left_term else {
|
||||
return Ok(vec![(
|
||||
(right_ngram_length - 1) as u32,
|
||||
conditions_interner.insert(ProximityCondition::Term { term: right_term.clone() }),
|
||||
)])
|
||||
};
|
||||
|
||||
let LocatedQueryTerm { value: right_term_interned, positions: right_positions } = right_term;
|
||||
|
||||
let (right_start_position, right_ngram_length) =
|
||||
(*right_positions.start(), right_positions.len());
|
||||
|
||||
let (left_term_interned, left_end_position) = match &from_node.data {
|
||||
QueryNodeData::Term(LocatedQueryTerm { value, positions }) => (*value, *positions.end()),
|
||||
QueryNodeData::Deleted => return Ok(vec![]),
|
||||
QueryNodeData::Start => {
|
||||
return Ok(vec![(
|
||||
(right_ngram_length - 1) as u8,
|
||||
Some(
|
||||
conditions_interner
|
||||
.insert(ProximityCondition::Term { term: *right_term_interned }),
|
||||
),
|
||||
)])
|
||||
}
|
||||
QueryNodeData::End => return Ok(vec![]),
|
||||
};
|
||||
|
||||
if left_end_position + 1 != right_start_position {
|
||||
if left_term.positions.end() + 1 != *right_term.positions.start() {
|
||||
// We want to ignore this pair of terms
|
||||
// Unconditionally walk through the edge without computing the docids
|
||||
// This can happen when, in a query like `the sun flowers are beautiful`, the term
|
||||
@ -47,30 +29,26 @@ pub fn build_edges(
|
||||
// The remaining query graph represents `the sun .. are beautiful`
|
||||
// but `sun` and `are` have no proximity condition between them
|
||||
return Ok(vec![(
|
||||
(right_ngram_length - 1) as u8,
|
||||
Some(
|
||||
conditions_interner.insert(ProximityCondition::Term { term: *right_term_interned }),
|
||||
),
|
||||
(right_ngram_length - 1) as u32,
|
||||
conditions_interner.insert(ProximityCondition::Term { term: right_term.clone() }),
|
||||
)]);
|
||||
}
|
||||
|
||||
let mut conditions = vec![];
|
||||
for cost in right_ngram_length..(7 + right_ngram_length) {
|
||||
let cost = cost as u8;
|
||||
conditions.push((
|
||||
cost,
|
||||
Some(conditions_interner.insert(ProximityCondition::Uninit {
|
||||
left_term: left_term_interned,
|
||||
right_term: *right_term_interned,
|
||||
right_term_ngram_len: right_ngram_length as u8,
|
||||
cost,
|
||||
})),
|
||||
cost as u32,
|
||||
conditions_interner.insert(ProximityCondition::Uninit {
|
||||
left_term: left_term.clone(),
|
||||
right_term: right_term.clone(),
|
||||
cost: cost as u8,
|
||||
}),
|
||||
))
|
||||
}
|
||||
|
||||
conditions.push((
|
||||
(7 + right_ngram_length) as u8,
|
||||
Some(conditions_interner.insert(ProximityCondition::Term { term: *right_term_interned })),
|
||||
(7 + right_ngram_length) as u32,
|
||||
conditions_interner.insert(ProximityCondition::Term { term: right_term.clone() }),
|
||||
));
|
||||
|
||||
Ok(conditions)
|
||||
|
@ -1,49 +1,37 @@
|
||||
#![allow(clippy::too_many_arguments)]
|
||||
|
||||
use std::iter::FromIterator;
|
||||
|
||||
use super::ProximityCondition;
|
||||
use crate::search::new::db_cache::DatabaseCache;
|
||||
use crate::search::new::interner::{DedupInterner, Interned};
|
||||
use crate::search::new::query_term::{Phrase, QueryTerm};
|
||||
use crate::search::new::resolve_query_graph::QueryTermDocIdsCache;
|
||||
use crate::search::new::interner::Interned;
|
||||
use crate::search::new::query_term::{Phrase, QueryTermSubset};
|
||||
use crate::search::new::ranking_rule_graph::ComputedCondition;
|
||||
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
|
||||
use crate::search::new::SearchContext;
|
||||
use crate::{CboRoaringBitmapCodec, Index, Result};
|
||||
use fxhash::FxHashSet;
|
||||
use heed::RoTxn;
|
||||
use crate::{CboRoaringBitmapCodec, Result};
|
||||
use roaring::RoaringBitmap;
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
pub fn compute_docids(
|
||||
ctx: &mut SearchContext,
|
||||
condition: &ProximityCondition,
|
||||
universe: &RoaringBitmap,
|
||||
) -> Result<(RoaringBitmap, FxHashSet<Interned<String>>, FxHashSet<Interned<Phrase>>)> {
|
||||
let (left_term, right_term, right_term_ngram_len, cost) = match condition {
|
||||
ProximityCondition::Uninit { left_term, right_term, right_term_ngram_len, cost } => {
|
||||
(*left_term, *right_term, *right_term_ngram_len, *cost)
|
||||
) -> Result<ComputedCondition> {
|
||||
let (left_term, right_term, cost) = match condition {
|
||||
ProximityCondition::Uninit { left_term, right_term, cost } => {
|
||||
(left_term, right_term, *cost)
|
||||
}
|
||||
ProximityCondition::Term { term } => {
|
||||
let term_v = ctx.term_interner.get(*term);
|
||||
return Ok((
|
||||
ctx.term_docids
|
||||
.get_query_term_docids(
|
||||
ctx.index,
|
||||
ctx.txn,
|
||||
&mut ctx.db_cache,
|
||||
&ctx.word_interner,
|
||||
&ctx.term_interner,
|
||||
&ctx.phrase_interner,
|
||||
*term,
|
||||
)?
|
||||
.clone(),
|
||||
FxHashSet::from_iter(term_v.all_single_words_except_prefix_db()),
|
||||
FxHashSet::from_iter(term_v.all_phrases()),
|
||||
));
|
||||
let mut docids = compute_query_term_subset_docids(ctx, &term.term_subset)?;
|
||||
docids &= universe;
|
||||
return Ok(ComputedCondition {
|
||||
docids,
|
||||
universe_len: universe.len(),
|
||||
start_term_subset: None,
|
||||
end_term_subset: term.clone(),
|
||||
});
|
||||
}
|
||||
};
|
||||
|
||||
let left_term = ctx.term_interner.get(left_term);
|
||||
let right_term = ctx.term_interner.get(right_term);
|
||||
let right_term_ngram_len = right_term.term_ids.len() as u8;
|
||||
|
||||
// e.g. for the simple words `sun .. flower`
|
||||
// the cost is 5
|
||||
@ -57,20 +45,13 @@ pub fn compute_docids(
|
||||
let forward_proximity = 1 + cost - right_term_ngram_len;
|
||||
let backward_proximity = cost - right_term_ngram_len;
|
||||
|
||||
let mut used_words = FxHashSet::default();
|
||||
let mut used_phrases = FxHashSet::default();
|
||||
|
||||
let mut docids = RoaringBitmap::new();
|
||||
|
||||
if let Some(right_prefix) = right_term.use_prefix_db {
|
||||
for (left_phrase, left_word) in last_word_of_term_iter(left_term, &ctx.phrase_interner) {
|
||||
if let Some(right_prefix) = right_term.term_subset.use_prefix_db(ctx) {
|
||||
for (left_phrase, left_word) in last_words_of_term_derivations(ctx, &left_term.term_subset)?
|
||||
{
|
||||
compute_prefix_edges(
|
||||
ctx.index,
|
||||
ctx.txn,
|
||||
&mut ctx.db_cache,
|
||||
&mut ctx.term_docids,
|
||||
&ctx.word_interner,
|
||||
&ctx.phrase_interner,
|
||||
ctx,
|
||||
left_word,
|
||||
right_prefix,
|
||||
left_phrase,
|
||||
@ -78,8 +59,6 @@ pub fn compute_docids(
|
||||
backward_proximity,
|
||||
&mut docids,
|
||||
universe,
|
||||
&mut used_words,
|
||||
&mut used_phrases,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
@ -91,39 +70,60 @@ pub fn compute_docids(
|
||||
// + one-typo/zero-typo, then one-typo/one-typo, then ... until an arbitrary limit has been
|
||||
// reached
|
||||
|
||||
for (left_phrase, left_word) in last_word_of_term_iter(left_term, &ctx.phrase_interner) {
|
||||
for (right_word, right_phrase) in first_word_of_term_iter(right_term, &ctx.phrase_interner)
|
||||
{
|
||||
for (left_phrase, left_word) in last_words_of_term_derivations(ctx, &left_term.term_subset)? {
|
||||
// Before computing the edges, check that the left word and left phrase
|
||||
// aren't disjoint with the universe, but only do it if there is more than
|
||||
// one word derivation to the right.
|
||||
//
|
||||
// This is an optimisation to avoid checking for an excessive number of
|
||||
// pairs.
|
||||
// WAIT, NO.
|
||||
// This should only be done once per node.
|
||||
// Here, we'll potentially do is.. 16 times?
|
||||
// Maybe we should do it at edge-build time instead.
|
||||
// Same for the future attribute ranking rule.
|
||||
let right_derivs = first_word_of_term_iter(ctx, &right_term.term_subset)?;
|
||||
if right_derivs.len() > 1 {
|
||||
let universe = &universe;
|
||||
if let Some(left_phrase) = left_phrase {
|
||||
if universe.is_disjoint(ctx.get_phrase_docids(left_phrase)?) {
|
||||
continue;
|
||||
}
|
||||
} else if let Some(lw_bytes) = ctx.get_db_word_docids(left_word)? {
|
||||
let left_word_docids = CboRoaringBitmapCodec::deserialize_from(lw_bytes)?;
|
||||
if universe.is_disjoint(&left_word_docids) {
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (right_word, right_phrase) in right_derivs {
|
||||
compute_non_prefix_edges(
|
||||
ctx.index,
|
||||
ctx.txn,
|
||||
&mut ctx.db_cache,
|
||||
&mut ctx.term_docids,
|
||||
&ctx.word_interner,
|
||||
&ctx.phrase_interner,
|
||||
ctx,
|
||||
left_word,
|
||||
right_word,
|
||||
&[left_phrase, right_phrase].iter().copied().flatten().collect::<Vec<_>>(),
|
||||
left_phrase,
|
||||
right_phrase,
|
||||
forward_proximity,
|
||||
backward_proximity,
|
||||
&mut docids,
|
||||
universe,
|
||||
&mut used_words,
|
||||
&mut used_phrases,
|
||||
)?;
|
||||
}
|
||||
}
|
||||
|
||||
Ok((docids, used_words, used_phrases))
|
||||
Ok(ComputedCondition {
|
||||
docids,
|
||||
universe_len: universe.len(),
|
||||
// TODO: think about whether we want to reduce the subset,
|
||||
// we probably should!
|
||||
start_term_subset: Some(left_term.clone()),
|
||||
end_term_subset: right_term.clone(),
|
||||
})
|
||||
}
|
||||
|
||||
fn compute_prefix_edges<'ctx>(
|
||||
index: &Index,
|
||||
txn: &'ctx RoTxn,
|
||||
db_cache: &mut DatabaseCache<'ctx>,
|
||||
term_docids: &mut QueryTermDocIdsCache,
|
||||
word_interner: &DedupInterner<String>,
|
||||
phrase_interner: &DedupInterner<Phrase>,
|
||||
fn compute_prefix_edges(
|
||||
ctx: &mut SearchContext,
|
||||
left_word: Interned<String>,
|
||||
right_prefix: Interned<String>,
|
||||
left_phrase: Option<Interned<Phrase>>,
|
||||
@ -131,21 +131,16 @@ fn compute_prefix_edges<'ctx>(
|
||||
backward_proximity: u8,
|
||||
docids: &mut RoaringBitmap,
|
||||
universe: &RoaringBitmap,
|
||||
used_words: &mut FxHashSet<Interned<String>>,
|
||||
used_phrases: &mut FxHashSet<Interned<Phrase>>,
|
||||
) -> Result<()> {
|
||||
let mut used_left_words = BTreeSet::new();
|
||||
let mut used_left_phrases = BTreeSet::new();
|
||||
let mut used_right_prefix = BTreeSet::new();
|
||||
|
||||
let mut universe = universe.clone();
|
||||
if let Some(phrase) = left_phrase {
|
||||
let phrase_docids = term_docids.get_phrase_docids(
|
||||
index,
|
||||
txn,
|
||||
db_cache,
|
||||
word_interner,
|
||||
phrase_interner,
|
||||
phrase,
|
||||
)?;
|
||||
let phrase_docids = ctx.get_phrase_docids(phrase)?;
|
||||
if !phrase_docids.is_empty() {
|
||||
used_phrases.insert(phrase);
|
||||
used_left_phrases.insert(phrase);
|
||||
}
|
||||
universe &= phrase_docids;
|
||||
if universe.is_empty() {
|
||||
@ -153,36 +148,28 @@ fn compute_prefix_edges<'ctx>(
|
||||
}
|
||||
}
|
||||
|
||||
if let Some(new_docids) = db_cache.get_word_prefix_pair_proximity_docids(
|
||||
index,
|
||||
txn,
|
||||
word_interner,
|
||||
left_word,
|
||||
right_prefix,
|
||||
forward_proximity,
|
||||
)? {
|
||||
if let Some(new_docids) =
|
||||
ctx.get_db_word_prefix_pair_proximity_docids(left_word, right_prefix, forward_proximity)?
|
||||
{
|
||||
let new_docids = &universe & CboRoaringBitmapCodec::deserialize_from(new_docids)?;
|
||||
if !new_docids.is_empty() {
|
||||
used_words.insert(left_word);
|
||||
used_words.insert(right_prefix);
|
||||
used_left_words.insert(left_word);
|
||||
used_right_prefix.insert(right_prefix);
|
||||
*docids |= new_docids;
|
||||
}
|
||||
}
|
||||
|
||||
// No swapping when computing the proximity between a phrase and a word
|
||||
if left_phrase.is_none() {
|
||||
if let Some(new_docids) = db_cache.get_prefix_word_pair_proximity_docids(
|
||||
index,
|
||||
txn,
|
||||
word_interner,
|
||||
if let Some(new_docids) = ctx.get_db_prefix_word_pair_proximity_docids(
|
||||
right_prefix,
|
||||
left_word,
|
||||
backward_proximity,
|
||||
)? {
|
||||
let new_docids = &universe & CboRoaringBitmapCodec::deserialize_from(new_docids)?;
|
||||
if !new_docids.is_empty() {
|
||||
used_words.insert(left_word);
|
||||
used_words.insert(right_prefix);
|
||||
used_left_words.insert(left_word);
|
||||
used_right_prefix.insert(right_prefix);
|
||||
*docids |= new_docids;
|
||||
}
|
||||
}
|
||||
@ -191,72 +178,59 @@ fn compute_prefix_edges<'ctx>(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn compute_non_prefix_edges<'ctx>(
|
||||
index: &Index,
|
||||
txn: &'ctx RoTxn,
|
||||
db_cache: &mut DatabaseCache<'ctx>,
|
||||
term_docids: &mut QueryTermDocIdsCache,
|
||||
word_interner: &DedupInterner<String>,
|
||||
phrase_interner: &DedupInterner<Phrase>,
|
||||
fn compute_non_prefix_edges(
|
||||
ctx: &mut SearchContext,
|
||||
word1: Interned<String>,
|
||||
word2: Interned<String>,
|
||||
phrases: &[Interned<Phrase>],
|
||||
left_phrase: Option<Interned<Phrase>>,
|
||||
right_phrase: Option<Interned<Phrase>>,
|
||||
forward_proximity: u8,
|
||||
backward_proximity: u8,
|
||||
docids: &mut RoaringBitmap,
|
||||
universe: &RoaringBitmap,
|
||||
used_words: &mut FxHashSet<Interned<String>>,
|
||||
used_phrases: &mut FxHashSet<Interned<Phrase>>,
|
||||
) -> Result<()> {
|
||||
let mut used_left_phrases = BTreeSet::new();
|
||||
let mut used_right_phrases = BTreeSet::new();
|
||||
let mut used_left_words = BTreeSet::new();
|
||||
let mut used_right_words = BTreeSet::new();
|
||||
|
||||
let mut universe = universe.clone();
|
||||
for phrase in phrases {
|
||||
let phrase_docids = term_docids.get_phrase_docids(
|
||||
index,
|
||||
txn,
|
||||
db_cache,
|
||||
word_interner,
|
||||
phrase_interner,
|
||||
*phrase,
|
||||
)?;
|
||||
if !phrase_docids.is_empty() {
|
||||
used_phrases.insert(*phrase);
|
||||
}
|
||||
|
||||
for phrase in left_phrase.iter().chain(right_phrase.iter()).copied() {
|
||||
let phrase_docids = ctx.get_phrase_docids(phrase)?;
|
||||
universe &= phrase_docids;
|
||||
if universe.is_empty() {
|
||||
return Ok(());
|
||||
}
|
||||
}
|
||||
if let Some(new_docids) = db_cache.get_word_pair_proximity_docids(
|
||||
index,
|
||||
txn,
|
||||
word_interner,
|
||||
word1,
|
||||
word2,
|
||||
forward_proximity,
|
||||
)? {
|
||||
if let Some(left_phrase) = left_phrase {
|
||||
used_left_phrases.insert(left_phrase);
|
||||
}
|
||||
if let Some(right_phrase) = right_phrase {
|
||||
used_right_phrases.insert(right_phrase);
|
||||
}
|
||||
|
||||
if let Some(new_docids) =
|
||||
ctx.get_db_word_pair_proximity_docids(word1, word2, forward_proximity)?
|
||||
{
|
||||
let new_docids = &universe & CboRoaringBitmapCodec::deserialize_from(new_docids)?;
|
||||
if !new_docids.is_empty() {
|
||||
used_words.insert(word1);
|
||||
used_words.insert(word2);
|
||||
used_left_words.insert(word1);
|
||||
used_right_words.insert(word2);
|
||||
*docids |= new_docids;
|
||||
}
|
||||
}
|
||||
if backward_proximity >= 1
|
||||
// no swapping when either term is a phrase
|
||||
&& phrases.is_empty()
|
||||
&& left_phrase.is_none() && right_phrase.is_none()
|
||||
{
|
||||
if let Some(new_docids) = db_cache.get_word_pair_proximity_docids(
|
||||
index,
|
||||
txn,
|
||||
word_interner,
|
||||
word2,
|
||||
word1,
|
||||
backward_proximity,
|
||||
)? {
|
||||
if let Some(new_docids) =
|
||||
ctx.get_db_word_pair_proximity_docids(word2, word1, backward_proximity)?
|
||||
{
|
||||
let new_docids = &universe & CboRoaringBitmapCodec::deserialize_from(new_docids)?;
|
||||
if !new_docids.is_empty() {
|
||||
used_words.insert(word1);
|
||||
used_words.insert(word2);
|
||||
used_left_words.insert(word2);
|
||||
used_right_words.insert(word1);
|
||||
*docids |= new_docids;
|
||||
}
|
||||
}
|
||||
@ -265,25 +239,41 @@ fn compute_non_prefix_edges<'ctx>(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
fn last_word_of_term_iter<'t>(
|
||||
t: &'t QueryTerm,
|
||||
phrase_interner: &'t DedupInterner<Phrase>,
|
||||
) -> impl Iterator<Item = (Option<Interned<Phrase>>, Interned<String>)> + 't {
|
||||
t.all_single_words_except_prefix_db().map(|w| (None, w)).chain(t.all_phrases().flat_map(
|
||||
move |p| {
|
||||
let phrase = phrase_interner.get(p);
|
||||
phrase.words.last().unwrap().map(|last| (Some(p), last))
|
||||
},
|
||||
))
|
||||
fn last_words_of_term_derivations(
|
||||
ctx: &mut SearchContext,
|
||||
t: &QueryTermSubset,
|
||||
) -> Result<BTreeSet<(Option<Interned<Phrase>>, Interned<String>)>> {
|
||||
let mut result = BTreeSet::new();
|
||||
|
||||
for w in t.all_single_words_except_prefix_db(ctx)? {
|
||||
result.insert((None, w));
|
||||
}
|
||||
for p in t.all_phrases(ctx)? {
|
||||
let phrase = ctx.phrase_interner.get(p);
|
||||
let last_term_of_phrase = phrase.words.last().unwrap();
|
||||
if let Some(last_word) = last_term_of_phrase {
|
||||
result.insert((Some(p), *last_word));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
fn first_word_of_term_iter<'t>(
|
||||
t: &'t QueryTerm,
|
||||
phrase_interner: &'t DedupInterner<Phrase>,
|
||||
) -> impl Iterator<Item = (Interned<String>, Option<Interned<Phrase>>)> + 't {
|
||||
t.all_single_words_except_prefix_db().map(|w| (w, None)).chain(t.all_phrases().flat_map(
|
||||
move |p| {
|
||||
let phrase = phrase_interner.get(p);
|
||||
phrase.words.first().unwrap().map(|first| (first, Some(p)))
|
||||
},
|
||||
))
|
||||
fn first_word_of_term_iter(
|
||||
ctx: &mut SearchContext,
|
||||
t: &QueryTermSubset,
|
||||
) -> Result<BTreeSet<(Interned<String>, Option<Interned<Phrase>>)>> {
|
||||
let mut result = BTreeSet::new();
|
||||
let all_words = t.all_single_words_except_prefix_db(ctx)?;
|
||||
for w in all_words {
|
||||
result.insert((w, None));
|
||||
}
|
||||
for p in t.all_phrases(ctx)? {
|
||||
let phrase = ctx.phrase_interner.get(p);
|
||||
let first_term_of_phrase = phrase.words.first().unwrap();
|
||||
if let Some(first_word) = first_term_of_phrase {
|
||||
result.insert((*first_word, Some(p)));
|
||||
}
|
||||
}
|
||||
|
||||
Ok(result)
|
||||
}
|
||||
|
@ -1,27 +1,19 @@
|
||||
pub mod build;
|
||||
pub mod compute_docids;
|
||||
|
||||
use fxhash::FxHashSet;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{DeadEndsCache, RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use super::{ComputedCondition, DeadEndsCache, RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use crate::search::new::interner::{DedupInterner, Interned, MappedInterner};
|
||||
use crate::search::new::logger::SearchLogger;
|
||||
use crate::search::new::query_term::{Phrase, QueryTerm};
|
||||
use crate::search::new::query_term::LocatedQueryTermSubset;
|
||||
use crate::search::new::{QueryGraph, QueryNode, SearchContext};
|
||||
use crate::Result;
|
||||
|
||||
#[derive(Clone, PartialEq, Eq, Hash)]
|
||||
pub enum ProximityCondition {
|
||||
Uninit {
|
||||
left_term: Interned<QueryTerm>,
|
||||
right_term: Interned<QueryTerm>,
|
||||
right_term_ngram_len: u8,
|
||||
cost: u8,
|
||||
},
|
||||
Term {
|
||||
term: Interned<QueryTerm>,
|
||||
},
|
||||
Uninit { left_term: LocatedQueryTermSubset, right_term: LocatedQueryTermSubset, cost: u8 },
|
||||
Term { term: LocatedQueryTermSubset },
|
||||
}
|
||||
|
||||
pub enum ProximityGraph {}
|
||||
@ -33,18 +25,17 @@ impl RankingRuleGraphTrait for ProximityGraph {
|
||||
ctx: &mut SearchContext,
|
||||
condition: &Self::Condition,
|
||||
universe: &RoaringBitmap,
|
||||
) -> Result<(roaring::RoaringBitmap, FxHashSet<Interned<String>>, FxHashSet<Interned<Phrase>>)>
|
||||
{
|
||||
) -> Result<ComputedCondition> {
|
||||
compute_docids::compute_docids(ctx, condition, universe)
|
||||
}
|
||||
|
||||
fn build_edges(
|
||||
ctx: &mut SearchContext,
|
||||
conditions_interner: &mut DedupInterner<Self::Condition>,
|
||||
source_node: &QueryNode,
|
||||
dest_node: &QueryNode,
|
||||
) -> Result<Vec<(u8, Option<Interned<Self::Condition>>)>> {
|
||||
build::build_edges(ctx, conditions_interner, source_node, dest_node)
|
||||
source_term: Option<&LocatedQueryTermSubset>,
|
||||
dest_term: &LocatedQueryTermSubset,
|
||||
) -> Result<Vec<(u32, Interned<Self::Condition>)>> {
|
||||
build::build_edges(ctx, conditions_interner, source_term, dest_term)
|
||||
}
|
||||
|
||||
fn log_state(
|
||||
@ -52,8 +43,8 @@ impl RankingRuleGraphTrait for ProximityGraph {
|
||||
paths: &[Vec<Interned<ProximityCondition>>],
|
||||
dead_ends_cache: &DeadEndsCache<Self::Condition>,
|
||||
universe: &RoaringBitmap,
|
||||
distances: &MappedInterner<QueryNode, Vec<u16>>,
|
||||
cost: u16,
|
||||
distances: &MappedInterner<QueryNode, Vec<u64>>,
|
||||
cost: u64,
|
||||
logger: &mut dyn SearchLogger<QueryGraph>,
|
||||
) {
|
||||
logger.log_proximity_state(graph, paths, dead_ends_cache, universe, distances, cost);
|
||||
@ -66,8 +57,9 @@ impl RankingRuleGraphTrait for ProximityGraph {
|
||||
Ok(format!("{cost}: cost"))
|
||||
}
|
||||
ProximityCondition::Term { term } => {
|
||||
let term = ctx.term_interner.get(*term);
|
||||
Ok(format!("{} : exists", ctx.word_interner.get(term.original)))
|
||||
let original_term = ctx.term_interner.get(term.term_subset.original);
|
||||
let original_word = ctx.word_interner.get(original_term.original);
|
||||
Ok(format!("{original_word} : exists"))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user