mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 23:04:26 +01:00
Continue documenting and cleaning up the code
This commit is contained in:
parent
c232cdabf5
commit
2099991dd1
@ -85,15 +85,15 @@ fn remove_empty_edges<'search, G: RankingRuleGraphTrait>(
|
|||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
empty_paths_cache: &mut EmptyPathsCache,
|
empty_paths_cache: &mut EmptyPathsCache,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
for edge_index in 0..graph.all_edges.len() as u16 {
|
for edge_index in 0..graph.edges_store.len() as u16 {
|
||||||
if graph.all_edges[edge_index as usize].is_none() {
|
if graph.edges_store[edge_index as usize].is_none() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
let docids = edge_docids_cache.get_edge_docids(ctx, edge_index, &*graph, universe)?;
|
let docids = edge_docids_cache.get_edge_docids(ctx, edge_index, &*graph, universe)?;
|
||||||
match docids {
|
match docids {
|
||||||
BitmapOrAllRef::Bitmap(docids) => {
|
BitmapOrAllRef::Bitmap(docids) => {
|
||||||
if docids.is_disjoint(universe) {
|
if docids.is_disjoint(universe) {
|
||||||
graph.remove_edge(edge_index);
|
graph.remove_ranking_rule_edge(edge_index);
|
||||||
empty_paths_cache.forbid_edge(edge_index);
|
empty_paths_cache.forbid_edge(edge_index);
|
||||||
edge_docids_cache.cache.remove(&edge_index);
|
edge_docids_cache.cache.remove(&edge_index);
|
||||||
continue;
|
continue;
|
||||||
@ -120,7 +120,7 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
|
|||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let mut graph = RankingRuleGraph::build(ctx, query_graph.clone())?;
|
let mut graph = RankingRuleGraph::build(ctx, query_graph.clone())?;
|
||||||
let mut edge_docids_cache = EdgeDocidsCache::default();
|
let mut edge_docids_cache = EdgeDocidsCache::default();
|
||||||
let mut empty_paths_cache = EmptyPathsCache::new(graph.all_edges.len() as u16);
|
let mut empty_paths_cache = EmptyPathsCache::new(graph.edges_store.len() as u16);
|
||||||
|
|
||||||
// First simplify the graph as much as possible, by computing the docids of the edges
|
// First simplify the graph as much as possible, by computing the docids of the edges
|
||||||
// within the rule's universe and removing the edges that have no associated docids.
|
// within the rule's universe and removing the edges that have no associated docids.
|
||||||
@ -242,7 +242,7 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
|
|||||||
// 1. Store in the cache that this edge is empty for this universe
|
// 1. Store in the cache that this edge is empty for this universe
|
||||||
empty_paths_cache.forbid_edge(edge_index);
|
empty_paths_cache.forbid_edge(edge_index);
|
||||||
// 2. remove this edge from the ranking rule graph
|
// 2. remove this edge from the ranking rule graph
|
||||||
graph.remove_edge(edge_index);
|
graph.remove_ranking_rule_edge(edge_index);
|
||||||
// 3. Also remove the entry from the edge_docids_cache, since we don't need it anymore
|
// 3. Also remove the entry from the edge_docids_cache, since we don't need it anymore
|
||||||
edge_docids_cache.cache.remove(&edge_index);
|
edge_docids_cache.cache.remove(&edge_index);
|
||||||
return Ok(());
|
return Ok(());
|
||||||
|
@ -8,7 +8,7 @@ use roaring::RoaringBitmap;
|
|||||||
|
|
||||||
use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
|
use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
|
||||||
use crate::search::new::ranking_rule_graph::{
|
use crate::search::new::ranking_rule_graph::{
|
||||||
Edge, EdgeDetails, EmptyPathsCache, ProximityGraph, RankingRuleGraph, RankingRuleGraphTrait,
|
Edge, EdgeCondition, EmptyPathsCache, ProximityGraph, RankingRuleGraph, RankingRuleGraphTrait,
|
||||||
TypoGraph,
|
TypoGraph,
|
||||||
};
|
};
|
||||||
use crate::search::new::small_bitmap::SmallBitmap;
|
use crate::search::new::small_bitmap::SmallBitmap;
|
||||||
@ -534,24 +534,24 @@ shape: class"
|
|||||||
let distances = &distances[node_idx];
|
let distances = &distances[node_idx];
|
||||||
Self::query_node_d2_desc(ctx, node_idx, node, distances.as_slice(), file);
|
Self::query_node_d2_desc(ctx, node_idx, node, distances.as_slice(), file);
|
||||||
}
|
}
|
||||||
for edge in graph.all_edges.iter().flatten() {
|
for edge in graph.edges_store.iter().flatten() {
|
||||||
let Edge { from_node, to_node, details, .. } = edge;
|
let Edge { source_node, dest_node, condition: details, .. } = edge;
|
||||||
|
|
||||||
match &details {
|
match &details {
|
||||||
EdgeDetails::Unconditional => {
|
EdgeCondition::Unconditional => {
|
||||||
writeln!(
|
writeln!(
|
||||||
file,
|
file,
|
||||||
"{from_node} -> {to_node} : \"always cost {cost}\"",
|
"{source_node} -> {dest_node} : \"always cost {cost}\"",
|
||||||
cost = edge.cost,
|
cost = edge.cost,
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
}
|
}
|
||||||
EdgeDetails::Data(details) => {
|
EdgeCondition::Conditional(details) => {
|
||||||
writeln!(
|
writeln!(
|
||||||
file,
|
file,
|
||||||
"{from_node} -> {to_node} : \"cost {cost} {edge_label}\"",
|
"{source_node} -> {dest_node} : \"cost {cost} {edge_label}\"",
|
||||||
cost = edge.cost,
|
cost = edge.cost,
|
||||||
edge_label = R::graphviz_edge_details_label(details)
|
edge_label = R::label_for_edge_condition(details)
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
}
|
}
|
||||||
@ -589,10 +589,10 @@ shape: class"
|
|||||||
edge_idx: u16,
|
edge_idx: u16,
|
||||||
file: &mut File,
|
file: &mut File,
|
||||||
) {
|
) {
|
||||||
let Edge { from_node, to_node, cost, .. } =
|
let Edge { source_node, dest_node, cost, .. } =
|
||||||
graph.all_edges[edge_idx as usize].as_ref().unwrap();
|
graph.edges_store[edge_idx as usize].as_ref().unwrap();
|
||||||
let from_node = &graph.query_graph.nodes[*from_node as usize];
|
let source_node = &graph.query_graph.nodes[*source_node as usize];
|
||||||
let from_node_desc = match from_node {
|
let source_node_desc = match source_node {
|
||||||
QueryNode::Term(term) => match &term.value {
|
QueryNode::Term(term) => match &term.value {
|
||||||
QueryTerm::Phrase { phrase } => {
|
QueryTerm::Phrase { phrase } => {
|
||||||
let phrase = ctx.phrase_interner.get(*phrase);
|
let phrase = ctx.phrase_interner.get(*phrase);
|
||||||
@ -606,8 +606,8 @@ shape: class"
|
|||||||
QueryNode::Start => "START".to_owned(),
|
QueryNode::Start => "START".to_owned(),
|
||||||
QueryNode::End => "END".to_owned(),
|
QueryNode::End => "END".to_owned(),
|
||||||
};
|
};
|
||||||
let to_node = &graph.query_graph.nodes[*to_node as usize];
|
let dest_node = &graph.query_graph.nodes[*dest_node as usize];
|
||||||
let to_node_desc = match to_node {
|
let dest_node_desc = match dest_node {
|
||||||
QueryNode::Term(term) => match &term.value {
|
QueryNode::Term(term) => match &term.value {
|
||||||
QueryTerm::Phrase { phrase } => {
|
QueryTerm::Phrase { phrase } => {
|
||||||
let phrase = ctx.phrase_interner.get(*phrase);
|
let phrase = ctx.phrase_interner.get(*phrase);
|
||||||
@ -623,7 +623,7 @@ shape: class"
|
|||||||
};
|
};
|
||||||
writeln!(
|
writeln!(
|
||||||
file,
|
file,
|
||||||
"{edge_idx}: \"{from_node_desc}->{to_node_desc} [{cost}]\" {{
|
"{edge_idx}: \"{source_node_desc}->{dest_node_desc} [{cost}]\" {{
|
||||||
shape: class
|
shape: class
|
||||||
}}"
|
}}"
|
||||||
)
|
)
|
||||||
|
@ -1,6 +1,3 @@
|
|||||||
// TODO: put primitive query part in here
|
|
||||||
|
|
||||||
use std::borrow::Cow;
|
|
||||||
use std::mem;
|
use std::mem;
|
||||||
use std::ops::RangeInclusive;
|
use std::ops::RangeInclusive;
|
||||||
|
|
||||||
@ -18,6 +15,8 @@ use crate::search::fst_utils::{Complement, Intersection, StartsWith, Union};
|
|||||||
use crate::search::{build_dfa, get_first};
|
use crate::search::{build_dfa, get_first};
|
||||||
use crate::{CboRoaringBitmapLenCodec, Index, Result};
|
use crate::{CboRoaringBitmapLenCodec, Index, Result};
|
||||||
|
|
||||||
|
/// A phrase in the user's search query, consisting of several words
|
||||||
|
/// that must appear side-by-side in the search results.
|
||||||
#[derive(Default, Clone, PartialEq, Eq, Hash)]
|
#[derive(Default, Clone, PartialEq, Eq, Hash)]
|
||||||
pub struct Phrase {
|
pub struct Phrase {
|
||||||
pub words: Vec<Option<Interned<String>>>,
|
pub words: Vec<Option<Interned<String>>>,
|
||||||
@ -28,18 +27,38 @@ impl Phrase {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A structure storing all the different ways to match
|
||||||
|
/// a term in the user's search query.
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct WordDerivations {
|
pub struct WordDerivations {
|
||||||
|
/// The original word
|
||||||
pub original: Interned<String>,
|
pub original: Interned<String>,
|
||||||
// TODO: pub prefix_of: Vec<String>,
|
// TODO: original should only be used for debugging purposes?
|
||||||
|
// TODO: pub zero_typo: Option<Interned<String>>,
|
||||||
|
// TODO: pub prefix_of: Box<[Interned<String>]>,
|
||||||
|
/// All the synonyms of the original word
|
||||||
pub synonyms: Box<[Interned<Phrase>]>,
|
pub synonyms: Box<[Interned<Phrase>]>,
|
||||||
|
|
||||||
|
/// The original word split into multiple consecutive words
|
||||||
pub split_words: Option<Interned<Phrase>>,
|
pub split_words: Option<Interned<Phrase>>,
|
||||||
|
|
||||||
|
/// The original word and the words which are prefixed by it
|
||||||
pub zero_typo: Box<[Interned<String>]>,
|
pub zero_typo: Box<[Interned<String>]>,
|
||||||
|
|
||||||
|
/// Words that are 1 typo away from the original word
|
||||||
pub one_typo: Box<[Interned<String>]>,
|
pub one_typo: Box<[Interned<String>]>,
|
||||||
|
|
||||||
|
/// Words that are 2 typos away from the original word
|
||||||
pub two_typos: Box<[Interned<String>]>,
|
pub two_typos: Box<[Interned<String>]>,
|
||||||
|
|
||||||
|
/// True if the prefix databases must be used to retrieve
|
||||||
|
/// the words which are prefixed by the original word.
|
||||||
pub use_prefix_db: bool,
|
pub use_prefix_db: bool,
|
||||||
}
|
}
|
||||||
impl WordDerivations {
|
impl WordDerivations {
|
||||||
|
/// Return an iterator over all the single words derived from the original word.
|
||||||
|
///
|
||||||
|
/// This excludes synonyms, split words, and words stored in the prefix databases.
|
||||||
pub fn all_derivations_except_prefix_db(
|
pub fn all_derivations_except_prefix_db(
|
||||||
&'_ self,
|
&'_ self,
|
||||||
) -> impl Iterator<Item = Interned<String>> + Clone + '_ {
|
) -> impl Iterator<Item = Interned<String>> + Clone + '_ {
|
||||||
@ -49,17 +68,20 @@ impl WordDerivations {
|
|||||||
self.zero_typo.is_empty()
|
self.zero_typo.is_empty()
|
||||||
&& self.one_typo.is_empty()
|
&& self.one_typo.is_empty()
|
||||||
&& self.two_typos.is_empty()
|
&& self.two_typos.is_empty()
|
||||||
|
&& self.synonyms.is_empty()
|
||||||
|
&& self.split_words.is_none()
|
||||||
&& !self.use_prefix_db
|
&& !self.use_prefix_db
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Compute the word derivations for the given word
|
||||||
pub fn word_derivations(
|
pub fn word_derivations(
|
||||||
ctx: &mut SearchContext,
|
ctx: &mut SearchContext,
|
||||||
word: &str,
|
word: &str,
|
||||||
max_typo: u8,
|
max_typo: u8,
|
||||||
is_prefix: bool,
|
is_prefix: bool,
|
||||||
fst: &fst::Set<Cow<[u8]>>,
|
|
||||||
) -> Result<WordDerivations> {
|
) -> Result<WordDerivations> {
|
||||||
|
let fst = ctx.index.words_fst(ctx.txn)?;
|
||||||
let word_interned = ctx.word_interner.insert(word.to_owned());
|
let word_interned = ctx.word_interner.insert(word.to_owned());
|
||||||
|
|
||||||
let use_prefix_db = is_prefix
|
let use_prefix_db = is_prefix
|
||||||
@ -171,6 +193,10 @@ pub fn word_derivations(
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Split the original word into the two words that appear the
|
||||||
|
/// most next to each other in the index.
|
||||||
|
///
|
||||||
|
/// Return `None` if the original word cannot be split.
|
||||||
fn split_best_frequency(
|
fn split_best_frequency(
|
||||||
index: &Index,
|
index: &Index,
|
||||||
txn: &RoTxn,
|
txn: &RoTxn,
|
||||||
@ -199,16 +225,12 @@ fn split_best_frequency(
|
|||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub enum QueryTerm {
|
pub enum QueryTerm {
|
||||||
// TODO: should there be SplitWord, NGram2, and NGram3 variants?
|
|
||||||
// NGram2 can have 1 typo and synonyms
|
|
||||||
// NGram3 cannot have typos but can have synonyms
|
|
||||||
// SplitWords are a phrase
|
|
||||||
// Can NGrams be prefixes?
|
|
||||||
Phrase { phrase: Interned<Phrase> },
|
Phrase { phrase: Interned<Phrase> },
|
||||||
Word { derivations: WordDerivations },
|
Word { derivations: WordDerivations },
|
||||||
}
|
}
|
||||||
|
|
||||||
impl QueryTerm {
|
impl QueryTerm {
|
||||||
|
/// Return the original word from the given query term
|
||||||
pub fn original_single_word<'interner>(
|
pub fn original_single_word<'interner>(
|
||||||
&self,
|
&self,
|
||||||
word_interner: &'interner Interner<String>,
|
word_interner: &'interner Interner<String>,
|
||||||
@ -226,6 +248,7 @@ impl QueryTerm {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A query term coupled with its position in the user's search query.
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct LocatedQueryTerm {
|
pub struct LocatedQueryTerm {
|
||||||
pub value: QueryTerm,
|
pub value: QueryTerm,
|
||||||
@ -233,14 +256,18 @@ pub struct LocatedQueryTerm {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl LocatedQueryTerm {
|
impl LocatedQueryTerm {
|
||||||
|
/// Return `true` iff the word derivations within the query term are empty
|
||||||
pub fn is_empty(&self) -> bool {
|
pub fn is_empty(&self) -> bool {
|
||||||
match &self.value {
|
match &self.value {
|
||||||
|
// TODO: phrases should be greedily computed, so that they can be excluded from
|
||||||
|
// the query graph right from the start?
|
||||||
QueryTerm::Phrase { phrase: _ } => false,
|
QueryTerm::Phrase { phrase: _ } => false,
|
||||||
QueryTerm::Word { derivations, .. } => derivations.is_empty(),
|
QueryTerm::Word { derivations, .. } => derivations.is_empty(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Convert the tokenised search query into a list of located query terms.
|
||||||
pub fn located_query_terms_from_string<'search>(
|
pub fn located_query_terms_from_string<'search>(
|
||||||
ctx: &mut SearchContext<'search>,
|
ctx: &mut SearchContext<'search>,
|
||||||
query: NormalizedTokenIter<Vec<u8>>,
|
query: NormalizedTokenIter<Vec<u8>>,
|
||||||
@ -250,8 +277,8 @@ pub fn located_query_terms_from_string<'search>(
|
|||||||
let min_len_one_typo = ctx.index.min_word_len_one_typo(ctx.txn)?;
|
let min_len_one_typo = ctx.index.min_word_len_one_typo(ctx.txn)?;
|
||||||
let min_len_two_typos = ctx.index.min_word_len_two_typos(ctx.txn)?;
|
let min_len_two_typos = ctx.index.min_word_len_two_typos(ctx.txn)?;
|
||||||
|
|
||||||
|
// TODO: should `exact_words` also disable prefix search, ngrams, split words, or synonyms?
|
||||||
let exact_words = ctx.index.exact_words(ctx.txn)?;
|
let exact_words = ctx.index.exact_words(ctx.txn)?;
|
||||||
let fst = ctx.index.words_fst(ctx.txn)?;
|
|
||||||
|
|
||||||
let nbr_typos = |word: &str| {
|
let nbr_typos = |word: &str| {
|
||||||
if !authorize_typos
|
if !authorize_typos
|
||||||
@ -266,9 +293,9 @@ pub fn located_query_terms_from_string<'search>(
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
let mut primitive_query = Vec::new();
|
let mut located_terms = Vec::new();
|
||||||
let mut phrase = Vec::new();
|
|
||||||
|
|
||||||
|
let mut phrase = Vec::new();
|
||||||
let mut quoted = false;
|
let mut quoted = false;
|
||||||
|
|
||||||
let parts_limit = words_limit.unwrap_or(usize::MAX);
|
let parts_limit = words_limit.unwrap_or(usize::MAX);
|
||||||
@ -280,8 +307,8 @@ pub fn located_query_terms_from_string<'search>(
|
|||||||
let mut peekable = query.peekable();
|
let mut peekable = query.peekable();
|
||||||
while let Some(token) = peekable.next() {
|
while let Some(token) = peekable.next() {
|
||||||
// early return if word limit is exceeded
|
// early return if word limit is exceeded
|
||||||
if primitive_query.len() >= parts_limit {
|
if located_terms.len() >= parts_limit {
|
||||||
return Ok(primitive_query);
|
return Ok(located_terms);
|
||||||
}
|
}
|
||||||
|
|
||||||
match token.kind {
|
match token.kind {
|
||||||
@ -307,24 +334,23 @@ pub fn located_query_terms_from_string<'search>(
|
|||||||
match token.kind {
|
match token.kind {
|
||||||
TokenKind::Word => {
|
TokenKind::Word => {
|
||||||
let word = token.lemma();
|
let word = token.lemma();
|
||||||
let derivations =
|
let derivations = word_derivations(ctx, word, nbr_typos(word), false)?;
|
||||||
word_derivations(ctx, word, nbr_typos(word), false, &fst)?;
|
|
||||||
let located_term = LocatedQueryTerm {
|
let located_term = LocatedQueryTerm {
|
||||||
value: QueryTerm::Word { derivations },
|
value: QueryTerm::Word { derivations },
|
||||||
positions: position..=position,
|
positions: position..=position,
|
||||||
};
|
};
|
||||||
primitive_query.push(located_term);
|
located_terms.push(located_term);
|
||||||
}
|
}
|
||||||
TokenKind::StopWord | TokenKind::Separator(_) | TokenKind::Unknown => {}
|
TokenKind::StopWord | TokenKind::Separator(_) | TokenKind::Unknown => {}
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
let word = token.lemma();
|
let word = token.lemma();
|
||||||
let derivations = word_derivations(ctx, word, nbr_typos(word), true, &fst)?;
|
let derivations = word_derivations(ctx, word, nbr_typos(word), true)?;
|
||||||
let located_term = LocatedQueryTerm {
|
let located_term = LocatedQueryTerm {
|
||||||
value: QueryTerm::Word { derivations },
|
value: QueryTerm::Word { derivations },
|
||||||
positions: position..=position,
|
positions: position..=position,
|
||||||
};
|
};
|
||||||
primitive_query.push(located_term);
|
located_terms.push(located_term);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
TokenKind::Separator(separator_kind) => {
|
TokenKind::Separator(separator_kind) => {
|
||||||
@ -352,7 +378,7 @@ pub fn located_query_terms_from_string<'search>(
|
|||||||
},
|
},
|
||||||
positions: phrase_start..=phrase_end,
|
positions: phrase_start..=phrase_end,
|
||||||
};
|
};
|
||||||
primitive_query.push(located_query_term);
|
located_terms.push(located_query_term);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
_ => (),
|
_ => (),
|
||||||
@ -367,10 +393,10 @@ pub fn located_query_terms_from_string<'search>(
|
|||||||
},
|
},
|
||||||
positions: phrase_start..=phrase_end,
|
positions: phrase_start..=phrase_end,
|
||||||
};
|
};
|
||||||
primitive_query.push(located_query_term);
|
located_terms.push(located_query_term);
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(primitive_query)
|
Ok(located_terms)
|
||||||
}
|
}
|
||||||
|
|
||||||
// TODO: return a word derivations instead?
|
// TODO: return a word derivations instead?
|
||||||
@ -396,6 +422,8 @@ pub fn ngram2(
|
|||||||
_ => None,
|
_ => None,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// TODO: return a word derivations instead?
|
||||||
pub fn ngram3(
|
pub fn ngram3(
|
||||||
ctx: &mut SearchContext,
|
ctx: &mut SearchContext,
|
||||||
x: &LocatedQueryTerm,
|
x: &LocatedQueryTerm,
|
||||||
|
@ -6,49 +6,43 @@ use crate::search::new::{QueryGraph, SearchContext};
|
|||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
|
||||||
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||||
|
/// Build the ranking rule graph from the given query graph
|
||||||
pub fn build(ctx: &mut SearchContext, query_graph: QueryGraph) -> Result<Self> {
|
pub fn build(ctx: &mut SearchContext, query_graph: QueryGraph) -> Result<Self> {
|
||||||
let QueryGraph { nodes: graph_nodes, edges: graph_edges, .. } = &query_graph;
|
let QueryGraph { nodes: graph_nodes, edges: graph_edges, .. } = &query_graph;
|
||||||
|
|
||||||
let mut all_edges = vec![];
|
let mut edges_store = vec![];
|
||||||
let mut node_edges = vec![];
|
let mut edges_of_node = vec![];
|
||||||
let mut successors = vec![];
|
|
||||||
|
|
||||||
for (node_idx, node) in graph_nodes.iter().enumerate() {
|
for (node_idx, node) in graph_nodes.iter().enumerate() {
|
||||||
node_edges.push(HashSet::new());
|
edges_of_node.push(HashSet::new());
|
||||||
successors.push(HashSet::new());
|
let new_edges = edges_of_node.last_mut().unwrap();
|
||||||
let new_edges = node_edges.last_mut().unwrap();
|
|
||||||
let new_successors = successors.last_mut().unwrap();
|
|
||||||
|
|
||||||
let Some(from_node_data) = G::build_visit_from_node(ctx, node)? else { continue };
|
let Some(source_node_data) = G::build_step_visit_source_node(ctx, node)? else { continue };
|
||||||
|
|
||||||
for successor_idx in graph_edges[node_idx].successors.iter() {
|
for successor_idx in graph_edges[node_idx].successors.iter() {
|
||||||
let to_node = &graph_nodes[successor_idx as usize];
|
let dest_node = &graph_nodes[successor_idx as usize];
|
||||||
let mut edges = G::build_visit_to_node(ctx, to_node, &from_node_data)?;
|
let edges =
|
||||||
|
G::build_step_visit_destination_node(ctx, dest_node, &source_node_data)?;
|
||||||
if edges.is_empty() {
|
if edges.is_empty() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
edges.sort_by_key(|e| e.0);
|
|
||||||
for (cost, details) in edges {
|
for (cost, details) in edges {
|
||||||
all_edges.push(Some(Edge {
|
edges_store.push(Some(Edge {
|
||||||
from_node: node_idx as u16,
|
source_node: node_idx as u16,
|
||||||
to_node: successor_idx,
|
dest_node: successor_idx,
|
||||||
cost,
|
cost,
|
||||||
details,
|
condition: details,
|
||||||
}));
|
}));
|
||||||
new_edges.insert(all_edges.len() as u16 - 1);
|
new_edges.insert(edges_store.len() as u16 - 1);
|
||||||
new_successors.insert(successor_idx);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let node_edges = node_edges
|
let edges_of_node = edges_of_node
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|edges| SmallBitmap::from_iter(edges.into_iter(), all_edges.len() as u16))
|
.map(|edges| SmallBitmap::from_iter(edges.into_iter(), edges_store.len() as u16))
|
||||||
.collect();
|
|
||||||
let successors = successors
|
|
||||||
.into_iter()
|
|
||||||
.map(|edges| SmallBitmap::from_iter(edges.into_iter(), all_edges.len() as u16))
|
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
Ok(RankingRuleGraph { query_graph, all_edges, node_edges, successors })
|
Ok(RankingRuleGraph { query_graph, edges_store, edges_of_node })
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -30,7 +30,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
empty_paths_cache,
|
empty_paths_cache,
|
||||||
&mut visit,
|
&mut visit,
|
||||||
&mut vec![],
|
&mut vec![],
|
||||||
&mut SmallBitmap::new(self.all_edges.len() as u16),
|
&mut SmallBitmap::new(self.edges_store.len() as u16),
|
||||||
empty_paths_cache.empty_edges.clone(),
|
empty_paths_cache.empty_edges.clone(),
|
||||||
)?;
|
)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
@ -48,12 +48,12 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
) -> Result<bool> {
|
) -> Result<bool> {
|
||||||
let mut any_valid = false;
|
let mut any_valid = false;
|
||||||
|
|
||||||
let edges = self.node_edges[from].clone();
|
let edges = self.edges_of_node[from].clone();
|
||||||
for edge_idx in edges.iter() {
|
for edge_idx in edges.iter() {
|
||||||
let Some(edge) = self.all_edges[edge_idx as usize].as_ref() else { continue };
|
let Some(edge) = self.edges_store[edge_idx as usize].as_ref() else { continue };
|
||||||
if cost < edge.cost as u16
|
if cost < edge.cost as u16
|
||||||
|| forbidden_edges.contains(edge_idx)
|
|| forbidden_edges.contains(edge_idx)
|
||||||
|| !all_distances[edge.to_node as usize].iter().any(
|
|| !all_distances[edge.dest_node as usize].iter().any(
|
||||||
|(next_cost, necessary_edges)| {
|
|(next_cost, necessary_edges)| {
|
||||||
(*next_cost == cost - edge.cost as u16)
|
(*next_cost == cost - edge.cost as u16)
|
||||||
&& !forbidden_edges.intersects(necessary_edges)
|
&& !forbidden_edges.intersects(necessary_edges)
|
||||||
@ -71,13 +71,13 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
new_forbidden_edges.insert(x);
|
new_forbidden_edges.insert(x);
|
||||||
});
|
});
|
||||||
|
|
||||||
let next_any_valid = if edge.to_node == self.query_graph.end_node {
|
let next_any_valid = if edge.dest_node == self.query_graph.end_node {
|
||||||
any_valid = true;
|
any_valid = true;
|
||||||
visit(prev_edges, self, empty_paths_cache)?;
|
visit(prev_edges, self, empty_paths_cache)?;
|
||||||
true
|
true
|
||||||
} else {
|
} else {
|
||||||
self.visit_paths_of_cost_rec(
|
self.visit_paths_of_cost_rec(
|
||||||
edge.to_node as usize,
|
edge.dest_node as usize,
|
||||||
cost - edge.cost as u16,
|
cost - edge.cost as u16,
|
||||||
all_distances,
|
all_distances,
|
||||||
empty_paths_cache,
|
empty_paths_cache,
|
||||||
@ -115,7 +115,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
let mut node_stack = VecDeque::new();
|
let mut node_stack = VecDeque::new();
|
||||||
|
|
||||||
distances_to_end[self.query_graph.end_node as usize] =
|
distances_to_end[self.query_graph.end_node as usize] =
|
||||||
vec![(0, SmallBitmap::new(self.all_edges.len() as u16))];
|
vec![(0, SmallBitmap::new(self.edges_store.len() as u16))];
|
||||||
|
|
||||||
for prev_node in
|
for prev_node in
|
||||||
self.query_graph.edges[self.query_graph.end_node as usize].predecessors.iter()
|
self.query_graph.edges[self.query_graph.end_node as usize].predecessors.iter()
|
||||||
@ -127,15 +127,15 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
while let Some(cur_node) = node_stack.pop_front() {
|
while let Some(cur_node) = node_stack.pop_front() {
|
||||||
let mut self_distances = BTreeMap::<u16, SmallBitmap>::new();
|
let mut self_distances = BTreeMap::<u16, SmallBitmap>::new();
|
||||||
|
|
||||||
let cur_node_edges = &self.node_edges[cur_node];
|
let cur_node_edges = &self.edges_of_node[cur_node];
|
||||||
for edge_idx in cur_node_edges.iter() {
|
for edge_idx in cur_node_edges.iter() {
|
||||||
let edge = self.all_edges[edge_idx as usize].as_ref().unwrap();
|
let edge = self.edges_store[edge_idx as usize].as_ref().unwrap();
|
||||||
let succ_node = edge.to_node;
|
let succ_node = edge.dest_node;
|
||||||
let succ_distances = &distances_to_end[succ_node as usize];
|
let succ_distances = &distances_to_end[succ_node as usize];
|
||||||
for (succ_distance, succ_necessary_edges) in succ_distances {
|
for (succ_distance, succ_necessary_edges) in succ_distances {
|
||||||
let potential_necessary_edges = SmallBitmap::from_iter(
|
let potential_necessary_edges = SmallBitmap::from_iter(
|
||||||
std::iter::once(edge_idx).chain(succ_necessary_edges.iter()),
|
std::iter::once(edge_idx).chain(succ_necessary_edges.iter()),
|
||||||
self.all_edges.len() as u16,
|
self.edges_store.len() as u16,
|
||||||
);
|
);
|
||||||
match self_distances.entry(edge.cost as u16 + succ_distance) {
|
match self_distances.entry(edge.cost as u16 + succ_distance) {
|
||||||
Entry::Occupied(mut prev_necessary_edges) => {
|
Entry::Occupied(mut prev_necessary_edges) => {
|
||||||
|
@ -3,28 +3,13 @@ use std::marker::PhantomData;
|
|||||||
use fxhash::FxHashMap;
|
use fxhash::FxHashMap;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait};
|
use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait};
|
||||||
use crate::search::new::{BitmapOrAllRef, SearchContext};
|
use crate::search::new::{BitmapOrAllRef, SearchContext};
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
|
||||||
// TODO: the cache should have a G::EdgeDetails as key
|
/// A cache storing the document ids associated with each ranking rule edge
|
||||||
// but then it means that we should have a quick way of
|
|
||||||
// computing their hash and comparing them
|
|
||||||
// which can be done...
|
|
||||||
// by using a pointer (real, Rc, bumpalo, or in a vector)???
|
|
||||||
//
|
|
||||||
// But actually.... the edge details' docids are a subset of the universe at the
|
|
||||||
// moment they were computed.
|
|
||||||
// But the universes between two iterations of a ranking rule are completely different
|
|
||||||
// Thus, there is no point in doing this.
|
|
||||||
// UNLESS...
|
|
||||||
// we compute the whole docids corresponding to the edge details (potentially expensive in time and memory
|
|
||||||
// in the common case)
|
|
||||||
//
|
|
||||||
// But we could still benefit within a single iteration for requests like:
|
|
||||||
// `a a a a a a a a a` where we have many of the same edge details, repeated
|
|
||||||
|
|
||||||
pub struct EdgeDocidsCache<G: RankingRuleGraphTrait> {
|
pub struct EdgeDocidsCache<G: RankingRuleGraphTrait> {
|
||||||
|
// TODO: should be FxHashMap<Interned<EdgeCondition>, RoaringBitmap>
|
||||||
pub cache: FxHashMap<u16, RoaringBitmap>,
|
pub cache: FxHashMap<u16, RoaringBitmap>,
|
||||||
_phantom: PhantomData<G>,
|
_phantom: PhantomData<G>,
|
||||||
}
|
}
|
||||||
@ -34,19 +19,24 @@ impl<G: RankingRuleGraphTrait> Default for EdgeDocidsCache<G> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl<G: RankingRuleGraphTrait> EdgeDocidsCache<G> {
|
impl<G: RankingRuleGraphTrait> EdgeDocidsCache<G> {
|
||||||
|
/// Retrieve the document ids for the given edge condition.
|
||||||
|
///
|
||||||
|
/// If the cache does not yet contain these docids, they are computed
|
||||||
|
/// and inserted in the cache.
|
||||||
pub fn get_edge_docids<'s, 'search>(
|
pub fn get_edge_docids<'s, 'search>(
|
||||||
&'s mut self,
|
&'s mut self,
|
||||||
ctx: &mut SearchContext<'search>,
|
ctx: &mut SearchContext<'search>,
|
||||||
|
// TODO: should be Interned<EdgeCondition>
|
||||||
edge_index: u16,
|
edge_index: u16,
|
||||||
graph: &RankingRuleGraph<G>,
|
graph: &RankingRuleGraph<G>,
|
||||||
// TODO: maybe universe doesn't belong here
|
// TODO: maybe universe doesn't belong here
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
) -> Result<BitmapOrAllRef<'s>> {
|
) -> Result<BitmapOrAllRef<'s>> {
|
||||||
let edge = graph.all_edges[edge_index as usize].as_ref().unwrap();
|
let edge = graph.edges_store[edge_index as usize].as_ref().unwrap();
|
||||||
|
|
||||||
match &edge.details {
|
match &edge.condition {
|
||||||
EdgeDetails::Unconditional => Ok(BitmapOrAllRef::All),
|
EdgeCondition::Unconditional => Ok(BitmapOrAllRef::All),
|
||||||
EdgeDetails::Data(details) => {
|
EdgeCondition::Conditional(details) => {
|
||||||
if self.cache.contains_key(&edge_index) {
|
if self.cache.contains_key(&edge_index) {
|
||||||
// TODO: should we update the bitmap in the cache if the new universe
|
// TODO: should we update the bitmap in the cache if the new universe
|
||||||
// reduces it?
|
// reduces it?
|
||||||
@ -56,7 +46,7 @@ impl<G: RankingRuleGraphTrait> EdgeDocidsCache<G> {
|
|||||||
return Ok(BitmapOrAllRef::Bitmap(&self.cache[&edge_index]));
|
return Ok(BitmapOrAllRef::Bitmap(&self.cache[&edge_index]));
|
||||||
}
|
}
|
||||||
// TODO: maybe universe doesn't belong here
|
// TODO: maybe universe doesn't belong here
|
||||||
let docids = universe & G::compute_docids(ctx, details, universe)?;
|
let docids = universe & G::resolve_edge_condition(ctx, details, universe)?;
|
||||||
let _ = self.cache.insert(edge_index, docids);
|
let _ = self.cache.insert(edge_index, docids);
|
||||||
let docids = &self.cache[&edge_index];
|
let docids = &self.cache[&edge_index];
|
||||||
Ok(BitmapOrAllRef::Bitmap(docids))
|
Ok(BitmapOrAllRef::Bitmap(docids))
|
||||||
|
@ -1,20 +1,29 @@
|
|||||||
use super::paths_map::PathsMap;
|
use super::paths_map::PathSet;
|
||||||
use crate::search::new::small_bitmap::SmallBitmap;
|
use crate::search::new::small_bitmap::SmallBitmap;
|
||||||
|
|
||||||
|
/// A cache which stores sufficient conditions for a path
|
||||||
|
/// to resolve to an empty set of candidates within the current
|
||||||
|
/// universe.
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct EmptyPathsCache {
|
pub struct EmptyPathsCache {
|
||||||
|
/// The set of edge indexes that resolve to no documents.
|
||||||
pub empty_edges: SmallBitmap,
|
pub empty_edges: SmallBitmap,
|
||||||
pub empty_prefixes: PathsMap<()>,
|
/// A set of path prefixes that resolve to no documents.
|
||||||
|
pub empty_prefixes: PathSet,
|
||||||
|
/// A set of couples of edge indexes that resolve to no documents.
|
||||||
pub empty_couple_edges: Vec<SmallBitmap>,
|
pub empty_couple_edges: Vec<SmallBitmap>,
|
||||||
}
|
}
|
||||||
impl EmptyPathsCache {
|
impl EmptyPathsCache {
|
||||||
|
/// Create a new cache for a ranking rule graph containing at most `all_edges_len` edges.
|
||||||
pub fn new(all_edges_len: u16) -> Self {
|
pub fn new(all_edges_len: u16) -> Self {
|
||||||
Self {
|
Self {
|
||||||
empty_edges: SmallBitmap::new(all_edges_len),
|
empty_edges: SmallBitmap::new(all_edges_len),
|
||||||
empty_prefixes: PathsMap::default(),
|
empty_prefixes: PathSet::default(),
|
||||||
empty_couple_edges: vec![SmallBitmap::new(all_edges_len); all_edges_len as usize],
|
empty_couple_edges: vec![SmallBitmap::new(all_edges_len); all_edges_len as usize],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Store in the cache that every path containing the given edge resolves to no documents.
|
||||||
pub fn forbid_edge(&mut self, edge_idx: u16) {
|
pub fn forbid_edge(&mut self, edge_idx: u16) {
|
||||||
self.empty_edges.insert(edge_idx);
|
self.empty_edges.insert(edge_idx);
|
||||||
self.empty_couple_edges[edge_idx as usize].clear();
|
self.empty_couple_edges[edge_idx as usize].clear();
|
||||||
@ -23,12 +32,17 @@ impl EmptyPathsCache {
|
|||||||
edges2.remove(edge_idx);
|
edges2.remove(edge_idx);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/// Store in the cache that every path containing the given prefix resolves to no documents.
|
||||||
pub fn forbid_prefix(&mut self, prefix: &[u16]) {
|
pub fn forbid_prefix(&mut self, prefix: &[u16]) {
|
||||||
self.empty_prefixes.insert(prefix.iter().copied(), ());
|
self.empty_prefixes.insert(prefix.iter().copied());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Store in the cache that every path containing the two given edges resolves to no documents.
|
||||||
pub fn forbid_couple_edges(&mut self, edge1: u16, edge2: u16) {
|
pub fn forbid_couple_edges(&mut self, edge1: u16, edge2: u16) {
|
||||||
self.empty_couple_edges[edge1 as usize].insert(edge2);
|
self.empty_couple_edges[edge1 as usize].insert(edge2);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Returns true if the cache can determine that the given path resolves to no documents.
|
||||||
pub fn path_is_empty(&self, path: &[u16], path_bitmap: &SmallBitmap) -> bool {
|
pub fn path_is_empty(&self, path: &[u16], path_bitmap: &SmallBitmap) -> bool {
|
||||||
if path_bitmap.intersects(&self.empty_edges) {
|
if path_bitmap.intersects(&self.empty_edges) {
|
||||||
return true;
|
return true;
|
||||||
|
@ -1,9 +1,19 @@
|
|||||||
|
/*! Module implementing the graph used for the graph-based ranking rules
|
||||||
|
and its related algorithms.
|
||||||
|
|
||||||
|
A ranking rule graph is built on top of the [`QueryGraph`]: the nodes stay
|
||||||
|
the same but the edges are replaced.
|
||||||
|
*/
|
||||||
|
|
||||||
mod build;
|
mod build;
|
||||||
mod cheapest_paths;
|
mod cheapest_paths;
|
||||||
mod edge_docids_cache;
|
mod edge_docids_cache;
|
||||||
mod empty_paths_cache;
|
mod empty_paths_cache;
|
||||||
mod paths_map;
|
mod paths_map;
|
||||||
|
|
||||||
|
/// Implementation of the `proximity` ranking rule
|
||||||
mod proximity;
|
mod proximity;
|
||||||
|
/// Implementation of the `typo` ranking rule
|
||||||
mod typo;
|
mod typo;
|
||||||
|
|
||||||
pub use edge_docids_cache::EdgeDocidsCache;
|
pub use edge_docids_cache::EdgeDocidsCache;
|
||||||
@ -17,30 +27,38 @@ use super::small_bitmap::SmallBitmap;
|
|||||||
use super::{QueryGraph, QueryNode, SearchContext};
|
use super::{QueryGraph, QueryNode, SearchContext};
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
|
||||||
|
/// The condition that is associated with an edge in the ranking rule graph.
|
||||||
|
///
|
||||||
|
/// Some edges are unconditional, which means that traversing them does not reduce
|
||||||
|
/// the set of candidates.
|
||||||
|
///
|
||||||
|
/// Most edges, however, have a condition attached to them. For example, for the
|
||||||
|
/// proximity ranking rule, the condition could be that a word is N-close to another one.
|
||||||
|
/// When the edge is traversed, some database operations are executed to retrieve the set
|
||||||
|
/// of documents that satisfy the condition, which reduces the list of candidate document ids.
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub enum EdgeDetails<E> {
|
pub enum EdgeCondition<E> {
|
||||||
Unconditional,
|
Unconditional,
|
||||||
Data(E),
|
Conditional(E),
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// An edge in the ranking rule graph.
|
||||||
|
///
|
||||||
|
/// It contains:
|
||||||
|
/// 1. The source and destination nodes
|
||||||
|
/// 2. The cost of traversing this edge
|
||||||
|
/// 3. The condition associated with it
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct Edge<E> {
|
pub struct Edge<E> {
|
||||||
pub from_node: u16,
|
pub source_node: u16,
|
||||||
pub to_node: u16,
|
pub dest_node: u16,
|
||||||
pub cost: u8,
|
pub cost: u8,
|
||||||
pub details: EdgeDetails<E>,
|
pub condition: EdgeCondition<E>,
|
||||||
}
|
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
|
||||||
pub struct EdgePointer<'graph, E> {
|
|
||||||
pub index: u16,
|
|
||||||
pub edge: &'graph Edge<E>,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// pub struct SubWordDerivations {
|
// pub struct SubWordDerivations {
|
||||||
// words: FxHashSet<Interned<String>>,
|
// words: FxHashSet<Interned<String>>,
|
||||||
// synonyms: FxHashSet<Interned<Phrase>>, // NO! they're phrases, not strings
|
// phrases: FxHashSet<Interned<Phrase>>,
|
||||||
// split_words: bool,
|
|
||||||
// use_prefix_db: bool,
|
// use_prefix_db: bool,
|
||||||
// }
|
// }
|
||||||
|
|
||||||
@ -74,46 +92,55 @@ pub struct EdgePointer<'graph, E> {
|
|||||||
// }
|
// }
|
||||||
|
|
||||||
// fn word_derivations_used_by_edge<G: RankingRuleGraphTrait>(
|
// fn word_derivations_used_by_edge<G: RankingRuleGraphTrait>(
|
||||||
// edge: G::EdgeDetails,
|
// edge: G::EdgeCondition,
|
||||||
// ) -> SubWordDerivations {
|
// ) -> SubWordDerivations {
|
||||||
// todo!()
|
// todo!()
|
||||||
// }
|
// }
|
||||||
|
|
||||||
|
/// A trait to be implemented by a marker type to build a graph-based ranking rule.
|
||||||
|
///
|
||||||
|
/// It mostly describes how to:
|
||||||
|
/// 1. Retrieve the set of edges (their cost and condition) between two nodes.
|
||||||
|
/// 2. Compute the document ids satisfying a condition
|
||||||
pub trait RankingRuleGraphTrait: Sized {
|
pub trait RankingRuleGraphTrait: Sized {
|
||||||
/// The details of an edge connecting two query nodes. These details
|
/// The condition of an edge connecting two query nodes. The condition
|
||||||
/// should be sufficient to compute the edge's cost and associated document ids
|
/// should be sufficient to compute the edge's cost and associated document ids
|
||||||
/// in [`compute_docids`](RankingRuleGraphTrait).
|
/// in [`resolve_edge_condition`](RankingRuleGraphTrait::resolve_edge_condition).
|
||||||
type EdgeDetails: Sized + Clone;
|
type EdgeCondition: Sized + Clone;
|
||||||
|
|
||||||
|
/// A structure used in the construction of the graph, created when a
|
||||||
|
/// query graph source node is visited. It is used to determine the cost
|
||||||
|
/// and condition of a ranking rule edge when the destination node is visited.
|
||||||
type BuildVisitedFromNode;
|
type BuildVisitedFromNode;
|
||||||
|
|
||||||
/// Return the label of the given edge details, to be used when visualising
|
/// Return the label of the given edge condition, to be used when visualising
|
||||||
/// the ranking rule graph using GraphViz.
|
/// the ranking rule graph.
|
||||||
fn graphviz_edge_details_label(edge: &Self::EdgeDetails) -> String;
|
fn label_for_edge_condition(edge: &Self::EdgeCondition) -> String;
|
||||||
|
|
||||||
/// Compute the document ids associated with the given edge.
|
/// Compute the document ids associated with the given edge condition,
|
||||||
fn compute_docids<'search>(
|
/// restricted to the given universe.
|
||||||
|
fn resolve_edge_condition<'search>(
|
||||||
ctx: &mut SearchContext<'search>,
|
ctx: &mut SearchContext<'search>,
|
||||||
edge_details: &Self::EdgeDetails,
|
edge_condition: &Self::EdgeCondition,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
) -> Result<RoaringBitmap>;
|
) -> Result<RoaringBitmap>;
|
||||||
|
|
||||||
/// Prepare to build the edges outgoing from `from_node`.
|
/// Prepare to build the edges outgoing from `source_node`.
|
||||||
///
|
///
|
||||||
/// This call is followed by zero, one or more calls to [`build_visit_to_node`](RankingRuleGraphTrait::build_visit_to_node),
|
/// This call is followed by zero, one or more calls to [`build_step_visit_destination_node`](RankingRuleGraphTrait::build_step_visit_destination_node),
|
||||||
/// which builds the actual edges.
|
/// which builds the actual edges.
|
||||||
fn build_visit_from_node<'search>(
|
fn build_step_visit_source_node<'search>(
|
||||||
ctx: &mut SearchContext<'search>,
|
ctx: &mut SearchContext<'search>,
|
||||||
from_node: &QueryNode,
|
source_node: &QueryNode,
|
||||||
) -> Result<Option<Self::BuildVisitedFromNode>>;
|
) -> Result<Option<Self::BuildVisitedFromNode>>;
|
||||||
|
|
||||||
/// Return the cost and details of the edges going from the previously visited node
|
/// Return the cost and condition of the edges going from the previously visited node
|
||||||
/// (with [`build_visit_from_node`](RankingRuleGraphTrait::build_visit_from_node)) to `to_node`.
|
/// (with [`build_step_visit_source_node`](RankingRuleGraphTrait::build_step_visit_source_node)) to `dest_node`.
|
||||||
fn build_visit_to_node<'from_data, 'search: 'from_data>(
|
fn build_step_visit_destination_node<'from_data, 'search: 'from_data>(
|
||||||
ctx: &mut SearchContext<'search>,
|
ctx: &mut SearchContext<'search>,
|
||||||
to_node: &QueryNode,
|
dest_node: &QueryNode,
|
||||||
from_node_data: &'from_data Self::BuildVisitedFromNode,
|
source_node_data: &'from_data Self::BuildVisitedFromNode,
|
||||||
) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>>;
|
) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>>;
|
||||||
|
|
||||||
fn log_state(
|
fn log_state(
|
||||||
graph: &RankingRuleGraph<Self>,
|
graph: &RankingRuleGraph<Self>,
|
||||||
@ -126,45 +153,32 @@ pub trait RankingRuleGraphTrait: Sized {
|
|||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// The graph used by graph-based ranking rules.
|
||||||
|
///
|
||||||
|
/// It is built on top of a [`QueryGraph`], keeping the same nodes
|
||||||
|
/// but replacing the edges.
|
||||||
pub struct RankingRuleGraph<G: RankingRuleGraphTrait> {
|
pub struct RankingRuleGraph<G: RankingRuleGraphTrait> {
|
||||||
pub query_graph: QueryGraph,
|
pub query_graph: QueryGraph,
|
||||||
// pub edges: Vec<HashMap<usize, Vec<Edge<G::EdgeDetails>>>>,
|
pub edges_store: Vec<Option<Edge<G::EdgeCondition>>>,
|
||||||
pub all_edges: Vec<Option<Edge<G::EdgeDetails>>>,
|
pub edges_of_node: Vec<SmallBitmap>,
|
||||||
|
|
||||||
pub node_edges: Vec<SmallBitmap>,
|
|
||||||
|
|
||||||
pub successors: Vec<SmallBitmap>,
|
|
||||||
// TODO: to get the edges between two nodes:
|
|
||||||
// 1. get node_outgoing_edges[from]
|
|
||||||
// 2. get node_incoming_edges[to]
|
|
||||||
// 3. take intersection between the two
|
|
||||||
}
|
}
|
||||||
impl<G: RankingRuleGraphTrait> Clone for RankingRuleGraph<G> {
|
impl<G: RankingRuleGraphTrait> Clone for RankingRuleGraph<G> {
|
||||||
fn clone(&self) -> Self {
|
fn clone(&self) -> Self {
|
||||||
Self {
|
Self {
|
||||||
query_graph: self.query_graph.clone(),
|
query_graph: self.query_graph.clone(),
|
||||||
all_edges: self.all_edges.clone(),
|
edges_store: self.edges_store.clone(),
|
||||||
node_edges: self.node_edges.clone(),
|
edges_of_node: self.edges_of_node.clone(),
|
||||||
successors: self.successors.clone(),
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||||
pub fn remove_edge(&mut self, edge_index: u16) {
|
/// Remove the given edge from the ranking rule graph
|
||||||
let edge_opt = &mut self.all_edges[edge_index as usize];
|
pub fn remove_ranking_rule_edge(&mut self, edge_index: u16) {
|
||||||
|
let edge_opt = &mut self.edges_store[edge_index as usize];
|
||||||
let Some(edge) = &edge_opt else { return };
|
let Some(edge) = &edge_opt else { return };
|
||||||
let (from_node, _to_node) = (edge.from_node, edge.to_node);
|
let (source_node, _dest_node) = (edge.source_node, edge.dest_node);
|
||||||
*edge_opt = None;
|
*edge_opt = None;
|
||||||
|
|
||||||
let from_node_edges = &mut self.node_edges[from_node as usize];
|
self.edges_of_node[source_node as usize].remove(edge_index);
|
||||||
from_node_edges.remove(edge_index);
|
|
||||||
|
|
||||||
let mut new_successors_from_node = SmallBitmap::new(self.all_edges.len() as u16);
|
|
||||||
let all_edges = &self.all_edges;
|
|
||||||
for from_node_edge in from_node_edges.iter() {
|
|
||||||
let Edge { to_node, .. } = &all_edges[from_node_edge as usize].as_ref().unwrap();
|
|
||||||
new_successors_from_node.insert(*to_node);
|
|
||||||
}
|
|
||||||
self.successors[from_node as usize] = new_successors_from_node;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -1,117 +1,32 @@
|
|||||||
use super::cheapest_paths::Path;
|
// What is PathSet used for?
|
||||||
use crate::search::new::small_bitmap::SmallBitmap;
|
|
||||||
|
|
||||||
// What is PathsMap used for?
|
|
||||||
// For the empty_prefixes field in the EmptyPathsCache only :/
|
// For the empty_prefixes field in the EmptyPathsCache only :/
|
||||||
// but it could be used for more, like efficient computing of a set of paths
|
// but it could be used for more, like efficient computing of a set of paths
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
/// A set of [`Path`]
|
||||||
pub struct PathsMap<V> {
|
#[derive(Default, Debug, Clone)]
|
||||||
pub nodes: Vec<(u16, PathsMap<V>)>,
|
pub struct PathSet {
|
||||||
pub value: Option<V>,
|
nodes: Vec<(u16, PathSet)>,
|
||||||
|
is_end: bool,
|
||||||
}
|
}
|
||||||
impl<V> Default for PathsMap<V> {
|
impl PathSet {
|
||||||
fn default() -> Self {
|
pub fn insert(&mut self, mut edges: impl Iterator<Item = u16>) {
|
||||||
Self { nodes: vec![], value: None }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl PathsMap<u64> {
|
|
||||||
pub fn from_paths(paths: &[Path]) -> Self {
|
|
||||||
let mut result = Self::default();
|
|
||||||
for p in paths {
|
|
||||||
result.add_path(p);
|
|
||||||
}
|
|
||||||
result
|
|
||||||
}
|
|
||||||
pub fn add_path(&mut self, path: &Path) {
|
|
||||||
self.insert(path.edges.iter().copied(), path.cost);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
impl<V> PathsMap<V> {
|
|
||||||
pub fn is_empty(&self) -> bool {
|
|
||||||
self.nodes.is_empty() && self.value.is_none()
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn insert(&mut self, mut edges: impl Iterator<Item = u16>, value: V) {
|
|
||||||
match edges.next() {
|
match edges.next() {
|
||||||
None => {
|
None => {
|
||||||
self.value = Some(value);
|
self.is_end = true;
|
||||||
}
|
}
|
||||||
Some(first_edge) => {
|
Some(first_edge) => {
|
||||||
// comment
|
|
||||||
for (edge, next_node) in &mut self.nodes {
|
for (edge, next_node) in &mut self.nodes {
|
||||||
if edge == &first_edge {
|
if edge == &first_edge {
|
||||||
return next_node.insert(edges, value);
|
return next_node.insert(edges);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let mut rest = PathsMap::default();
|
let mut rest = PathSet::default();
|
||||||
rest.insert(edges, value);
|
rest.insert(edges);
|
||||||
self.nodes.push((first_edge, rest));
|
self.nodes.push((first_edge, rest));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fn remove_first_rec(&mut self, cur: &mut Vec<u16>) -> (bool, V) {
|
|
||||||
let Some((first_edge, rest)) = self.nodes.first_mut() else {
|
|
||||||
// The PathsMap has to be correct by construction here, otherwise
|
|
||||||
// the unwrap() will crash
|
|
||||||
return (true, self.value.take().unwrap())
|
|
||||||
};
|
|
||||||
cur.push(*first_edge);
|
|
||||||
let (rest_is_empty, value) = rest.remove_first_rec(cur);
|
|
||||||
if rest_is_empty {
|
|
||||||
self.nodes.remove(0);
|
|
||||||
(self.nodes.is_empty(), value)
|
|
||||||
} else {
|
|
||||||
(false, value)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
pub fn remove_first(&mut self) -> Option<(Vec<u16>, V)> {
|
|
||||||
if self.is_empty() {
|
|
||||||
return None;
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut result = vec![];
|
|
||||||
let (_, value) = self.remove_first_rec(&mut result);
|
|
||||||
Some((result, value))
|
|
||||||
}
|
|
||||||
pub fn iterate_rec(&self, cur: &mut Vec<u16>, visit: &mut impl FnMut(&Vec<u16>, &V)) {
|
|
||||||
if let Some(value) = &self.value {
|
|
||||||
visit(cur, value);
|
|
||||||
}
|
|
||||||
for (first_edge, rest) in self.nodes.iter() {
|
|
||||||
cur.push(*first_edge);
|
|
||||||
rest.iterate_rec(cur, visit);
|
|
||||||
cur.pop();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
pub fn iterate(&self, mut visit: impl FnMut(&Vec<u16>, &V)) {
|
|
||||||
self.iterate_rec(&mut vec![], &mut visit)
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn remove_prefixes<U>(&mut self, prefixes: &PathsMap<U>) {
|
|
||||||
prefixes.iterate(|prefix, _v| {
|
|
||||||
self.remove_prefix(prefix);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
pub fn remove_edges(&mut self, forbidden_edges: &SmallBitmap) {
|
|
||||||
let mut i = 0;
|
|
||||||
while i < self.nodes.len() {
|
|
||||||
let should_remove = if forbidden_edges.contains(self.nodes[i].0) {
|
|
||||||
true
|
|
||||||
} else if !self.nodes[i].1.nodes.is_empty() {
|
|
||||||
self.nodes[i].1.remove_edges(forbidden_edges);
|
|
||||||
self.nodes[i].1.nodes.is_empty()
|
|
||||||
} else {
|
|
||||||
false
|
|
||||||
};
|
|
||||||
if should_remove {
|
|
||||||
self.nodes.remove(i);
|
|
||||||
} else {
|
|
||||||
i += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
pub fn remove_edge(&mut self, forbidden_edge: &u16) {
|
pub fn remove_edge(&mut self, forbidden_edge: &u16) {
|
||||||
let mut i = 0;
|
let mut i = 0;
|
||||||
while i < self.nodes.len() {
|
while i < self.nodes.len() {
|
||||||
@ -130,34 +45,11 @@ impl<V> PathsMap<V> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub fn remove_prefix(&mut self, forbidden_prefix: &[u16]) {
|
|
||||||
let [first_edge, remaining_prefix @ ..] = forbidden_prefix else {
|
|
||||||
self.nodes.clear();
|
|
||||||
self.value = None;
|
|
||||||
return;
|
|
||||||
};
|
|
||||||
|
|
||||||
let mut i = 0;
|
|
||||||
while i < self.nodes.len() {
|
|
||||||
let edge = self.nodes[i].0;
|
|
||||||
let should_remove = if edge == *first_edge {
|
|
||||||
self.nodes[i].1.remove_prefix(remaining_prefix);
|
|
||||||
self.nodes[i].1.nodes.is_empty()
|
|
||||||
} else {
|
|
||||||
false
|
|
||||||
};
|
|
||||||
if should_remove {
|
|
||||||
self.nodes.remove(i);
|
|
||||||
} else {
|
|
||||||
i += 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn final_edges_after_prefix(&self, prefix: &[u16], visit: &mut impl FnMut(u16)) {
|
pub fn final_edges_after_prefix(&self, prefix: &[u16], visit: &mut impl FnMut(u16)) {
|
||||||
let [first_edge, remaining_prefix @ ..] = prefix else {
|
let [first_edge, remaining_prefix @ ..] = prefix else {
|
||||||
for node in self.nodes.iter() {
|
for node in self.nodes.iter() {
|
||||||
if node.1.value.is_some() {
|
if node.1.is_end {
|
||||||
visit(node.0)
|
visit(node.0)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -170,20 +62,8 @@ impl<V> PathsMap<V> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn edge_indices_after_prefix(&self, prefix: &[u16]) -> Vec<u16> {
|
|
||||||
let [first_edge, remaining_prefix @ ..] = prefix else {
|
|
||||||
return self.nodes.iter().map(|n| n.0).collect();
|
|
||||||
};
|
|
||||||
for (edge, rest) in self.nodes.iter() {
|
|
||||||
if edge == first_edge {
|
|
||||||
return rest.edge_indices_after_prefix(remaining_prefix);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
vec![]
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn contains_prefix_of_path(&self, path: &[u16]) -> bool {
|
pub fn contains_prefix_of_path(&self, path: &[u16]) -> bool {
|
||||||
if self.value.is_some() {
|
if self.is_end {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
match path {
|
match path {
|
||||||
|
@ -5,7 +5,7 @@ use itertools::Itertools;
|
|||||||
use super::ProximityEdge;
|
use super::ProximityEdge;
|
||||||
use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
|
use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
|
||||||
use crate::search::new::ranking_rule_graph::proximity::WordPair;
|
use crate::search::new::ranking_rule_graph::proximity::WordPair;
|
||||||
use crate::search::new::ranking_rule_graph::EdgeDetails;
|
use crate::search::new::ranking_rule_graph::EdgeCondition;
|
||||||
use crate::search::new::{QueryNode, SearchContext};
|
use crate::search::new::{QueryNode, SearchContext};
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
|
||||||
@ -57,10 +57,10 @@ pub fn visit_to_node<'search, 'from_data>(
|
|||||||
ctx: &mut SearchContext<'search>,
|
ctx: &mut SearchContext<'search>,
|
||||||
to_node: &QueryNode,
|
to_node: &QueryNode,
|
||||||
from_node_data: &'from_data (WordDerivations, i8),
|
from_node_data: &'from_data (WordDerivations, i8),
|
||||||
) -> Result<Vec<(u8, EdgeDetails<ProximityEdge>)>> {
|
) -> Result<Vec<(u8, EdgeCondition<ProximityEdge>)>> {
|
||||||
let (derivations1, pos1) = from_node_data;
|
let (derivations1, pos1) = from_node_data;
|
||||||
let term2 = match &to_node {
|
let term2 = match &to_node {
|
||||||
QueryNode::End => return Ok(vec![(0, EdgeDetails::Unconditional)]),
|
QueryNode::End => return Ok(vec![(0, EdgeCondition::Unconditional)]),
|
||||||
QueryNode::Deleted | QueryNode::Start => return Ok(vec![]),
|
QueryNode::Deleted | QueryNode::Start => return Ok(vec![]),
|
||||||
QueryNode::Term(term) => term,
|
QueryNode::Term(term) => term,
|
||||||
};
|
};
|
||||||
@ -96,7 +96,7 @@ pub fn visit_to_node<'search, 'from_data>(
|
|||||||
// We want to effectively ignore this pair of terms
|
// We want to effectively ignore this pair of terms
|
||||||
// Unconditionally walk through the edge without computing the docids
|
// Unconditionally walk through the edge without computing the docids
|
||||||
// But also what should the cost be?
|
// But also what should the cost be?
|
||||||
return Ok(vec![(0, EdgeDetails::Unconditional)]);
|
return Ok(vec![(0, EdgeCondition::Unconditional)]);
|
||||||
}
|
}
|
||||||
|
|
||||||
let updb1 = derivations1.use_prefix_db;
|
let updb1 = derivations1.use_prefix_db;
|
||||||
@ -189,7 +189,7 @@ pub fn visit_to_node<'search, 'from_data>(
|
|||||||
for (proximity, word_pairs) in proximity_word_pairs {
|
for (proximity, word_pairs) in proximity_word_pairs {
|
||||||
edges.push((
|
edges.push((
|
||||||
cost,
|
cost,
|
||||||
EdgeDetails::Data(ProximityEdge {
|
EdgeCondition::Conditional(ProximityEdge {
|
||||||
pairs: word_pairs.into_boxed_slice(),
|
pairs: word_pairs.into_boxed_slice(),
|
||||||
proximity,
|
proximity,
|
||||||
}),
|
}),
|
||||||
@ -198,6 +198,6 @@ pub fn visit_to_node<'search, 'from_data>(
|
|||||||
edges
|
edges
|
||||||
})
|
})
|
||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
new_edges.push((8 + (ngram_len2 - 1) as u8, EdgeDetails::Unconditional));
|
new_edges.push((8 + (ngram_len2 - 1) as u8, EdgeCondition::Unconditional));
|
||||||
Ok(new_edges)
|
Ok(new_edges)
|
||||||
}
|
}
|
||||||
|
@ -4,7 +4,7 @@ pub mod compute_docids;
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::empty_paths_cache::EmptyPathsCache;
|
use super::empty_paths_cache::EmptyPathsCache;
|
||||||
use super::{EdgeDetails, RankingRuleGraphTrait};
|
use super::{EdgeCondition, RankingRuleGraphTrait};
|
||||||
use crate::search::new::interner::Interned;
|
use crate::search::new::interner::Interned;
|
||||||
use crate::search::new::logger::SearchLogger;
|
use crate::search::new::logger::SearchLogger;
|
||||||
use crate::search::new::query_term::WordDerivations;
|
use crate::search::new::query_term::WordDerivations;
|
||||||
@ -30,34 +30,34 @@ pub struct ProximityEdge {
|
|||||||
pub enum ProximityGraph {}
|
pub enum ProximityGraph {}
|
||||||
|
|
||||||
impl RankingRuleGraphTrait for ProximityGraph {
|
impl RankingRuleGraphTrait for ProximityGraph {
|
||||||
type EdgeDetails = ProximityEdge;
|
type EdgeCondition = ProximityEdge;
|
||||||
type BuildVisitedFromNode = (WordDerivations, i8);
|
type BuildVisitedFromNode = (WordDerivations, i8);
|
||||||
|
|
||||||
fn graphviz_edge_details_label(edge: &Self::EdgeDetails) -> String {
|
fn label_for_edge_condition(edge: &Self::EdgeCondition) -> String {
|
||||||
let ProximityEdge { pairs, proximity } = edge;
|
let ProximityEdge { pairs, proximity } = edge;
|
||||||
format!(", prox {proximity}, {} pairs", pairs.len())
|
format!(", prox {proximity}, {} pairs", pairs.len())
|
||||||
}
|
}
|
||||||
|
|
||||||
fn compute_docids<'search>(
|
fn resolve_edge_condition<'search>(
|
||||||
ctx: &mut SearchContext<'search>,
|
ctx: &mut SearchContext<'search>,
|
||||||
edge: &Self::EdgeDetails,
|
edge: &Self::EdgeCondition,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
) -> Result<roaring::RoaringBitmap> {
|
) -> Result<roaring::RoaringBitmap> {
|
||||||
compute_docids::compute_docids(ctx, edge, universe)
|
compute_docids::compute_docids(ctx, edge, universe)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn build_visit_from_node<'search>(
|
fn build_step_visit_source_node<'search>(
|
||||||
ctx: &mut SearchContext<'search>,
|
ctx: &mut SearchContext<'search>,
|
||||||
from_node: &QueryNode,
|
from_node: &QueryNode,
|
||||||
) -> Result<Option<Self::BuildVisitedFromNode>> {
|
) -> Result<Option<Self::BuildVisitedFromNode>> {
|
||||||
build::visit_from_node(ctx, from_node)
|
build::visit_from_node(ctx, from_node)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn build_visit_to_node<'from_data, 'search: 'from_data>(
|
fn build_step_visit_destination_node<'from_data, 'search: 'from_data>(
|
||||||
ctx: &mut SearchContext<'search>,
|
ctx: &mut SearchContext<'search>,
|
||||||
to_node: &QueryNode,
|
to_node: &QueryNode,
|
||||||
from_node_data: &'from_data Self::BuildVisitedFromNode,
|
from_node_data: &'from_data Self::BuildVisitedFromNode,
|
||||||
) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>> {
|
) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>> {
|
||||||
build::visit_to_node(ctx, to_node, from_node_data)
|
build::visit_to_node(ctx, to_node, from_node_data)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -2,7 +2,7 @@ use heed::BytesDecode;
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::empty_paths_cache::EmptyPathsCache;
|
use super::empty_paths_cache::EmptyPathsCache;
|
||||||
use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait};
|
use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait};
|
||||||
use crate::search::new::interner::Interned;
|
use crate::search::new::interner::Interned;
|
||||||
use crate::search::new::logger::SearchLogger;
|
use crate::search::new::logger::SearchLogger;
|
||||||
use crate::search::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm, WordDerivations};
|
use crate::search::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm, WordDerivations};
|
||||||
@ -20,19 +20,19 @@ pub enum TypoEdge {
|
|||||||
pub enum TypoGraph {}
|
pub enum TypoGraph {}
|
||||||
|
|
||||||
impl RankingRuleGraphTrait for TypoGraph {
|
impl RankingRuleGraphTrait for TypoGraph {
|
||||||
type EdgeDetails = TypoEdge;
|
type EdgeCondition = TypoEdge;
|
||||||
type BuildVisitedFromNode = ();
|
type BuildVisitedFromNode = ();
|
||||||
|
|
||||||
fn graphviz_edge_details_label(edge: &Self::EdgeDetails) -> String {
|
fn label_for_edge_condition(edge: &Self::EdgeCondition) -> String {
|
||||||
match edge {
|
match edge {
|
||||||
TypoEdge::Phrase { .. } => ", 0 typos".to_owned(),
|
TypoEdge::Phrase { .. } => ", 0 typos".to_owned(),
|
||||||
TypoEdge::Word { nbr_typos, .. } => format!(", {nbr_typos} typos"),
|
TypoEdge::Word { nbr_typos, .. } => format!(", {nbr_typos} typos"),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn compute_docids<'db_cache, 'search>(
|
fn resolve_edge_condition<'db_cache, 'search>(
|
||||||
ctx: &mut SearchContext<'search>,
|
ctx: &mut SearchContext<'search>,
|
||||||
edge: &Self::EdgeDetails,
|
edge: &Self::EdgeCondition,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
) -> Result<RoaringBitmap> {
|
) -> Result<RoaringBitmap> {
|
||||||
match edge {
|
match edge {
|
||||||
@ -66,29 +66,29 @@ impl RankingRuleGraphTrait for TypoGraph {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
fn build_visit_from_node<'search>(
|
fn build_step_visit_source_node<'search>(
|
||||||
_ctx: &mut SearchContext<'search>,
|
_ctx: &mut SearchContext<'search>,
|
||||||
_from_node: &QueryNode,
|
_from_node: &QueryNode,
|
||||||
) -> Result<Option<Self::BuildVisitedFromNode>> {
|
) -> Result<Option<Self::BuildVisitedFromNode>> {
|
||||||
Ok(Some(()))
|
Ok(Some(()))
|
||||||
}
|
}
|
||||||
|
|
||||||
fn build_visit_to_node<'from_data, 'search: 'from_data>(
|
fn build_step_visit_destination_node<'from_data, 'search: 'from_data>(
|
||||||
_ctx: &mut SearchContext<'search>,
|
_ctx: &mut SearchContext<'search>,
|
||||||
to_node: &QueryNode,
|
to_node: &QueryNode,
|
||||||
_from_node_data: &'from_data Self::BuildVisitedFromNode,
|
_from_node_data: &'from_data Self::BuildVisitedFromNode,
|
||||||
) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>> {
|
) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>> {
|
||||||
match to_node {
|
match to_node {
|
||||||
QueryNode::Term(LocatedQueryTerm { value, .. }) => match value {
|
QueryNode::Term(LocatedQueryTerm { value, .. }) => match value {
|
||||||
&QueryTerm::Phrase { phrase } => {
|
&QueryTerm::Phrase { phrase } => {
|
||||||
Ok(vec![(0, EdgeDetails::Data(TypoEdge::Phrase { phrase }))])
|
Ok(vec![(0, EdgeCondition::Conditional(TypoEdge::Phrase { phrase }))])
|
||||||
}
|
}
|
||||||
QueryTerm::Word { derivations } => {
|
QueryTerm::Word { derivations } => {
|
||||||
let mut edges = vec![];
|
let mut edges = vec![];
|
||||||
if !derivations.zero_typo.is_empty() || derivations.use_prefix_db {
|
if !derivations.zero_typo.is_empty() || derivations.use_prefix_db {
|
||||||
edges.push((
|
edges.push((
|
||||||
0,
|
0,
|
||||||
EdgeDetails::Data(TypoEdge::Word {
|
EdgeCondition::Conditional(TypoEdge::Word {
|
||||||
derivations: derivations.clone(),
|
derivations: derivations.clone(),
|
||||||
nbr_typos: 0,
|
nbr_typos: 0,
|
||||||
}),
|
}),
|
||||||
@ -97,7 +97,7 @@ impl RankingRuleGraphTrait for TypoGraph {
|
|||||||
if !derivations.one_typo.is_empty() {
|
if !derivations.one_typo.is_empty() {
|
||||||
edges.push((
|
edges.push((
|
||||||
1,
|
1,
|
||||||
EdgeDetails::Data(TypoEdge::Word {
|
EdgeCondition::Conditional(TypoEdge::Word {
|
||||||
derivations: derivations.clone(),
|
derivations: derivations.clone(),
|
||||||
nbr_typos: 1,
|
nbr_typos: 1,
|
||||||
}),
|
}),
|
||||||
@ -106,7 +106,7 @@ impl RankingRuleGraphTrait for TypoGraph {
|
|||||||
if !derivations.two_typos.is_empty() {
|
if !derivations.two_typos.is_empty() {
|
||||||
edges.push((
|
edges.push((
|
||||||
2,
|
2,
|
||||||
EdgeDetails::Data(TypoEdge::Word {
|
EdgeCondition::Conditional(TypoEdge::Word {
|
||||||
derivations: derivations.clone(),
|
derivations: derivations.clone(),
|
||||||
nbr_typos: 2,
|
nbr_typos: 2,
|
||||||
}),
|
}),
|
||||||
@ -115,7 +115,7 @@ impl RankingRuleGraphTrait for TypoGraph {
|
|||||||
Ok(edges)
|
Ok(edges)
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
QueryNode::End => Ok(vec![(0, EdgeDetails::Unconditional)]),
|
QueryNode::End => Ok(vec![(0, EdgeCondition::Unconditional)]),
|
||||||
QueryNode::Deleted | QueryNode::Start => panic!(),
|
QueryNode::Deleted | QueryNode::Start => panic!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user