Continue documenting and cleaning up the code

This commit is contained in:
Loïc Lecrenier 2023-03-08 15:04:25 +01:00
parent c232cdabf5
commit 2099991dd1
12 changed files with 245 additions and 325 deletions

View File

@ -85,15 +85,15 @@ fn remove_empty_edges<'search, G: RankingRuleGraphTrait>(
universe: &RoaringBitmap, universe: &RoaringBitmap,
empty_paths_cache: &mut EmptyPathsCache, empty_paths_cache: &mut EmptyPathsCache,
) -> Result<()> { ) -> Result<()> {
for edge_index in 0..graph.all_edges.len() as u16 { for edge_index in 0..graph.edges_store.len() as u16 {
if graph.all_edges[edge_index as usize].is_none() { if graph.edges_store[edge_index as usize].is_none() {
continue; continue;
} }
let docids = edge_docids_cache.get_edge_docids(ctx, edge_index, &*graph, universe)?; let docids = edge_docids_cache.get_edge_docids(ctx, edge_index, &*graph, universe)?;
match docids { match docids {
BitmapOrAllRef::Bitmap(docids) => { BitmapOrAllRef::Bitmap(docids) => {
if docids.is_disjoint(universe) { if docids.is_disjoint(universe) {
graph.remove_edge(edge_index); graph.remove_ranking_rule_edge(edge_index);
empty_paths_cache.forbid_edge(edge_index); empty_paths_cache.forbid_edge(edge_index);
edge_docids_cache.cache.remove(&edge_index); edge_docids_cache.cache.remove(&edge_index);
continue; continue;
@ -120,7 +120,7 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
) -> Result<()> { ) -> Result<()> {
let mut graph = RankingRuleGraph::build(ctx, query_graph.clone())?; let mut graph = RankingRuleGraph::build(ctx, query_graph.clone())?;
let mut edge_docids_cache = EdgeDocidsCache::default(); let mut edge_docids_cache = EdgeDocidsCache::default();
let mut empty_paths_cache = EmptyPathsCache::new(graph.all_edges.len() as u16); let mut empty_paths_cache = EmptyPathsCache::new(graph.edges_store.len() as u16);
// First simplify the graph as much as possible, by computing the docids of the edges // First simplify the graph as much as possible, by computing the docids of the edges
// within the rule's universe and removing the edges that have no associated docids. // within the rule's universe and removing the edges that have no associated docids.
@ -242,7 +242,7 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
// 1. Store in the cache that this edge is empty for this universe // 1. Store in the cache that this edge is empty for this universe
empty_paths_cache.forbid_edge(edge_index); empty_paths_cache.forbid_edge(edge_index);
// 2. remove this edge from the ranking rule graph // 2. remove this edge from the ranking rule graph
graph.remove_edge(edge_index); graph.remove_ranking_rule_edge(edge_index);
// 3. Also remove the entry from the edge_docids_cache, since we don't need it anymore // 3. Also remove the entry from the edge_docids_cache, since we don't need it anymore
edge_docids_cache.cache.remove(&edge_index); edge_docids_cache.cache.remove(&edge_index);
return Ok(()); return Ok(());

View File

@ -8,7 +8,7 @@ use roaring::RoaringBitmap;
use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations}; use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
use crate::search::new::ranking_rule_graph::{ use crate::search::new::ranking_rule_graph::{
Edge, EdgeDetails, EmptyPathsCache, ProximityGraph, RankingRuleGraph, RankingRuleGraphTrait, Edge, EdgeCondition, EmptyPathsCache, ProximityGraph, RankingRuleGraph, RankingRuleGraphTrait,
TypoGraph, TypoGraph,
}; };
use crate::search::new::small_bitmap::SmallBitmap; use crate::search::new::small_bitmap::SmallBitmap;
@ -534,24 +534,24 @@ shape: class"
let distances = &distances[node_idx]; let distances = &distances[node_idx];
Self::query_node_d2_desc(ctx, node_idx, node, distances.as_slice(), file); Self::query_node_d2_desc(ctx, node_idx, node, distances.as_slice(), file);
} }
for edge in graph.all_edges.iter().flatten() { for edge in graph.edges_store.iter().flatten() {
let Edge { from_node, to_node, details, .. } = edge; let Edge { source_node, dest_node, condition: details, .. } = edge;
match &details { match &details {
EdgeDetails::Unconditional => { EdgeCondition::Unconditional => {
writeln!( writeln!(
file, file,
"{from_node} -> {to_node} : \"always cost {cost}\"", "{source_node} -> {dest_node} : \"always cost {cost}\"",
cost = edge.cost, cost = edge.cost,
) )
.unwrap(); .unwrap();
} }
EdgeDetails::Data(details) => { EdgeCondition::Conditional(details) => {
writeln!( writeln!(
file, file,
"{from_node} -> {to_node} : \"cost {cost} {edge_label}\"", "{source_node} -> {dest_node} : \"cost {cost} {edge_label}\"",
cost = edge.cost, cost = edge.cost,
edge_label = R::graphviz_edge_details_label(details) edge_label = R::label_for_edge_condition(details)
) )
.unwrap(); .unwrap();
} }
@ -589,10 +589,10 @@ shape: class"
edge_idx: u16, edge_idx: u16,
file: &mut File, file: &mut File,
) { ) {
let Edge { from_node, to_node, cost, .. } = let Edge { source_node, dest_node, cost, .. } =
graph.all_edges[edge_idx as usize].as_ref().unwrap(); graph.edges_store[edge_idx as usize].as_ref().unwrap();
let from_node = &graph.query_graph.nodes[*from_node as usize]; let source_node = &graph.query_graph.nodes[*source_node as usize];
let from_node_desc = match from_node { let source_node_desc = match source_node {
QueryNode::Term(term) => match &term.value { QueryNode::Term(term) => match &term.value {
QueryTerm::Phrase { phrase } => { QueryTerm::Phrase { phrase } => {
let phrase = ctx.phrase_interner.get(*phrase); let phrase = ctx.phrase_interner.get(*phrase);
@ -606,8 +606,8 @@ shape: class"
QueryNode::Start => "START".to_owned(), QueryNode::Start => "START".to_owned(),
QueryNode::End => "END".to_owned(), QueryNode::End => "END".to_owned(),
}; };
let to_node = &graph.query_graph.nodes[*to_node as usize]; let dest_node = &graph.query_graph.nodes[*dest_node as usize];
let to_node_desc = match to_node { let dest_node_desc = match dest_node {
QueryNode::Term(term) => match &term.value { QueryNode::Term(term) => match &term.value {
QueryTerm::Phrase { phrase } => { QueryTerm::Phrase { phrase } => {
let phrase = ctx.phrase_interner.get(*phrase); let phrase = ctx.phrase_interner.get(*phrase);
@ -623,7 +623,7 @@ shape: class"
}; };
writeln!( writeln!(
file, file,
"{edge_idx}: \"{from_node_desc}->{to_node_desc} [{cost}]\" {{ "{edge_idx}: \"{source_node_desc}->{dest_node_desc} [{cost}]\" {{
shape: class shape: class
}}" }}"
) )

View File

@ -1,6 +1,3 @@
// TODO: put primitive query part in here
use std::borrow::Cow;
use std::mem; use std::mem;
use std::ops::RangeInclusive; use std::ops::RangeInclusive;
@ -18,6 +15,8 @@ use crate::search::fst_utils::{Complement, Intersection, StartsWith, Union};
use crate::search::{build_dfa, get_first}; use crate::search::{build_dfa, get_first};
use crate::{CboRoaringBitmapLenCodec, Index, Result}; use crate::{CboRoaringBitmapLenCodec, Index, Result};
/// A phrase in the user's search query, consisting of several words
/// that must appear side-by-side in the search results.
#[derive(Default, Clone, PartialEq, Eq, Hash)] #[derive(Default, Clone, PartialEq, Eq, Hash)]
pub struct Phrase { pub struct Phrase {
pub words: Vec<Option<Interned<String>>>, pub words: Vec<Option<Interned<String>>>,
@ -28,18 +27,38 @@ impl Phrase {
} }
} }
/// A structure storing all the different ways to match
/// a term in the user's search query.
#[derive(Clone)] #[derive(Clone)]
pub struct WordDerivations { pub struct WordDerivations {
/// The original word
pub original: Interned<String>, pub original: Interned<String>,
// TODO: pub prefix_of: Vec<String>, // TODO: original should only be used for debugging purposes?
// TODO: pub zero_typo: Option<Interned<String>>,
// TODO: pub prefix_of: Box<[Interned<String>]>,
/// All the synonyms of the original word
pub synonyms: Box<[Interned<Phrase>]>, pub synonyms: Box<[Interned<Phrase>]>,
/// The original word split into multiple consecutive words
pub split_words: Option<Interned<Phrase>>, pub split_words: Option<Interned<Phrase>>,
/// The original word and the words which are prefixed by it
pub zero_typo: Box<[Interned<String>]>, pub zero_typo: Box<[Interned<String>]>,
/// Words that are 1 typo away from the original word
pub one_typo: Box<[Interned<String>]>, pub one_typo: Box<[Interned<String>]>,
/// Words that are 2 typos away from the original word
pub two_typos: Box<[Interned<String>]>, pub two_typos: Box<[Interned<String>]>,
/// True if the prefix databases must be used to retrieve
/// the words which are prefixed by the original word.
pub use_prefix_db: bool, pub use_prefix_db: bool,
} }
impl WordDerivations { impl WordDerivations {
/// Return an iterator over all the single words derived from the original word.
///
/// This excludes synonyms, split words, and words stored in the prefix databases.
pub fn all_derivations_except_prefix_db( pub fn all_derivations_except_prefix_db(
&'_ self, &'_ self,
) -> impl Iterator<Item = Interned<String>> + Clone + '_ { ) -> impl Iterator<Item = Interned<String>> + Clone + '_ {
@ -49,17 +68,20 @@ impl WordDerivations {
self.zero_typo.is_empty() self.zero_typo.is_empty()
&& self.one_typo.is_empty() && self.one_typo.is_empty()
&& self.two_typos.is_empty() && self.two_typos.is_empty()
&& self.synonyms.is_empty()
&& self.split_words.is_none()
&& !self.use_prefix_db && !self.use_prefix_db
} }
} }
/// Compute the word derivations for the given word
pub fn word_derivations( pub fn word_derivations(
ctx: &mut SearchContext, ctx: &mut SearchContext,
word: &str, word: &str,
max_typo: u8, max_typo: u8,
is_prefix: bool, is_prefix: bool,
fst: &fst::Set<Cow<[u8]>>,
) -> Result<WordDerivations> { ) -> Result<WordDerivations> {
let fst = ctx.index.words_fst(ctx.txn)?;
let word_interned = ctx.word_interner.insert(word.to_owned()); let word_interned = ctx.word_interner.insert(word.to_owned());
let use_prefix_db = is_prefix let use_prefix_db = is_prefix
@ -171,6 +193,10 @@ pub fn word_derivations(
}) })
} }
/// Split the original word into the two words that appear next to
/// each other most frequently in the index.
///
/// Return `None` if the original word cannot be split.
fn split_best_frequency( fn split_best_frequency(
index: &Index, index: &Index,
txn: &RoTxn, txn: &RoTxn,
@ -199,16 +225,12 @@ fn split_best_frequency(
#[derive(Clone)] #[derive(Clone)]
pub enum QueryTerm { pub enum QueryTerm {
// TODO: should there be SplitWord, NGram2, and NGram3 variants?
// NGram2 can have 1 typo and synonyms
// NGram3 cannot have typos but can have synonyms
// SplitWords are a phrase
// Can NGrams be prefixes?
Phrase { phrase: Interned<Phrase> }, Phrase { phrase: Interned<Phrase> },
Word { derivations: WordDerivations }, Word { derivations: WordDerivations },
} }
impl QueryTerm { impl QueryTerm {
/// Return the original word from the given query term
pub fn original_single_word<'interner>( pub fn original_single_word<'interner>(
&self, &self,
word_interner: &'interner Interner<String>, word_interner: &'interner Interner<String>,
@ -226,6 +248,7 @@ impl QueryTerm {
} }
} }
/// A query term coupled with its position in the user's search query.
#[derive(Clone)] #[derive(Clone)]
pub struct LocatedQueryTerm { pub struct LocatedQueryTerm {
pub value: QueryTerm, pub value: QueryTerm,
@ -233,14 +256,18 @@ pub struct LocatedQueryTerm {
} }
impl LocatedQueryTerm { impl LocatedQueryTerm {
/// Return `true` iff the word derivations within the query term are empty
pub fn is_empty(&self) -> bool { pub fn is_empty(&self) -> bool {
match &self.value { match &self.value {
// TODO: phrases should be greedily computed, so that they can be excluded from
// the query graph right from the start?
QueryTerm::Phrase { phrase: _ } => false, QueryTerm::Phrase { phrase: _ } => false,
QueryTerm::Word { derivations, .. } => derivations.is_empty(), QueryTerm::Word { derivations, .. } => derivations.is_empty(),
} }
} }
} }
/// Convert the tokenised search query into a list of located query terms.
pub fn located_query_terms_from_string<'search>( pub fn located_query_terms_from_string<'search>(
ctx: &mut SearchContext<'search>, ctx: &mut SearchContext<'search>,
query: NormalizedTokenIter<Vec<u8>>, query: NormalizedTokenIter<Vec<u8>>,
@ -250,8 +277,8 @@ pub fn located_query_terms_from_string<'search>(
let min_len_one_typo = ctx.index.min_word_len_one_typo(ctx.txn)?; let min_len_one_typo = ctx.index.min_word_len_one_typo(ctx.txn)?;
let min_len_two_typos = ctx.index.min_word_len_two_typos(ctx.txn)?; let min_len_two_typos = ctx.index.min_word_len_two_typos(ctx.txn)?;
// TODO: should `exact_words` also disable prefix search, ngrams, split words, or synonyms?
let exact_words = ctx.index.exact_words(ctx.txn)?; let exact_words = ctx.index.exact_words(ctx.txn)?;
let fst = ctx.index.words_fst(ctx.txn)?;
let nbr_typos = |word: &str| { let nbr_typos = |word: &str| {
if !authorize_typos if !authorize_typos
@ -266,9 +293,9 @@ pub fn located_query_terms_from_string<'search>(
} }
}; };
let mut primitive_query = Vec::new(); let mut located_terms = Vec::new();
let mut phrase = Vec::new();
let mut phrase = Vec::new();
let mut quoted = false; let mut quoted = false;
let parts_limit = words_limit.unwrap_or(usize::MAX); let parts_limit = words_limit.unwrap_or(usize::MAX);
@ -280,8 +307,8 @@ pub fn located_query_terms_from_string<'search>(
let mut peekable = query.peekable(); let mut peekable = query.peekable();
while let Some(token) = peekable.next() { while let Some(token) = peekable.next() {
// early return if word limit is exceeded // early return if word limit is exceeded
if primitive_query.len() >= parts_limit { if located_terms.len() >= parts_limit {
return Ok(primitive_query); return Ok(located_terms);
} }
match token.kind { match token.kind {
@ -307,24 +334,23 @@ pub fn located_query_terms_from_string<'search>(
match token.kind { match token.kind {
TokenKind::Word => { TokenKind::Word => {
let word = token.lemma(); let word = token.lemma();
let derivations = let derivations = word_derivations(ctx, word, nbr_typos(word), false)?;
word_derivations(ctx, word, nbr_typos(word), false, &fst)?;
let located_term = LocatedQueryTerm { let located_term = LocatedQueryTerm {
value: QueryTerm::Word { derivations }, value: QueryTerm::Word { derivations },
positions: position..=position, positions: position..=position,
}; };
primitive_query.push(located_term); located_terms.push(located_term);
} }
TokenKind::StopWord | TokenKind::Separator(_) | TokenKind::Unknown => {} TokenKind::StopWord | TokenKind::Separator(_) | TokenKind::Unknown => {}
} }
} else { } else {
let word = token.lemma(); let word = token.lemma();
let derivations = word_derivations(ctx, word, nbr_typos(word), true, &fst)?; let derivations = word_derivations(ctx, word, nbr_typos(word), true)?;
let located_term = LocatedQueryTerm { let located_term = LocatedQueryTerm {
value: QueryTerm::Word { derivations }, value: QueryTerm::Word { derivations },
positions: position..=position, positions: position..=position,
}; };
primitive_query.push(located_term); located_terms.push(located_term);
} }
} }
TokenKind::Separator(separator_kind) => { TokenKind::Separator(separator_kind) => {
@ -352,7 +378,7 @@ pub fn located_query_terms_from_string<'search>(
}, },
positions: phrase_start..=phrase_end, positions: phrase_start..=phrase_end,
}; };
primitive_query.push(located_query_term); located_terms.push(located_query_term);
} }
} }
_ => (), _ => (),
@ -367,10 +393,10 @@ pub fn located_query_terms_from_string<'search>(
}, },
positions: phrase_start..=phrase_end, positions: phrase_start..=phrase_end,
}; };
primitive_query.push(located_query_term); located_terms.push(located_query_term);
} }
Ok(primitive_query) Ok(located_terms)
} }
// TODO: return a word derivations instead? // TODO: return a word derivations instead?
@ -396,6 +422,8 @@ pub fn ngram2(
_ => None, _ => None,
} }
} }
// TODO: return a word derivations instead?
pub fn ngram3( pub fn ngram3(
ctx: &mut SearchContext, ctx: &mut SearchContext,
x: &LocatedQueryTerm, x: &LocatedQueryTerm,

View File

@ -6,49 +6,43 @@ use crate::search::new::{QueryGraph, SearchContext};
use crate::Result; use crate::Result;
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
/// Build the ranking rule graph from the given query graph
pub fn build(ctx: &mut SearchContext, query_graph: QueryGraph) -> Result<Self> { pub fn build(ctx: &mut SearchContext, query_graph: QueryGraph) -> Result<Self> {
let QueryGraph { nodes: graph_nodes, edges: graph_edges, .. } = &query_graph; let QueryGraph { nodes: graph_nodes, edges: graph_edges, .. } = &query_graph;
let mut all_edges = vec![]; let mut edges_store = vec![];
let mut node_edges = vec![]; let mut edges_of_node = vec![];
let mut successors = vec![];
for (node_idx, node) in graph_nodes.iter().enumerate() { for (node_idx, node) in graph_nodes.iter().enumerate() {
node_edges.push(HashSet::new()); edges_of_node.push(HashSet::new());
successors.push(HashSet::new()); let new_edges = edges_of_node.last_mut().unwrap();
let new_edges = node_edges.last_mut().unwrap();
let new_successors = successors.last_mut().unwrap();
let Some(from_node_data) = G::build_visit_from_node(ctx, node)? else { continue }; let Some(source_node_data) = G::build_step_visit_source_node(ctx, node)? else { continue };
for successor_idx in graph_edges[node_idx].successors.iter() { for successor_idx in graph_edges[node_idx].successors.iter() {
let to_node = &graph_nodes[successor_idx as usize]; let dest_node = &graph_nodes[successor_idx as usize];
let mut edges = G::build_visit_to_node(ctx, to_node, &from_node_data)?; let edges =
G::build_step_visit_destination_node(ctx, dest_node, &source_node_data)?;
if edges.is_empty() { if edges.is_empty() {
continue; continue;
} }
edges.sort_by_key(|e| e.0);
for (cost, details) in edges { for (cost, details) in edges {
all_edges.push(Some(Edge { edges_store.push(Some(Edge {
from_node: node_idx as u16, source_node: node_idx as u16,
to_node: successor_idx, dest_node: successor_idx,
cost, cost,
details, condition: details,
})); }));
new_edges.insert(all_edges.len() as u16 - 1); new_edges.insert(edges_store.len() as u16 - 1);
new_successors.insert(successor_idx);
} }
} }
} }
let node_edges = node_edges let edges_of_node = edges_of_node
.into_iter() .into_iter()
.map(|edges| SmallBitmap::from_iter(edges.into_iter(), all_edges.len() as u16)) .map(|edges| SmallBitmap::from_iter(edges.into_iter(), edges_store.len() as u16))
.collect();
let successors = successors
.into_iter()
.map(|edges| SmallBitmap::from_iter(edges.into_iter(), all_edges.len() as u16))
.collect(); .collect();
Ok(RankingRuleGraph { query_graph, all_edges, node_edges, successors }) Ok(RankingRuleGraph { query_graph, edges_store, edges_of_node })
} }
} }

View File

@ -30,7 +30,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
empty_paths_cache, empty_paths_cache,
&mut visit, &mut visit,
&mut vec![], &mut vec![],
&mut SmallBitmap::new(self.all_edges.len() as u16), &mut SmallBitmap::new(self.edges_store.len() as u16),
empty_paths_cache.empty_edges.clone(), empty_paths_cache.empty_edges.clone(),
)?; )?;
Ok(()) Ok(())
@ -48,12 +48,12 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
) -> Result<bool> { ) -> Result<bool> {
let mut any_valid = false; let mut any_valid = false;
let edges = self.node_edges[from].clone(); let edges = self.edges_of_node[from].clone();
for edge_idx in edges.iter() { for edge_idx in edges.iter() {
let Some(edge) = self.all_edges[edge_idx as usize].as_ref() else { continue }; let Some(edge) = self.edges_store[edge_idx as usize].as_ref() else { continue };
if cost < edge.cost as u16 if cost < edge.cost as u16
|| forbidden_edges.contains(edge_idx) || forbidden_edges.contains(edge_idx)
|| !all_distances[edge.to_node as usize].iter().any( || !all_distances[edge.dest_node as usize].iter().any(
|(next_cost, necessary_edges)| { |(next_cost, necessary_edges)| {
(*next_cost == cost - edge.cost as u16) (*next_cost == cost - edge.cost as u16)
&& !forbidden_edges.intersects(necessary_edges) && !forbidden_edges.intersects(necessary_edges)
@ -71,13 +71,13 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
new_forbidden_edges.insert(x); new_forbidden_edges.insert(x);
}); });
let next_any_valid = if edge.to_node == self.query_graph.end_node { let next_any_valid = if edge.dest_node == self.query_graph.end_node {
any_valid = true; any_valid = true;
visit(prev_edges, self, empty_paths_cache)?; visit(prev_edges, self, empty_paths_cache)?;
true true
} else { } else {
self.visit_paths_of_cost_rec( self.visit_paths_of_cost_rec(
edge.to_node as usize, edge.dest_node as usize,
cost - edge.cost as u16, cost - edge.cost as u16,
all_distances, all_distances,
empty_paths_cache, empty_paths_cache,
@ -115,7 +115,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
let mut node_stack = VecDeque::new(); let mut node_stack = VecDeque::new();
distances_to_end[self.query_graph.end_node as usize] = distances_to_end[self.query_graph.end_node as usize] =
vec![(0, SmallBitmap::new(self.all_edges.len() as u16))]; vec![(0, SmallBitmap::new(self.edges_store.len() as u16))];
for prev_node in for prev_node in
self.query_graph.edges[self.query_graph.end_node as usize].predecessors.iter() self.query_graph.edges[self.query_graph.end_node as usize].predecessors.iter()
@ -127,15 +127,15 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
while let Some(cur_node) = node_stack.pop_front() { while let Some(cur_node) = node_stack.pop_front() {
let mut self_distances = BTreeMap::<u16, SmallBitmap>::new(); let mut self_distances = BTreeMap::<u16, SmallBitmap>::new();
let cur_node_edges = &self.node_edges[cur_node]; let cur_node_edges = &self.edges_of_node[cur_node];
for edge_idx in cur_node_edges.iter() { for edge_idx in cur_node_edges.iter() {
let edge = self.all_edges[edge_idx as usize].as_ref().unwrap(); let edge = self.edges_store[edge_idx as usize].as_ref().unwrap();
let succ_node = edge.to_node; let succ_node = edge.dest_node;
let succ_distances = &distances_to_end[succ_node as usize]; let succ_distances = &distances_to_end[succ_node as usize];
for (succ_distance, succ_necessary_edges) in succ_distances { for (succ_distance, succ_necessary_edges) in succ_distances {
let potential_necessary_edges = SmallBitmap::from_iter( let potential_necessary_edges = SmallBitmap::from_iter(
std::iter::once(edge_idx).chain(succ_necessary_edges.iter()), std::iter::once(edge_idx).chain(succ_necessary_edges.iter()),
self.all_edges.len() as u16, self.edges_store.len() as u16,
); );
match self_distances.entry(edge.cost as u16 + succ_distance) { match self_distances.entry(edge.cost as u16 + succ_distance) {
Entry::Occupied(mut prev_necessary_edges) => { Entry::Occupied(mut prev_necessary_edges) => {

View File

@ -3,28 +3,13 @@ use std::marker::PhantomData;
use fxhash::FxHashMap; use fxhash::FxHashMap;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait}; use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait};
use crate::search::new::{BitmapOrAllRef, SearchContext}; use crate::search::new::{BitmapOrAllRef, SearchContext};
use crate::Result; use crate::Result;
// TODO: the cache should have a G::EdgeDetails as key /// A cache storing the document ids associated with each ranking rule edge
// but then it means that we should have a quick way of
// computing their hash and comparing them
// which can be done...
// by using a pointer (real, Rc, bumpalo, or in a vector)???
//
// But actually.... the edge details' docids are a subset of the universe at the
// moment they were computed.
// But the universes between two iterations of a ranking rule are completely different
// Thus, there is no point in doing this.
// UNLESS...
// we compute the whole docids corresponding to the edge details (potentially expensive in time and memory
// in the common case)
//
// But we could still benefit within a single iteration for requests like:
// `a a a a a a a a a` where we have many of the same edge details, repeated
pub struct EdgeDocidsCache<G: RankingRuleGraphTrait> { pub struct EdgeDocidsCache<G: RankingRuleGraphTrait> {
// TODO: should be FxHashMap<Interned<EdgeCondition>, RoaringBitmap>
pub cache: FxHashMap<u16, RoaringBitmap>, pub cache: FxHashMap<u16, RoaringBitmap>,
_phantom: PhantomData<G>, _phantom: PhantomData<G>,
} }
@ -34,19 +19,24 @@ impl<G: RankingRuleGraphTrait> Default for EdgeDocidsCache<G> {
} }
} }
impl<G: RankingRuleGraphTrait> EdgeDocidsCache<G> { impl<G: RankingRuleGraphTrait> EdgeDocidsCache<G> {
/// Retrieve the document ids for the given edge condition.
///
/// If the cache does not yet contain these docids, they are computed
/// and inserted in the cache.
pub fn get_edge_docids<'s, 'search>( pub fn get_edge_docids<'s, 'search>(
&'s mut self, &'s mut self,
ctx: &mut SearchContext<'search>, ctx: &mut SearchContext<'search>,
// TODO: should be Interned<EdgeCondition>
edge_index: u16, edge_index: u16,
graph: &RankingRuleGraph<G>, graph: &RankingRuleGraph<G>,
// TODO: maybe universe doesn't belong here // TODO: maybe universe doesn't belong here
universe: &RoaringBitmap, universe: &RoaringBitmap,
) -> Result<BitmapOrAllRef<'s>> { ) -> Result<BitmapOrAllRef<'s>> {
let edge = graph.all_edges[edge_index as usize].as_ref().unwrap(); let edge = graph.edges_store[edge_index as usize].as_ref().unwrap();
match &edge.details { match &edge.condition {
EdgeDetails::Unconditional => Ok(BitmapOrAllRef::All), EdgeCondition::Unconditional => Ok(BitmapOrAllRef::All),
EdgeDetails::Data(details) => { EdgeCondition::Conditional(details) => {
if self.cache.contains_key(&edge_index) { if self.cache.contains_key(&edge_index) {
// TODO: should we update the bitmap in the cache if the new universe // TODO: should we update the bitmap in the cache if the new universe
// reduces it? // reduces it?
@ -56,7 +46,7 @@ impl<G: RankingRuleGraphTrait> EdgeDocidsCache<G> {
return Ok(BitmapOrAllRef::Bitmap(&self.cache[&edge_index])); return Ok(BitmapOrAllRef::Bitmap(&self.cache[&edge_index]));
} }
// TODO: maybe universe doesn't belong here // TODO: maybe universe doesn't belong here
let docids = universe & G::compute_docids(ctx, details, universe)?; let docids = universe & G::resolve_edge_condition(ctx, details, universe)?;
let _ = self.cache.insert(edge_index, docids); let _ = self.cache.insert(edge_index, docids);
let docids = &self.cache[&edge_index]; let docids = &self.cache[&edge_index];
Ok(BitmapOrAllRef::Bitmap(docids)) Ok(BitmapOrAllRef::Bitmap(docids))

View File

@ -1,20 +1,29 @@
use super::paths_map::PathsMap; use super::paths_map::PathSet;
use crate::search::new::small_bitmap::SmallBitmap; use crate::search::new::small_bitmap::SmallBitmap;
/// A cache which stores sufficient conditions for a path
/// to resolve to an empty set of candidates within the current
/// universe.
#[derive(Clone)] #[derive(Clone)]
pub struct EmptyPathsCache { pub struct EmptyPathsCache {
/// The set of edge indexes that resolve to no documents.
pub empty_edges: SmallBitmap, pub empty_edges: SmallBitmap,
pub empty_prefixes: PathsMap<()>, /// A set of path prefixes that resolve to no documents.
pub empty_prefixes: PathSet,
/// A set of couples of edge indexes that resolve to no documents.
pub empty_couple_edges: Vec<SmallBitmap>, pub empty_couple_edges: Vec<SmallBitmap>,
} }
impl EmptyPathsCache { impl EmptyPathsCache {
/// Create a new cache for a ranking rule graph containing at most `all_edges_len` edges.
pub fn new(all_edges_len: u16) -> Self { pub fn new(all_edges_len: u16) -> Self {
Self { Self {
empty_edges: SmallBitmap::new(all_edges_len), empty_edges: SmallBitmap::new(all_edges_len),
empty_prefixes: PathsMap::default(), empty_prefixes: PathSet::default(),
empty_couple_edges: vec![SmallBitmap::new(all_edges_len); all_edges_len as usize], empty_couple_edges: vec![SmallBitmap::new(all_edges_len); all_edges_len as usize],
} }
} }
/// Store in the cache that every path containing the given edge resolves to no documents.
pub fn forbid_edge(&mut self, edge_idx: u16) { pub fn forbid_edge(&mut self, edge_idx: u16) {
self.empty_edges.insert(edge_idx); self.empty_edges.insert(edge_idx);
self.empty_couple_edges[edge_idx as usize].clear(); self.empty_couple_edges[edge_idx as usize].clear();
@ -23,12 +32,17 @@ impl EmptyPathsCache {
edges2.remove(edge_idx); edges2.remove(edge_idx);
} }
} }
/// Store in the cache that every path containing the given prefix resolves to no documents.
pub fn forbid_prefix(&mut self, prefix: &[u16]) { pub fn forbid_prefix(&mut self, prefix: &[u16]) {
self.empty_prefixes.insert(prefix.iter().copied(), ()); self.empty_prefixes.insert(prefix.iter().copied());
} }
/// Store in the cache that every path containing the two given edges resolves to no documents.
pub fn forbid_couple_edges(&mut self, edge1: u16, edge2: u16) { pub fn forbid_couple_edges(&mut self, edge1: u16, edge2: u16) {
self.empty_couple_edges[edge1 as usize].insert(edge2); self.empty_couple_edges[edge1 as usize].insert(edge2);
} }
/// Returns true if the cache can determine that the given path resolves to no documents.
pub fn path_is_empty(&self, path: &[u16], path_bitmap: &SmallBitmap) -> bool { pub fn path_is_empty(&self, path: &[u16], path_bitmap: &SmallBitmap) -> bool {
if path_bitmap.intersects(&self.empty_edges) { if path_bitmap.intersects(&self.empty_edges) {
return true; return true;

View File

@ -1,9 +1,19 @@
/*! Module implementing the graph used for the graph-based ranking rules
and its related algorithms.
A ranking rule graph is built on top of the [`QueryGraph`]: the nodes stay
the same but the edges are replaced.
*/
mod build; mod build;
mod cheapest_paths; mod cheapest_paths;
mod edge_docids_cache; mod edge_docids_cache;
mod empty_paths_cache; mod empty_paths_cache;
mod paths_map; mod paths_map;
/// Implementation of the `proximity` ranking rule
mod proximity; mod proximity;
/// Implementation of the `typo` ranking rule
mod typo; mod typo;
pub use edge_docids_cache::EdgeDocidsCache; pub use edge_docids_cache::EdgeDocidsCache;
@ -17,30 +27,38 @@ use super::small_bitmap::SmallBitmap;
use super::{QueryGraph, QueryNode, SearchContext}; use super::{QueryGraph, QueryNode, SearchContext};
use crate::Result; use crate::Result;
/// The condition that is associated with an edge in the ranking rule graph.
///
/// Some edges are unconditional, which means that traversing them does not reduce
/// the set of candidates.
///
/// Most edges, however, have a condition attached to them. For example, for the
/// proximity ranking rule, the condition could be that a word is N-close to another one.
/// When the edge is traversed, some database operations are executed to retrieve the set
/// of documents that satisfy the condition, which reduces the list of candidate document ids.
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub enum EdgeDetails<E> { pub enum EdgeCondition<E> {
Unconditional, Unconditional,
Data(E), Conditional(E),
} }
/// An edge in the ranking rule graph.
///
/// It contains:
/// 1. The source and destination nodes
/// 2. The cost of traversing this edge
/// 3. The condition associated with it
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct Edge<E> { pub struct Edge<E> {
pub from_node: u16, pub source_node: u16,
pub to_node: u16, pub dest_node: u16,
pub cost: u8, pub cost: u8,
pub details: EdgeDetails<E>, pub condition: EdgeCondition<E>,
}
#[derive(Debug, Clone)]
pub struct EdgePointer<'graph, E> {
pub index: u16,
pub edge: &'graph Edge<E>,
} }
// pub struct SubWordDerivations { // pub struct SubWordDerivations {
// words: FxHashSet<Interned<String>>, // words: FxHashSet<Interned<String>>,
// synonyms: FxHashSet<Interned<Phrase>>, // NO! they're phrases, not strings // phrases: FxHashSet<Interned<Phrase>>,
// split_words: bool,
// use_prefix_db: bool, // use_prefix_db: bool,
// } // }
@ -74,46 +92,55 @@ pub struct EdgePointer<'graph, E> {
// } // }
// fn word_derivations_used_by_edge<G: RankingRuleGraphTrait>( // fn word_derivations_used_by_edge<G: RankingRuleGraphTrait>(
// edge: G::EdgeDetails, // edge: G::EdgeCondition,
// ) -> SubWordDerivations { // ) -> SubWordDerivations {
// todo!() // todo!()
// } // }
/// A trait to be implemented by a marker type to build a graph-based ranking rule.
///
/// It mostly describes how to:
/// 1. Retrieve the set of edges (their cost and condition) between two nodes.
/// 2. Compute the document ids satisfying a condition
pub trait RankingRuleGraphTrait: Sized { pub trait RankingRuleGraphTrait: Sized {
/// The details of an edge connecting two query nodes. These details /// The condition of an edge connecting two query nodes. The condition
/// should be sufficient to compute the edge's cost and associated document ids /// should be sufficient to compute the edge's cost and associated document ids
/// in [`compute_docids`](RankingRuleGraphTrait). /// in [`resolve_edge_condition`](RankingRuleGraphTrait::resolve_edge_condition).
type EdgeDetails: Sized + Clone; type EdgeCondition: Sized + Clone;
/// A structure used in the construction of the graph, created when a
/// query graph source node is visited. It is used to determine the cost
/// and condition of a ranking rule edge when the destination node is visited.
type BuildVisitedFromNode; type BuildVisitedFromNode;
/// Return the label of the given edge details, to be used when visualising /// Return the label of the given edge condition, to be used when visualising
/// the ranking rule graph using GraphViz. /// the ranking rule graph.
fn graphviz_edge_details_label(edge: &Self::EdgeDetails) -> String; fn label_for_edge_condition(edge: &Self::EdgeCondition) -> String;
/// Compute the document ids associated with the given edge. /// Compute the document ids associated with the given edge condition,
fn compute_docids<'search>( /// restricted to the given universe.
fn resolve_edge_condition<'search>(
ctx: &mut SearchContext<'search>, ctx: &mut SearchContext<'search>,
edge_details: &Self::EdgeDetails, edge_condition: &Self::EdgeCondition,
universe: &RoaringBitmap, universe: &RoaringBitmap,
) -> Result<RoaringBitmap>; ) -> Result<RoaringBitmap>;
/// Prepare to build the edges outgoing from `from_node`. /// Prepare to build the edges outgoing from `source_node`.
/// ///
/// This call is followed by zero, one or more calls to [`build_visit_to_node`](RankingRuleGraphTrait::build_visit_to_node), /// This call is followed by zero, one or more calls to [`build_step_visit_destination_node`](RankingRuleGraphTrait::build_step_visit_destination_node),
/// which builds the actual edges. /// which builds the actual edges.
fn build_visit_from_node<'search>( fn build_step_visit_source_node<'search>(
ctx: &mut SearchContext<'search>, ctx: &mut SearchContext<'search>,
from_node: &QueryNode, source_node: &QueryNode,
) -> Result<Option<Self::BuildVisitedFromNode>>; ) -> Result<Option<Self::BuildVisitedFromNode>>;
/// Return the cost and details of the edges going from the previously visited node /// Return the cost and condition of the edges going from the previously visited node
/// (with [`build_visit_from_node`](RankingRuleGraphTrait::build_visit_from_node)) to `to_node`. /// (with [`build_step_visit_source_node`](RankingRuleGraphTrait::build_step_visit_source_node)) to `dest_node`.
fn build_visit_to_node<'from_data, 'search: 'from_data>( fn build_step_visit_destination_node<'from_data, 'search: 'from_data>(
ctx: &mut SearchContext<'search>, ctx: &mut SearchContext<'search>,
to_node: &QueryNode, dest_node: &QueryNode,
from_node_data: &'from_data Self::BuildVisitedFromNode, source_node_data: &'from_data Self::BuildVisitedFromNode,
) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>>; ) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>>;
fn log_state( fn log_state(
graph: &RankingRuleGraph<Self>, graph: &RankingRuleGraph<Self>,
@ -126,45 +153,32 @@ pub trait RankingRuleGraphTrait: Sized {
); );
} }
/// The graph used by graph-based ranking rules.
///
/// It is built on top of a [`QueryGraph`], keeping the same nodes
/// but replacing the edges.
pub struct RankingRuleGraph<G: RankingRuleGraphTrait> { pub struct RankingRuleGraph<G: RankingRuleGraphTrait> {
pub query_graph: QueryGraph, pub query_graph: QueryGraph,
// pub edges: Vec<HashMap<usize, Vec<Edge<G::EdgeDetails>>>>, pub edges_store: Vec<Option<Edge<G::EdgeCondition>>>,
pub all_edges: Vec<Option<Edge<G::EdgeDetails>>>, pub edges_of_node: Vec<SmallBitmap>,
pub node_edges: Vec<SmallBitmap>,
pub successors: Vec<SmallBitmap>,
// TODO: to get the edges between two nodes:
// 1. get node_outgoing_edges[from]
// 2. get node_incoming_edges[to]
// 3. take intersection betweem the two
} }
impl<G: RankingRuleGraphTrait> Clone for RankingRuleGraph<G> { impl<G: RankingRuleGraphTrait> Clone for RankingRuleGraph<G> {
fn clone(&self) -> Self { fn clone(&self) -> Self {
Self { Self {
query_graph: self.query_graph.clone(), query_graph: self.query_graph.clone(),
all_edges: self.all_edges.clone(), edges_store: self.edges_store.clone(),
node_edges: self.node_edges.clone(), edges_of_node: self.edges_of_node.clone(),
successors: self.successors.clone(),
} }
} }
} }
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
pub fn remove_edge(&mut self, edge_index: u16) { /// Remove the given edge from the ranking rule graph
let edge_opt = &mut self.all_edges[edge_index as usize]; pub fn remove_ranking_rule_edge(&mut self, edge_index: u16) {
let edge_opt = &mut self.edges_store[edge_index as usize];
let Some(edge) = &edge_opt else { return }; let Some(edge) = &edge_opt else { return };
let (from_node, _to_node) = (edge.from_node, edge.to_node); let (source_node, _dest_node) = (edge.source_node, edge.dest_node);
*edge_opt = None; *edge_opt = None;
let from_node_edges = &mut self.node_edges[from_node as usize]; self.edges_of_node[source_node as usize].remove(edge_index);
from_node_edges.remove(edge_index);
let mut new_successors_from_node = SmallBitmap::new(self.all_edges.len() as u16);
let all_edges = &self.all_edges;
for from_node_edge in from_node_edges.iter() {
let Edge { to_node, .. } = &all_edges[from_node_edge as usize].as_ref().unwrap();
new_successors_from_node.insert(*to_node);
}
self.successors[from_node as usize] = new_successors_from_node;
} }
} }

View File

@ -1,117 +1,32 @@
use super::cheapest_paths::Path; // What is PathSet used for?
use crate::search::new::small_bitmap::SmallBitmap;
// What is PathsMap used for?
// For the empty_prefixes field in the EmptyPathsCache only :/ // For the empty_prefixes field in the EmptyPathsCache only :/
// but it could be used for more, like efficient computing of a set of paths // but it could be used for more, like efficient computing of a set of paths
#[derive(Debug, Clone)] /// A set of [`Path`]
pub struct PathsMap<V> { #[derive(Default, Debug, Clone)]
pub nodes: Vec<(u16, PathsMap<V>)>, pub struct PathSet {
pub value: Option<V>, nodes: Vec<(u16, PathSet)>,
is_end: bool,
} }
impl<V> Default for PathsMap<V> { impl PathSet {
fn default() -> Self { pub fn insert(&mut self, mut edges: impl Iterator<Item = u16>) {
Self { nodes: vec![], value: None }
}
}
impl PathsMap<u64> {
pub fn from_paths(paths: &[Path]) -> Self {
let mut result = Self::default();
for p in paths {
result.add_path(p);
}
result
}
pub fn add_path(&mut self, path: &Path) {
self.insert(path.edges.iter().copied(), path.cost);
}
}
impl<V> PathsMap<V> {
pub fn is_empty(&self) -> bool {
self.nodes.is_empty() && self.value.is_none()
}
pub fn insert(&mut self, mut edges: impl Iterator<Item = u16>, value: V) {
match edges.next() { match edges.next() {
None => { None => {
self.value = Some(value); self.is_end = true;
} }
Some(first_edge) => { Some(first_edge) => {
// comment
for (edge, next_node) in &mut self.nodes { for (edge, next_node) in &mut self.nodes {
if edge == &first_edge { if edge == &first_edge {
return next_node.insert(edges, value); return next_node.insert(edges);
} }
} }
let mut rest = PathsMap::default(); let mut rest = PathSet::default();
rest.insert(edges, value); rest.insert(edges);
self.nodes.push((first_edge, rest)); self.nodes.push((first_edge, rest));
} }
} }
} }
fn remove_first_rec(&mut self, cur: &mut Vec<u16>) -> (bool, V) {
let Some((first_edge, rest)) = self.nodes.first_mut() else {
// The PathsMap has to be correct by construction here, otherwise
// the unwrap() will crash
return (true, self.value.take().unwrap())
};
cur.push(*first_edge);
let (rest_is_empty, value) = rest.remove_first_rec(cur);
if rest_is_empty {
self.nodes.remove(0);
(self.nodes.is_empty(), value)
} else {
(false, value)
}
}
pub fn remove_first(&mut self) -> Option<(Vec<u16>, V)> {
if self.is_empty() {
return None;
}
let mut result = vec![];
let (_, value) = self.remove_first_rec(&mut result);
Some((result, value))
}
pub fn iterate_rec(&self, cur: &mut Vec<u16>, visit: &mut impl FnMut(&Vec<u16>, &V)) {
if let Some(value) = &self.value {
visit(cur, value);
}
for (first_edge, rest) in self.nodes.iter() {
cur.push(*first_edge);
rest.iterate_rec(cur, visit);
cur.pop();
}
}
pub fn iterate(&self, mut visit: impl FnMut(&Vec<u16>, &V)) {
self.iterate_rec(&mut vec![], &mut visit)
}
pub fn remove_prefixes<U>(&mut self, prefixes: &PathsMap<U>) {
prefixes.iterate(|prefix, _v| {
self.remove_prefix(prefix);
});
}
pub fn remove_edges(&mut self, forbidden_edges: &SmallBitmap) {
let mut i = 0;
while i < self.nodes.len() {
let should_remove = if forbidden_edges.contains(self.nodes[i].0) {
true
} else if !self.nodes[i].1.nodes.is_empty() {
self.nodes[i].1.remove_edges(forbidden_edges);
self.nodes[i].1.nodes.is_empty()
} else {
false
};
if should_remove {
self.nodes.remove(i);
} else {
i += 1;
}
}
}
pub fn remove_edge(&mut self, forbidden_edge: &u16) { pub fn remove_edge(&mut self, forbidden_edge: &u16) {
let mut i = 0; let mut i = 0;
while i < self.nodes.len() { while i < self.nodes.len() {
@ -130,34 +45,11 @@ impl<V> PathsMap<V> {
} }
} }
} }
pub fn remove_prefix(&mut self, forbidden_prefix: &[u16]) {
let [first_edge, remaining_prefix @ ..] = forbidden_prefix else {
self.nodes.clear();
self.value = None;
return;
};
let mut i = 0;
while i < self.nodes.len() {
let edge = self.nodes[i].0;
let should_remove = if edge == *first_edge {
self.nodes[i].1.remove_prefix(remaining_prefix);
self.nodes[i].1.nodes.is_empty()
} else {
false
};
if should_remove {
self.nodes.remove(i);
} else {
i += 1;
}
}
}
pub fn final_edges_after_prefix(&self, prefix: &[u16], visit: &mut impl FnMut(u16)) { pub fn final_edges_after_prefix(&self, prefix: &[u16], visit: &mut impl FnMut(u16)) {
let [first_edge, remaining_prefix @ ..] = prefix else { let [first_edge, remaining_prefix @ ..] = prefix else {
for node in self.nodes.iter() { for node in self.nodes.iter() {
if node.1.value.is_some() { if node.1.is_end {
visit(node.0) visit(node.0)
} }
} }
@ -170,20 +62,8 @@ impl<V> PathsMap<V> {
} }
} }
pub fn edge_indices_after_prefix(&self, prefix: &[u16]) -> Vec<u16> {
let [first_edge, remaining_prefix @ ..] = prefix else {
return self.nodes.iter().map(|n| n.0).collect();
};
for (edge, rest) in self.nodes.iter() {
if edge == first_edge {
return rest.edge_indices_after_prefix(remaining_prefix);
}
}
vec![]
}
pub fn contains_prefix_of_path(&self, path: &[u16]) -> bool { pub fn contains_prefix_of_path(&self, path: &[u16]) -> bool {
if self.value.is_some() { if self.is_end {
return true; return true;
} }
match path { match path {

View File

@ -5,7 +5,7 @@ use itertools::Itertools;
use super::ProximityEdge; use super::ProximityEdge;
use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations}; use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
use crate::search::new::ranking_rule_graph::proximity::WordPair; use crate::search::new::ranking_rule_graph::proximity::WordPair;
use crate::search::new::ranking_rule_graph::EdgeDetails; use crate::search::new::ranking_rule_graph::EdgeCondition;
use crate::search::new::{QueryNode, SearchContext}; use crate::search::new::{QueryNode, SearchContext};
use crate::Result; use crate::Result;
@ -57,10 +57,10 @@ pub fn visit_to_node<'search, 'from_data>(
ctx: &mut SearchContext<'search>, ctx: &mut SearchContext<'search>,
to_node: &QueryNode, to_node: &QueryNode,
from_node_data: &'from_data (WordDerivations, i8), from_node_data: &'from_data (WordDerivations, i8),
) -> Result<Vec<(u8, EdgeDetails<ProximityEdge>)>> { ) -> Result<Vec<(u8, EdgeCondition<ProximityEdge>)>> {
let (derivations1, pos1) = from_node_data; let (derivations1, pos1) = from_node_data;
let term2 = match &to_node { let term2 = match &to_node {
QueryNode::End => return Ok(vec![(0, EdgeDetails::Unconditional)]), QueryNode::End => return Ok(vec![(0, EdgeCondition::Unconditional)]),
QueryNode::Deleted | QueryNode::Start => return Ok(vec![]), QueryNode::Deleted | QueryNode::Start => return Ok(vec![]),
QueryNode::Term(term) => term, QueryNode::Term(term) => term,
}; };
@ -96,7 +96,7 @@ pub fn visit_to_node<'search, 'from_data>(
// We want to effectively ignore this pair of terms // We want to effectively ignore this pair of terms
// Unconditionally walk through the edge without computing the docids // Unconditionally walk through the edge without computing the docids
// But also what should the cost be? // But also what should the cost be?
return Ok(vec![(0, EdgeDetails::Unconditional)]); return Ok(vec![(0, EdgeCondition::Unconditional)]);
} }
let updb1 = derivations1.use_prefix_db; let updb1 = derivations1.use_prefix_db;
@ -189,7 +189,7 @@ pub fn visit_to_node<'search, 'from_data>(
for (proximity, word_pairs) in proximity_word_pairs { for (proximity, word_pairs) in proximity_word_pairs {
edges.push(( edges.push((
cost, cost,
EdgeDetails::Data(ProximityEdge { EdgeCondition::Conditional(ProximityEdge {
pairs: word_pairs.into_boxed_slice(), pairs: word_pairs.into_boxed_slice(),
proximity, proximity,
}), }),
@ -198,6 +198,6 @@ pub fn visit_to_node<'search, 'from_data>(
edges edges
}) })
.collect::<Vec<_>>(); .collect::<Vec<_>>();
new_edges.push((8 + (ngram_len2 - 1) as u8, EdgeDetails::Unconditional)); new_edges.push((8 + (ngram_len2 - 1) as u8, EdgeCondition::Unconditional));
Ok(new_edges) Ok(new_edges)
} }

View File

@ -4,7 +4,7 @@ pub mod compute_docids;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::empty_paths_cache::EmptyPathsCache; use super::empty_paths_cache::EmptyPathsCache;
use super::{EdgeDetails, RankingRuleGraphTrait}; use super::{EdgeCondition, RankingRuleGraphTrait};
use crate::search::new::interner::Interned; use crate::search::new::interner::Interned;
use crate::search::new::logger::SearchLogger; use crate::search::new::logger::SearchLogger;
use crate::search::new::query_term::WordDerivations; use crate::search::new::query_term::WordDerivations;
@ -30,34 +30,34 @@ pub struct ProximityEdge {
pub enum ProximityGraph {} pub enum ProximityGraph {}
impl RankingRuleGraphTrait for ProximityGraph { impl RankingRuleGraphTrait for ProximityGraph {
type EdgeDetails = ProximityEdge; type EdgeCondition = ProximityEdge;
type BuildVisitedFromNode = (WordDerivations, i8); type BuildVisitedFromNode = (WordDerivations, i8);
fn graphviz_edge_details_label(edge: &Self::EdgeDetails) -> String { fn label_for_edge_condition(edge: &Self::EdgeCondition) -> String {
let ProximityEdge { pairs, proximity } = edge; let ProximityEdge { pairs, proximity } = edge;
format!(", prox {proximity}, {} pairs", pairs.len()) format!(", prox {proximity}, {} pairs", pairs.len())
} }
fn compute_docids<'search>( fn resolve_edge_condition<'search>(
ctx: &mut SearchContext<'search>, ctx: &mut SearchContext<'search>,
edge: &Self::EdgeDetails, edge: &Self::EdgeCondition,
universe: &RoaringBitmap, universe: &RoaringBitmap,
) -> Result<roaring::RoaringBitmap> { ) -> Result<roaring::RoaringBitmap> {
compute_docids::compute_docids(ctx, edge, universe) compute_docids::compute_docids(ctx, edge, universe)
} }
fn build_visit_from_node<'search>( fn build_step_visit_source_node<'search>(
ctx: &mut SearchContext<'search>, ctx: &mut SearchContext<'search>,
from_node: &QueryNode, from_node: &QueryNode,
) -> Result<Option<Self::BuildVisitedFromNode>> { ) -> Result<Option<Self::BuildVisitedFromNode>> {
build::visit_from_node(ctx, from_node) build::visit_from_node(ctx, from_node)
} }
fn build_visit_to_node<'from_data, 'search: 'from_data>( fn build_step_visit_destination_node<'from_data, 'search: 'from_data>(
ctx: &mut SearchContext<'search>, ctx: &mut SearchContext<'search>,
to_node: &QueryNode, to_node: &QueryNode,
from_node_data: &'from_data Self::BuildVisitedFromNode, from_node_data: &'from_data Self::BuildVisitedFromNode,
) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>> { ) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>> {
build::visit_to_node(ctx, to_node, from_node_data) build::visit_to_node(ctx, to_node, from_node_data)
} }

View File

@ -2,7 +2,7 @@ use heed::BytesDecode;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::empty_paths_cache::EmptyPathsCache; use super::empty_paths_cache::EmptyPathsCache;
use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait}; use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait};
use crate::search::new::interner::Interned; use crate::search::new::interner::Interned;
use crate::search::new::logger::SearchLogger; use crate::search::new::logger::SearchLogger;
use crate::search::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm, WordDerivations}; use crate::search::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm, WordDerivations};
@ -20,19 +20,19 @@ pub enum TypoEdge {
pub enum TypoGraph {} pub enum TypoGraph {}
impl RankingRuleGraphTrait for TypoGraph { impl RankingRuleGraphTrait for TypoGraph {
type EdgeDetails = TypoEdge; type EdgeCondition = TypoEdge;
type BuildVisitedFromNode = (); type BuildVisitedFromNode = ();
fn graphviz_edge_details_label(edge: &Self::EdgeDetails) -> String { fn label_for_edge_condition(edge: &Self::EdgeCondition) -> String {
match edge { match edge {
TypoEdge::Phrase { .. } => ", 0 typos".to_owned(), TypoEdge::Phrase { .. } => ", 0 typos".to_owned(),
TypoEdge::Word { nbr_typos, .. } => format!(", {nbr_typos} typos"), TypoEdge::Word { nbr_typos, .. } => format!(", {nbr_typos} typos"),
} }
} }
fn compute_docids<'db_cache, 'search>( fn resolve_edge_condition<'db_cache, 'search>(
ctx: &mut SearchContext<'search>, ctx: &mut SearchContext<'search>,
edge: &Self::EdgeDetails, edge: &Self::EdgeCondition,
universe: &RoaringBitmap, universe: &RoaringBitmap,
) -> Result<RoaringBitmap> { ) -> Result<RoaringBitmap> {
match edge { match edge {
@ -66,29 +66,29 @@ impl RankingRuleGraphTrait for TypoGraph {
} }
} }
fn build_visit_from_node<'search>( fn build_step_visit_source_node<'search>(
_ctx: &mut SearchContext<'search>, _ctx: &mut SearchContext<'search>,
_from_node: &QueryNode, _from_node: &QueryNode,
) -> Result<Option<Self::BuildVisitedFromNode>> { ) -> Result<Option<Self::BuildVisitedFromNode>> {
Ok(Some(())) Ok(Some(()))
} }
fn build_visit_to_node<'from_data, 'search: 'from_data>( fn build_step_visit_destination_node<'from_data, 'search: 'from_data>(
_ctx: &mut SearchContext<'search>, _ctx: &mut SearchContext<'search>,
to_node: &QueryNode, to_node: &QueryNode,
_from_node_data: &'from_data Self::BuildVisitedFromNode, _from_node_data: &'from_data Self::BuildVisitedFromNode,
) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>> { ) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>> {
match to_node { match to_node {
QueryNode::Term(LocatedQueryTerm { value, .. }) => match value { QueryNode::Term(LocatedQueryTerm { value, .. }) => match value {
&QueryTerm::Phrase { phrase } => { &QueryTerm::Phrase { phrase } => {
Ok(vec![(0, EdgeDetails::Data(TypoEdge::Phrase { phrase }))]) Ok(vec![(0, EdgeCondition::Conditional(TypoEdge::Phrase { phrase }))])
} }
QueryTerm::Word { derivations } => { QueryTerm::Word { derivations } => {
let mut edges = vec![]; let mut edges = vec![];
if !derivations.zero_typo.is_empty() || derivations.use_prefix_db { if !derivations.zero_typo.is_empty() || derivations.use_prefix_db {
edges.push(( edges.push((
0, 0,
EdgeDetails::Data(TypoEdge::Word { EdgeCondition::Conditional(TypoEdge::Word {
derivations: derivations.clone(), derivations: derivations.clone(),
nbr_typos: 0, nbr_typos: 0,
}), }),
@ -97,7 +97,7 @@ impl RankingRuleGraphTrait for TypoGraph {
if !derivations.one_typo.is_empty() { if !derivations.one_typo.is_empty() {
edges.push(( edges.push((
1, 1,
EdgeDetails::Data(TypoEdge::Word { EdgeCondition::Conditional(TypoEdge::Word {
derivations: derivations.clone(), derivations: derivations.clone(),
nbr_typos: 1, nbr_typos: 1,
}), }),
@ -106,7 +106,7 @@ impl RankingRuleGraphTrait for TypoGraph {
if !derivations.two_typos.is_empty() { if !derivations.two_typos.is_empty() {
edges.push(( edges.push((
2, 2,
EdgeDetails::Data(TypoEdge::Word { EdgeCondition::Conditional(TypoEdge::Word {
derivations: derivations.clone(), derivations: derivations.clone(),
nbr_typos: 2, nbr_typos: 2,
}), }),
@ -115,7 +115,7 @@ impl RankingRuleGraphTrait for TypoGraph {
Ok(edges) Ok(edges)
} }
}, },
QueryNode::End => Ok(vec![(0, EdgeDetails::Unconditional)]), QueryNode::End => Ok(vec![(0, EdgeCondition::Unconditional)]),
QueryNode::Deleted | QueryNode::Start => panic!(), QueryNode::Deleted | QueryNode::Start => panic!(),
} }
} }