mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 21:04:27 +01:00
Continue documenting and cleaning up the code
This commit is contained in:
parent
c232cdabf5
commit
2099991dd1
@ -85,15 +85,15 @@ fn remove_empty_edges<'search, G: RankingRuleGraphTrait>(
|
||||
universe: &RoaringBitmap,
|
||||
empty_paths_cache: &mut EmptyPathsCache,
|
||||
) -> Result<()> {
|
||||
for edge_index in 0..graph.all_edges.len() as u16 {
|
||||
if graph.all_edges[edge_index as usize].is_none() {
|
||||
for edge_index in 0..graph.edges_store.len() as u16 {
|
||||
if graph.edges_store[edge_index as usize].is_none() {
|
||||
continue;
|
||||
}
|
||||
let docids = edge_docids_cache.get_edge_docids(ctx, edge_index, &*graph, universe)?;
|
||||
match docids {
|
||||
BitmapOrAllRef::Bitmap(docids) => {
|
||||
if docids.is_disjoint(universe) {
|
||||
graph.remove_edge(edge_index);
|
||||
graph.remove_ranking_rule_edge(edge_index);
|
||||
empty_paths_cache.forbid_edge(edge_index);
|
||||
edge_docids_cache.cache.remove(&edge_index);
|
||||
continue;
|
||||
@ -120,7 +120,7 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
|
||||
) -> Result<()> {
|
||||
let mut graph = RankingRuleGraph::build(ctx, query_graph.clone())?;
|
||||
let mut edge_docids_cache = EdgeDocidsCache::default();
|
||||
let mut empty_paths_cache = EmptyPathsCache::new(graph.all_edges.len() as u16);
|
||||
let mut empty_paths_cache = EmptyPathsCache::new(graph.edges_store.len() as u16);
|
||||
|
||||
// First simplify the graph as much as possible, by computing the docids of the edges
|
||||
// within the rule's universe and removing the edges that have no associated docids.
|
||||
@ -242,7 +242,7 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
|
||||
// 1. Store in the cache that this edge is empty for this universe
|
||||
empty_paths_cache.forbid_edge(edge_index);
|
||||
// 2. remove this edge from the ranking rule graph
|
||||
graph.remove_edge(edge_index);
|
||||
graph.remove_ranking_rule_edge(edge_index);
|
||||
// 3. Also remove the entry from the edge_docids_cache, since we don't need it anymore
|
||||
edge_docids_cache.cache.remove(&edge_index);
|
||||
return Ok(());
|
||||
|
@ -8,7 +8,7 @@ use roaring::RoaringBitmap;
|
||||
|
||||
use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
|
||||
use crate::search::new::ranking_rule_graph::{
|
||||
Edge, EdgeDetails, EmptyPathsCache, ProximityGraph, RankingRuleGraph, RankingRuleGraphTrait,
|
||||
Edge, EdgeCondition, EmptyPathsCache, ProximityGraph, RankingRuleGraph, RankingRuleGraphTrait,
|
||||
TypoGraph,
|
||||
};
|
||||
use crate::search::new::small_bitmap::SmallBitmap;
|
||||
@ -534,24 +534,24 @@ shape: class"
|
||||
let distances = &distances[node_idx];
|
||||
Self::query_node_d2_desc(ctx, node_idx, node, distances.as_slice(), file);
|
||||
}
|
||||
for edge in graph.all_edges.iter().flatten() {
|
||||
let Edge { from_node, to_node, details, .. } = edge;
|
||||
for edge in graph.edges_store.iter().flatten() {
|
||||
let Edge { source_node, dest_node, condition: details, .. } = edge;
|
||||
|
||||
match &details {
|
||||
EdgeDetails::Unconditional => {
|
||||
EdgeCondition::Unconditional => {
|
||||
writeln!(
|
||||
file,
|
||||
"{from_node} -> {to_node} : \"always cost {cost}\"",
|
||||
"{source_node} -> {dest_node} : \"always cost {cost}\"",
|
||||
cost = edge.cost,
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
EdgeDetails::Data(details) => {
|
||||
EdgeCondition::Conditional(details) => {
|
||||
writeln!(
|
||||
file,
|
||||
"{from_node} -> {to_node} : \"cost {cost} {edge_label}\"",
|
||||
"{source_node} -> {dest_node} : \"cost {cost} {edge_label}\"",
|
||||
cost = edge.cost,
|
||||
edge_label = R::graphviz_edge_details_label(details)
|
||||
edge_label = R::label_for_edge_condition(details)
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
@ -589,10 +589,10 @@ shape: class"
|
||||
edge_idx: u16,
|
||||
file: &mut File,
|
||||
) {
|
||||
let Edge { from_node, to_node, cost, .. } =
|
||||
graph.all_edges[edge_idx as usize].as_ref().unwrap();
|
||||
let from_node = &graph.query_graph.nodes[*from_node as usize];
|
||||
let from_node_desc = match from_node {
|
||||
let Edge { source_node, dest_node, cost, .. } =
|
||||
graph.edges_store[edge_idx as usize].as_ref().unwrap();
|
||||
let source_node = &graph.query_graph.nodes[*source_node as usize];
|
||||
let source_node_desc = match source_node {
|
||||
QueryNode::Term(term) => match &term.value {
|
||||
QueryTerm::Phrase { phrase } => {
|
||||
let phrase = ctx.phrase_interner.get(*phrase);
|
||||
@ -606,8 +606,8 @@ shape: class"
|
||||
QueryNode::Start => "START".to_owned(),
|
||||
QueryNode::End => "END".to_owned(),
|
||||
};
|
||||
let to_node = &graph.query_graph.nodes[*to_node as usize];
|
||||
let to_node_desc = match to_node {
|
||||
let dest_node = &graph.query_graph.nodes[*dest_node as usize];
|
||||
let dest_node_desc = match dest_node {
|
||||
QueryNode::Term(term) => match &term.value {
|
||||
QueryTerm::Phrase { phrase } => {
|
||||
let phrase = ctx.phrase_interner.get(*phrase);
|
||||
@ -623,7 +623,7 @@ shape: class"
|
||||
};
|
||||
writeln!(
|
||||
file,
|
||||
"{edge_idx}: \"{from_node_desc}->{to_node_desc} [{cost}]\" {{
|
||||
"{edge_idx}: \"{source_node_desc}->{dest_node_desc} [{cost}]\" {{
|
||||
shape: class
|
||||
}}"
|
||||
)
|
||||
|
@ -1,6 +1,3 @@
|
||||
// TODO: put primitive query part in here
|
||||
|
||||
use std::borrow::Cow;
|
||||
use std::mem;
|
||||
use std::ops::RangeInclusive;
|
||||
|
||||
@ -18,6 +15,8 @@ use crate::search::fst_utils::{Complement, Intersection, StartsWith, Union};
|
||||
use crate::search::{build_dfa, get_first};
|
||||
use crate::{CboRoaringBitmapLenCodec, Index, Result};
|
||||
|
||||
/// A phrase in the user's search query, consisting of several words
|
||||
/// that must appear side-by-side in the search results.
|
||||
#[derive(Default, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct Phrase {
|
||||
pub words: Vec<Option<Interned<String>>>,
|
||||
@ -28,18 +27,38 @@ impl Phrase {
|
||||
}
|
||||
}
|
||||
|
||||
/// A structure storing all the different ways to match
|
||||
/// a term in the user's search query.
|
||||
#[derive(Clone)]
|
||||
pub struct WordDerivations {
|
||||
/// The original word
|
||||
pub original: Interned<String>,
|
||||
// TODO: pub prefix_of: Vec<String>,
|
||||
// TODO: original should only be used for debugging purposes?
|
||||
// TODO: pub zero_typo: Option<Interned<String>>,
|
||||
// TODO: pub prefix_of: Box<[Interned<String>]>,
|
||||
/// All the synonyms of the original word
|
||||
pub synonyms: Box<[Interned<Phrase>]>,
|
||||
|
||||
/// The original word split into multiple consecutive words
|
||||
pub split_words: Option<Interned<Phrase>>,
|
||||
|
||||
/// The original word and the words which are prefixed by it
|
||||
pub zero_typo: Box<[Interned<String>]>,
|
||||
|
||||
/// Words that are 1 typo away from the original word
|
||||
pub one_typo: Box<[Interned<String>]>,
|
||||
|
||||
/// Words that are 2 typos away from the original word
|
||||
pub two_typos: Box<[Interned<String>]>,
|
||||
|
||||
/// True if the prefix databases must be used to retrieve
|
||||
/// the words which are prefixed by the original word.
|
||||
pub use_prefix_db: bool,
|
||||
}
|
||||
impl WordDerivations {
|
||||
/// Return an iterator over all the single words derived from the original word.
|
||||
///
|
||||
/// This excludes synonyms, split words, and words stored in the prefix databases.
|
||||
pub fn all_derivations_except_prefix_db(
|
||||
&'_ self,
|
||||
) -> impl Iterator<Item = Interned<String>> + Clone + '_ {
|
||||
@ -49,17 +68,20 @@ impl WordDerivations {
|
||||
self.zero_typo.is_empty()
|
||||
&& self.one_typo.is_empty()
|
||||
&& self.two_typos.is_empty()
|
||||
&& self.synonyms.is_empty()
|
||||
&& self.split_words.is_none()
|
||||
&& !self.use_prefix_db
|
||||
}
|
||||
}
|
||||
|
||||
/// Compute the word derivations for the given word
|
||||
pub fn word_derivations(
|
||||
ctx: &mut SearchContext,
|
||||
word: &str,
|
||||
max_typo: u8,
|
||||
is_prefix: bool,
|
||||
fst: &fst::Set<Cow<[u8]>>,
|
||||
) -> Result<WordDerivations> {
|
||||
let fst = ctx.index.words_fst(ctx.txn)?;
|
||||
let word_interned = ctx.word_interner.insert(word.to_owned());
|
||||
|
||||
let use_prefix_db = is_prefix
|
||||
@ -171,6 +193,10 @@ pub fn word_derivations(
|
||||
})
|
||||
}
|
||||
|
||||
/// Split the original word into the two words that appear the
|
||||
/// most next to each other in the index.
|
||||
///
|
||||
/// Return `None` if the original word cannot be split.
|
||||
fn split_best_frequency(
|
||||
index: &Index,
|
||||
txn: &RoTxn,
|
||||
@ -199,16 +225,12 @@ fn split_best_frequency(
|
||||
|
||||
#[derive(Clone)]
|
||||
pub enum QueryTerm {
|
||||
// TODO: should there be SplitWord, NGram2, and NGram3 variants?
|
||||
// NGram2 can have 1 typo and synonyms
|
||||
// NGram3 cannot have typos but can have synonyms
|
||||
// SplitWords are a phrase
|
||||
// Can NGrams be prefixes?
|
||||
Phrase { phrase: Interned<Phrase> },
|
||||
Word { derivations: WordDerivations },
|
||||
}
|
||||
|
||||
impl QueryTerm {
|
||||
/// Return the original word from the given query term
|
||||
pub fn original_single_word<'interner>(
|
||||
&self,
|
||||
word_interner: &'interner Interner<String>,
|
||||
@ -226,6 +248,7 @@ impl QueryTerm {
|
||||
}
|
||||
}
|
||||
|
||||
/// A query term coupled with its position in the user's search query.
|
||||
#[derive(Clone)]
|
||||
pub struct LocatedQueryTerm {
|
||||
pub value: QueryTerm,
|
||||
@ -233,14 +256,18 @@ pub struct LocatedQueryTerm {
|
||||
}
|
||||
|
||||
impl LocatedQueryTerm {
|
||||
/// Return `true` iff the word derivations within the query term are empty
|
||||
pub fn is_empty(&self) -> bool {
|
||||
match &self.value {
|
||||
// TODO: phrases should be greedily computed, so that they can be excluded from
|
||||
// the query graph right from the start?
|
||||
QueryTerm::Phrase { phrase: _ } => false,
|
||||
QueryTerm::Word { derivations, .. } => derivations.is_empty(),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// Convert the tokenised search query into a list of located query terms.
|
||||
pub fn located_query_terms_from_string<'search>(
|
||||
ctx: &mut SearchContext<'search>,
|
||||
query: NormalizedTokenIter<Vec<u8>>,
|
||||
@ -250,8 +277,8 @@ pub fn located_query_terms_from_string<'search>(
|
||||
let min_len_one_typo = ctx.index.min_word_len_one_typo(ctx.txn)?;
|
||||
let min_len_two_typos = ctx.index.min_word_len_two_typos(ctx.txn)?;
|
||||
|
||||
// TODO: should `exact_words` also disable prefix search, ngrams, split words, or synonyms?
|
||||
let exact_words = ctx.index.exact_words(ctx.txn)?;
|
||||
let fst = ctx.index.words_fst(ctx.txn)?;
|
||||
|
||||
let nbr_typos = |word: &str| {
|
||||
if !authorize_typos
|
||||
@ -266,9 +293,9 @@ pub fn located_query_terms_from_string<'search>(
|
||||
}
|
||||
};
|
||||
|
||||
let mut primitive_query = Vec::new();
|
||||
let mut phrase = Vec::new();
|
||||
let mut located_terms = Vec::new();
|
||||
|
||||
let mut phrase = Vec::new();
|
||||
let mut quoted = false;
|
||||
|
||||
let parts_limit = words_limit.unwrap_or(usize::MAX);
|
||||
@ -280,8 +307,8 @@ pub fn located_query_terms_from_string<'search>(
|
||||
let mut peekable = query.peekable();
|
||||
while let Some(token) = peekable.next() {
|
||||
// early return if word limit is exceeded
|
||||
if primitive_query.len() >= parts_limit {
|
||||
return Ok(primitive_query);
|
||||
if located_terms.len() >= parts_limit {
|
||||
return Ok(located_terms);
|
||||
}
|
||||
|
||||
match token.kind {
|
||||
@ -307,24 +334,23 @@ pub fn located_query_terms_from_string<'search>(
|
||||
match token.kind {
|
||||
TokenKind::Word => {
|
||||
let word = token.lemma();
|
||||
let derivations =
|
||||
word_derivations(ctx, word, nbr_typos(word), false, &fst)?;
|
||||
let derivations = word_derivations(ctx, word, nbr_typos(word), false)?;
|
||||
let located_term = LocatedQueryTerm {
|
||||
value: QueryTerm::Word { derivations },
|
||||
positions: position..=position,
|
||||
};
|
||||
primitive_query.push(located_term);
|
||||
located_terms.push(located_term);
|
||||
}
|
||||
TokenKind::StopWord | TokenKind::Separator(_) | TokenKind::Unknown => {}
|
||||
}
|
||||
} else {
|
||||
let word = token.lemma();
|
||||
let derivations = word_derivations(ctx, word, nbr_typos(word), true, &fst)?;
|
||||
let derivations = word_derivations(ctx, word, nbr_typos(word), true)?;
|
||||
let located_term = LocatedQueryTerm {
|
||||
value: QueryTerm::Word { derivations },
|
||||
positions: position..=position,
|
||||
};
|
||||
primitive_query.push(located_term);
|
||||
located_terms.push(located_term);
|
||||
}
|
||||
}
|
||||
TokenKind::Separator(separator_kind) => {
|
||||
@ -352,7 +378,7 @@ pub fn located_query_terms_from_string<'search>(
|
||||
},
|
||||
positions: phrase_start..=phrase_end,
|
||||
};
|
||||
primitive_query.push(located_query_term);
|
||||
located_terms.push(located_query_term);
|
||||
}
|
||||
}
|
||||
_ => (),
|
||||
@ -367,10 +393,10 @@ pub fn located_query_terms_from_string<'search>(
|
||||
},
|
||||
positions: phrase_start..=phrase_end,
|
||||
};
|
||||
primitive_query.push(located_query_term);
|
||||
located_terms.push(located_query_term);
|
||||
}
|
||||
|
||||
Ok(primitive_query)
|
||||
Ok(located_terms)
|
||||
}
|
||||
|
||||
// TODO: return a word derivations instead?
|
||||
@ -396,6 +422,8 @@ pub fn ngram2(
|
||||
_ => None,
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: return a word derivations instead?
|
||||
pub fn ngram3(
|
||||
ctx: &mut SearchContext,
|
||||
x: &LocatedQueryTerm,
|
||||
|
@ -6,49 +6,43 @@ use crate::search::new::{QueryGraph, SearchContext};
|
||||
use crate::Result;
|
||||
|
||||
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
/// Build the ranking rule graph from the given query graph
|
||||
pub fn build(ctx: &mut SearchContext, query_graph: QueryGraph) -> Result<Self> {
|
||||
let QueryGraph { nodes: graph_nodes, edges: graph_edges, .. } = &query_graph;
|
||||
|
||||
let mut all_edges = vec![];
|
||||
let mut node_edges = vec![];
|
||||
let mut successors = vec![];
|
||||
let mut edges_store = vec![];
|
||||
let mut edges_of_node = vec![];
|
||||
|
||||
for (node_idx, node) in graph_nodes.iter().enumerate() {
|
||||
node_edges.push(HashSet::new());
|
||||
successors.push(HashSet::new());
|
||||
let new_edges = node_edges.last_mut().unwrap();
|
||||
let new_successors = successors.last_mut().unwrap();
|
||||
edges_of_node.push(HashSet::new());
|
||||
let new_edges = edges_of_node.last_mut().unwrap();
|
||||
|
||||
let Some(from_node_data) = G::build_visit_from_node(ctx, node)? else { continue };
|
||||
let Some(source_node_data) = G::build_step_visit_source_node(ctx, node)? else { continue };
|
||||
|
||||
for successor_idx in graph_edges[node_idx].successors.iter() {
|
||||
let to_node = &graph_nodes[successor_idx as usize];
|
||||
let mut edges = G::build_visit_to_node(ctx, to_node, &from_node_data)?;
|
||||
let dest_node = &graph_nodes[successor_idx as usize];
|
||||
let edges =
|
||||
G::build_step_visit_destination_node(ctx, dest_node, &source_node_data)?;
|
||||
if edges.is_empty() {
|
||||
continue;
|
||||
}
|
||||
edges.sort_by_key(|e| e.0);
|
||||
|
||||
for (cost, details) in edges {
|
||||
all_edges.push(Some(Edge {
|
||||
from_node: node_idx as u16,
|
||||
to_node: successor_idx,
|
||||
edges_store.push(Some(Edge {
|
||||
source_node: node_idx as u16,
|
||||
dest_node: successor_idx,
|
||||
cost,
|
||||
details,
|
||||
condition: details,
|
||||
}));
|
||||
new_edges.insert(all_edges.len() as u16 - 1);
|
||||
new_successors.insert(successor_idx);
|
||||
new_edges.insert(edges_store.len() as u16 - 1);
|
||||
}
|
||||
}
|
||||
}
|
||||
let node_edges = node_edges
|
||||
let edges_of_node = edges_of_node
|
||||
.into_iter()
|
||||
.map(|edges| SmallBitmap::from_iter(edges.into_iter(), all_edges.len() as u16))
|
||||
.collect();
|
||||
let successors = successors
|
||||
.into_iter()
|
||||
.map(|edges| SmallBitmap::from_iter(edges.into_iter(), all_edges.len() as u16))
|
||||
.map(|edges| SmallBitmap::from_iter(edges.into_iter(), edges_store.len() as u16))
|
||||
.collect();
|
||||
|
||||
Ok(RankingRuleGraph { query_graph, all_edges, node_edges, successors })
|
||||
Ok(RankingRuleGraph { query_graph, edges_store, edges_of_node })
|
||||
}
|
||||
}
|
||||
|
@ -30,7 +30,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
empty_paths_cache,
|
||||
&mut visit,
|
||||
&mut vec![],
|
||||
&mut SmallBitmap::new(self.all_edges.len() as u16),
|
||||
&mut SmallBitmap::new(self.edges_store.len() as u16),
|
||||
empty_paths_cache.empty_edges.clone(),
|
||||
)?;
|
||||
Ok(())
|
||||
@ -48,12 +48,12 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
) -> Result<bool> {
|
||||
let mut any_valid = false;
|
||||
|
||||
let edges = self.node_edges[from].clone();
|
||||
let edges = self.edges_of_node[from].clone();
|
||||
for edge_idx in edges.iter() {
|
||||
let Some(edge) = self.all_edges[edge_idx as usize].as_ref() else { continue };
|
||||
let Some(edge) = self.edges_store[edge_idx as usize].as_ref() else { continue };
|
||||
if cost < edge.cost as u16
|
||||
|| forbidden_edges.contains(edge_idx)
|
||||
|| !all_distances[edge.to_node as usize].iter().any(
|
||||
|| !all_distances[edge.dest_node as usize].iter().any(
|
||||
|(next_cost, necessary_edges)| {
|
||||
(*next_cost == cost - edge.cost as u16)
|
||||
&& !forbidden_edges.intersects(necessary_edges)
|
||||
@ -71,13 +71,13 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
new_forbidden_edges.insert(x);
|
||||
});
|
||||
|
||||
let next_any_valid = if edge.to_node == self.query_graph.end_node {
|
||||
let next_any_valid = if edge.dest_node == self.query_graph.end_node {
|
||||
any_valid = true;
|
||||
visit(prev_edges, self, empty_paths_cache)?;
|
||||
true
|
||||
} else {
|
||||
self.visit_paths_of_cost_rec(
|
||||
edge.to_node as usize,
|
||||
edge.dest_node as usize,
|
||||
cost - edge.cost as u16,
|
||||
all_distances,
|
||||
empty_paths_cache,
|
||||
@ -115,7 +115,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
let mut node_stack = VecDeque::new();
|
||||
|
||||
distances_to_end[self.query_graph.end_node as usize] =
|
||||
vec![(0, SmallBitmap::new(self.all_edges.len() as u16))];
|
||||
vec![(0, SmallBitmap::new(self.edges_store.len() as u16))];
|
||||
|
||||
for prev_node in
|
||||
self.query_graph.edges[self.query_graph.end_node as usize].predecessors.iter()
|
||||
@ -127,15 +127,15 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
while let Some(cur_node) = node_stack.pop_front() {
|
||||
let mut self_distances = BTreeMap::<u16, SmallBitmap>::new();
|
||||
|
||||
let cur_node_edges = &self.node_edges[cur_node];
|
||||
let cur_node_edges = &self.edges_of_node[cur_node];
|
||||
for edge_idx in cur_node_edges.iter() {
|
||||
let edge = self.all_edges[edge_idx as usize].as_ref().unwrap();
|
||||
let succ_node = edge.to_node;
|
||||
let edge = self.edges_store[edge_idx as usize].as_ref().unwrap();
|
||||
let succ_node = edge.dest_node;
|
||||
let succ_distances = &distances_to_end[succ_node as usize];
|
||||
for (succ_distance, succ_necessary_edges) in succ_distances {
|
||||
let potential_necessary_edges = SmallBitmap::from_iter(
|
||||
std::iter::once(edge_idx).chain(succ_necessary_edges.iter()),
|
||||
self.all_edges.len() as u16,
|
||||
self.edges_store.len() as u16,
|
||||
);
|
||||
match self_distances.entry(edge.cost as u16 + succ_distance) {
|
||||
Entry::Occupied(mut prev_necessary_edges) => {
|
||||
|
@ -3,28 +3,13 @@ use std::marker::PhantomData;
|
||||
use fxhash::FxHashMap;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use crate::search::new::{BitmapOrAllRef, SearchContext};
|
||||
use crate::Result;
|
||||
|
||||
// TODO: the cache should have a G::EdgeDetails as key
|
||||
// but then it means that we should have a quick way of
|
||||
// computing their hash and comparing them
|
||||
// which can be done...
|
||||
// by using a pointer (real, Rc, bumpalo, or in a vector)???
|
||||
//
|
||||
// But actually.... the edge details' docids are a subset of the universe at the
|
||||
// moment they were computed.
|
||||
// But the universes between two iterations of a ranking rule are completely different
|
||||
// Thus, there is no point in doing this.
|
||||
// UNLESS...
|
||||
// we compute the whole docids corresponding to the edge details (potentially expensive in time and memory
|
||||
// in the common case)
|
||||
//
|
||||
// But we could still benefit within a single iteration for requests like:
|
||||
// `a a a a a a a a a` where we have many of the same edge details, repeated
|
||||
|
||||
/// A cache storing the document ids associated with each ranking rule edge
|
||||
pub struct EdgeDocidsCache<G: RankingRuleGraphTrait> {
|
||||
// TODO: should be FxHashMap<Interned<EdgeCondition>, RoaringBitmap>
|
||||
pub cache: FxHashMap<u16, RoaringBitmap>,
|
||||
_phantom: PhantomData<G>,
|
||||
}
|
||||
@ -34,19 +19,24 @@ impl<G: RankingRuleGraphTrait> Default for EdgeDocidsCache<G> {
|
||||
}
|
||||
}
|
||||
impl<G: RankingRuleGraphTrait> EdgeDocidsCache<G> {
|
||||
/// Retrieve the document ids for the given edge condition.
|
||||
///
|
||||
/// If the cache does not yet contain these docids, they are computed
|
||||
/// and inserted in the cache.
|
||||
pub fn get_edge_docids<'s, 'search>(
|
||||
&'s mut self,
|
||||
ctx: &mut SearchContext<'search>,
|
||||
// TODO: should be Interned<EdgeCondition>
|
||||
edge_index: u16,
|
||||
graph: &RankingRuleGraph<G>,
|
||||
// TODO: maybe universe doesn't belong here
|
||||
universe: &RoaringBitmap,
|
||||
) -> Result<BitmapOrAllRef<'s>> {
|
||||
let edge = graph.all_edges[edge_index as usize].as_ref().unwrap();
|
||||
let edge = graph.edges_store[edge_index as usize].as_ref().unwrap();
|
||||
|
||||
match &edge.details {
|
||||
EdgeDetails::Unconditional => Ok(BitmapOrAllRef::All),
|
||||
EdgeDetails::Data(details) => {
|
||||
match &edge.condition {
|
||||
EdgeCondition::Unconditional => Ok(BitmapOrAllRef::All),
|
||||
EdgeCondition::Conditional(details) => {
|
||||
if self.cache.contains_key(&edge_index) {
|
||||
// TODO: should we update the bitmap in the cache if the new universe
|
||||
// reduces it?
|
||||
@ -56,7 +46,7 @@ impl<G: RankingRuleGraphTrait> EdgeDocidsCache<G> {
|
||||
return Ok(BitmapOrAllRef::Bitmap(&self.cache[&edge_index]));
|
||||
}
|
||||
// TODO: maybe universe doesn't belong here
|
||||
let docids = universe & G::compute_docids(ctx, details, universe)?;
|
||||
let docids = universe & G::resolve_edge_condition(ctx, details, universe)?;
|
||||
let _ = self.cache.insert(edge_index, docids);
|
||||
let docids = &self.cache[&edge_index];
|
||||
Ok(BitmapOrAllRef::Bitmap(docids))
|
||||
|
@ -1,20 +1,29 @@
|
||||
use super::paths_map::PathsMap;
|
||||
use super::paths_map::PathSet;
|
||||
use crate::search::new::small_bitmap::SmallBitmap;
|
||||
|
||||
/// A cache which stores sufficient conditions for a path
|
||||
/// to resolve to an empty set of candidates within the current
|
||||
/// universe.
|
||||
#[derive(Clone)]
|
||||
pub struct EmptyPathsCache {
|
||||
/// The set of edge indexes that resolve to no documents.
|
||||
pub empty_edges: SmallBitmap,
|
||||
pub empty_prefixes: PathsMap<()>,
|
||||
/// A set of path prefixes that resolve to no documents.
|
||||
pub empty_prefixes: PathSet,
|
||||
/// A set of couples of edge indexes that resolve to no documents.
|
||||
pub empty_couple_edges: Vec<SmallBitmap>,
|
||||
}
|
||||
impl EmptyPathsCache {
|
||||
/// Create a new cache for a ranking rule graph containing at most `all_edges_len` edges.
|
||||
pub fn new(all_edges_len: u16) -> Self {
|
||||
Self {
|
||||
empty_edges: SmallBitmap::new(all_edges_len),
|
||||
empty_prefixes: PathsMap::default(),
|
||||
empty_prefixes: PathSet::default(),
|
||||
empty_couple_edges: vec![SmallBitmap::new(all_edges_len); all_edges_len as usize],
|
||||
}
|
||||
}
|
||||
|
||||
/// Store in the cache that every path containing the given edge resolves to no documents.
|
||||
pub fn forbid_edge(&mut self, edge_idx: u16) {
|
||||
self.empty_edges.insert(edge_idx);
|
||||
self.empty_couple_edges[edge_idx as usize].clear();
|
||||
@ -23,12 +32,17 @@ impl EmptyPathsCache {
|
||||
edges2.remove(edge_idx);
|
||||
}
|
||||
}
|
||||
/// Store in the cache that every path containing the given prefix resolves to no documents.
|
||||
pub fn forbid_prefix(&mut self, prefix: &[u16]) {
|
||||
self.empty_prefixes.insert(prefix.iter().copied(), ());
|
||||
self.empty_prefixes.insert(prefix.iter().copied());
|
||||
}
|
||||
|
||||
/// Store in the cache that every path containing the two given edges resolves to no documents.
|
||||
pub fn forbid_couple_edges(&mut self, edge1: u16, edge2: u16) {
|
||||
self.empty_couple_edges[edge1 as usize].insert(edge2);
|
||||
}
|
||||
|
||||
/// Returns true if the cache can determine that the given path resolves to no documents.
|
||||
pub fn path_is_empty(&self, path: &[u16], path_bitmap: &SmallBitmap) -> bool {
|
||||
if path_bitmap.intersects(&self.empty_edges) {
|
||||
return true;
|
||||
|
@ -1,9 +1,19 @@
|
||||
/*! Module implementing the graph used for the graph-based ranking rules
|
||||
and its related algorithms.
|
||||
|
||||
A ranking rule graph is built on top of the [`QueryGraph`]: the nodes stay
|
||||
the same but the edges are replaced.
|
||||
*/
|
||||
|
||||
mod build;
|
||||
mod cheapest_paths;
|
||||
mod edge_docids_cache;
|
||||
mod empty_paths_cache;
|
||||
mod paths_map;
|
||||
|
||||
/// Implementation of the `proximity` ranking rule
|
||||
mod proximity;
|
||||
/// Implementation of the `typo` ranking rule
|
||||
mod typo;
|
||||
|
||||
pub use edge_docids_cache::EdgeDocidsCache;
|
||||
@ -17,30 +27,38 @@ use super::small_bitmap::SmallBitmap;
|
||||
use super::{QueryGraph, QueryNode, SearchContext};
|
||||
use crate::Result;
|
||||
|
||||
/// The condition that is associated with an edge in the ranking rule graph.
|
||||
///
|
||||
/// Some edges are unconditional, which means that traversing them does not reduce
|
||||
/// the set of candidates.
|
||||
///
|
||||
/// Most edges, however, have a condition attached to them. For example, for the
|
||||
/// proximity ranking rule, the condition could be that a word is N-close to another one.
|
||||
/// When the edge is traversed, some database operations are executed to retrieve the set
|
||||
/// of documents that satisfy the condition, which reduces the list of candidate document ids.
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum EdgeDetails<E> {
|
||||
pub enum EdgeCondition<E> {
|
||||
Unconditional,
|
||||
Data(E),
|
||||
Conditional(E),
|
||||
}
|
||||
|
||||
/// An edge in the ranking rule graph.
|
||||
///
|
||||
/// It contains:
|
||||
/// 1. The source and destination nodes
|
||||
/// 2. The cost of traversing this edge
|
||||
/// 3. The condition associated with it
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Edge<E> {
|
||||
pub from_node: u16,
|
||||
pub to_node: u16,
|
||||
pub source_node: u16,
|
||||
pub dest_node: u16,
|
||||
pub cost: u8,
|
||||
pub details: EdgeDetails<E>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct EdgePointer<'graph, E> {
|
||||
pub index: u16,
|
||||
pub edge: &'graph Edge<E>,
|
||||
pub condition: EdgeCondition<E>,
|
||||
}
|
||||
|
||||
// pub struct SubWordDerivations {
|
||||
// words: FxHashSet<Interned<String>>,
|
||||
// synonyms: FxHashSet<Interned<Phrase>>, // NO! they're phrases, not strings
|
||||
// split_words: bool,
|
||||
// phrases: FxHashSet<Interned<Phrase>>,
|
||||
// use_prefix_db: bool,
|
||||
// }
|
||||
|
||||
@ -74,46 +92,55 @@ pub struct EdgePointer<'graph, E> {
|
||||
// }
|
||||
|
||||
// fn word_derivations_used_by_edge<G: RankingRuleGraphTrait>(
|
||||
// edge: G::EdgeDetails,
|
||||
// edge: G::EdgeCondition,
|
||||
// ) -> SubWordDerivations {
|
||||
// todo!()
|
||||
// }
|
||||
|
||||
/// A trait to be implemented by a marker type to build a graph-based ranking rule.
|
||||
///
|
||||
/// It mostly describes how to:
|
||||
/// 1. Retrieve the set of edges (their cost and condition) between two nodes.
|
||||
/// 2. Compute the document ids satisfying a condition
|
||||
pub trait RankingRuleGraphTrait: Sized {
|
||||
/// The details of an edge connecting two query nodes. These details
|
||||
/// The condition of an edge connecting two query nodes. The condition
|
||||
/// should be sufficient to compute the edge's cost and associated document ids
|
||||
/// in [`compute_docids`](RankingRuleGraphTrait).
|
||||
type EdgeDetails: Sized + Clone;
|
||||
/// in [`resolve_edge_condition`](RankingRuleGraphTrait::resolve_edge_condition).
|
||||
type EdgeCondition: Sized + Clone;
|
||||
|
||||
/// A structure used in the construction of the graph, created when a
|
||||
/// query graph source node is visited. It is used to determine the cost
|
||||
/// and condition of a ranking rule edge when the destination node is visited.
|
||||
type BuildVisitedFromNode;
|
||||
|
||||
/// Return the label of the given edge details, to be used when visualising
|
||||
/// the ranking rule graph using GraphViz.
|
||||
fn graphviz_edge_details_label(edge: &Self::EdgeDetails) -> String;
|
||||
/// Return the label of the given edge condition, to be used when visualising
|
||||
/// the ranking rule graph.
|
||||
fn label_for_edge_condition(edge: &Self::EdgeCondition) -> String;
|
||||
|
||||
/// Compute the document ids associated with the given edge.
|
||||
fn compute_docids<'search>(
|
||||
/// Compute the document ids associated with the given edge condition,
|
||||
/// restricted to the given universe.
|
||||
fn resolve_edge_condition<'search>(
|
||||
ctx: &mut SearchContext<'search>,
|
||||
edge_details: &Self::EdgeDetails,
|
||||
edge_condition: &Self::EdgeCondition,
|
||||
universe: &RoaringBitmap,
|
||||
) -> Result<RoaringBitmap>;
|
||||
|
||||
/// Prepare to build the edges outgoing from `from_node`.
|
||||
/// Prepare to build the edges outgoing from `source_node`.
|
||||
///
|
||||
/// This call is followed by zero, one or more calls to [`build_visit_to_node`](RankingRuleGraphTrait::build_visit_to_node),
|
||||
/// This call is followed by zero, one or more calls to [`build_step_visit_destination_node`](RankingRuleGraphTrait::build_step_visit_destination_node),
|
||||
/// which builds the actual edges.
|
||||
fn build_visit_from_node<'search>(
|
||||
fn build_step_visit_source_node<'search>(
|
||||
ctx: &mut SearchContext<'search>,
|
||||
from_node: &QueryNode,
|
||||
source_node: &QueryNode,
|
||||
) -> Result<Option<Self::BuildVisitedFromNode>>;
|
||||
|
||||
/// Return the cost and details of the edges going from the previously visited node
|
||||
/// (with [`build_visit_from_node`](RankingRuleGraphTrait::build_visit_from_node)) to `to_node`.
|
||||
fn build_visit_to_node<'from_data, 'search: 'from_data>(
|
||||
/// Return the cost and condition of the edges going from the previously visited node
|
||||
/// (with [`build_step_visit_source_node`](RankingRuleGraphTrait::build_step_visit_source_node)) to `dest_node`.
|
||||
fn build_step_visit_destination_node<'from_data, 'search: 'from_data>(
|
||||
ctx: &mut SearchContext<'search>,
|
||||
to_node: &QueryNode,
|
||||
from_node_data: &'from_data Self::BuildVisitedFromNode,
|
||||
) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>>;
|
||||
dest_node: &QueryNode,
|
||||
source_node_data: &'from_data Self::BuildVisitedFromNode,
|
||||
) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>>;
|
||||
|
||||
fn log_state(
|
||||
graph: &RankingRuleGraph<Self>,
|
||||
@ -126,45 +153,32 @@ pub trait RankingRuleGraphTrait: Sized {
|
||||
);
|
||||
}
|
||||
|
||||
/// The graph used by graph-based ranking rules.
|
||||
///
|
||||
/// It is built on top of a [`QueryGraph`], keeping the same nodes
|
||||
/// but replacing the edges.
|
||||
pub struct RankingRuleGraph<G: RankingRuleGraphTrait> {
|
||||
pub query_graph: QueryGraph,
|
||||
// pub edges: Vec<HashMap<usize, Vec<Edge<G::EdgeDetails>>>>,
|
||||
pub all_edges: Vec<Option<Edge<G::EdgeDetails>>>,
|
||||
|
||||
pub node_edges: Vec<SmallBitmap>,
|
||||
|
||||
pub successors: Vec<SmallBitmap>,
|
||||
// TODO: to get the edges between two nodes:
|
||||
// 1. get node_outgoing_edges[from]
|
||||
// 2. get node_incoming_edges[to]
|
||||
// 3. take intersection between the two
|
||||
pub edges_store: Vec<Option<Edge<G::EdgeCondition>>>,
|
||||
pub edges_of_node: Vec<SmallBitmap>,
|
||||
}
|
||||
impl<G: RankingRuleGraphTrait> Clone for RankingRuleGraph<G> {
|
||||
fn clone(&self) -> Self {
|
||||
Self {
|
||||
query_graph: self.query_graph.clone(),
|
||||
all_edges: self.all_edges.clone(),
|
||||
node_edges: self.node_edges.clone(),
|
||||
successors: self.successors.clone(),
|
||||
edges_store: self.edges_store.clone(),
|
||||
edges_of_node: self.edges_of_node.clone(),
|
||||
}
|
||||
}
|
||||
}
|
||||
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
pub fn remove_edge(&mut self, edge_index: u16) {
|
||||
let edge_opt = &mut self.all_edges[edge_index as usize];
|
||||
/// Remove the given edge from the ranking rule graph
|
||||
pub fn remove_ranking_rule_edge(&mut self, edge_index: u16) {
|
||||
let edge_opt = &mut self.edges_store[edge_index as usize];
|
||||
let Some(edge) = &edge_opt else { return };
|
||||
let (from_node, _to_node) = (edge.from_node, edge.to_node);
|
||||
let (source_node, _dest_node) = (edge.source_node, edge.dest_node);
|
||||
*edge_opt = None;
|
||||
|
||||
let from_node_edges = &mut self.node_edges[from_node as usize];
|
||||
from_node_edges.remove(edge_index);
|
||||
|
||||
let mut new_successors_from_node = SmallBitmap::new(self.all_edges.len() as u16);
|
||||
let all_edges = &self.all_edges;
|
||||
for from_node_edge in from_node_edges.iter() {
|
||||
let Edge { to_node, .. } = &all_edges[from_node_edge as usize].as_ref().unwrap();
|
||||
new_successors_from_node.insert(*to_node);
|
||||
}
|
||||
self.successors[from_node as usize] = new_successors_from_node;
|
||||
self.edges_of_node[source_node as usize].remove(edge_index);
|
||||
}
|
||||
}
|
||||
|
@ -1,117 +1,32 @@
|
||||
use super::cheapest_paths::Path;
|
||||
use crate::search::new::small_bitmap::SmallBitmap;
|
||||
|
||||
// What is PathsMap used for?
|
||||
// What is PathSet used for?
|
||||
// For the empty_prefixes field in the EmptyPathsCache only :/
|
||||
// but it could be used for more, like efficient computing of a set of paths
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct PathsMap<V> {
|
||||
pub nodes: Vec<(u16, PathsMap<V>)>,
|
||||
pub value: Option<V>,
|
||||
/// A set of [`Path`]
|
||||
#[derive(Default, Debug, Clone)]
|
||||
pub struct PathSet {
|
||||
nodes: Vec<(u16, PathSet)>,
|
||||
is_end: bool,
|
||||
}
|
||||
impl<V> Default for PathsMap<V> {
|
||||
fn default() -> Self {
|
||||
Self { nodes: vec![], value: None }
|
||||
}
|
||||
}
|
||||
|
||||
impl PathsMap<u64> {
|
||||
pub fn from_paths(paths: &[Path]) -> Self {
|
||||
let mut result = Self::default();
|
||||
for p in paths {
|
||||
result.add_path(p);
|
||||
}
|
||||
result
|
||||
}
|
||||
pub fn add_path(&mut self, path: &Path) {
|
||||
self.insert(path.edges.iter().copied(), path.cost);
|
||||
}
|
||||
}
|
||||
impl<V> PathsMap<V> {
|
||||
pub fn is_empty(&self) -> bool {
|
||||
self.nodes.is_empty() && self.value.is_none()
|
||||
}
|
||||
|
||||
pub fn insert(&mut self, mut edges: impl Iterator<Item = u16>, value: V) {
|
||||
impl PathSet {
|
||||
pub fn insert(&mut self, mut edges: impl Iterator<Item = u16>) {
|
||||
match edges.next() {
|
||||
None => {
|
||||
self.value = Some(value);
|
||||
self.is_end = true;
|
||||
}
|
||||
Some(first_edge) => {
|
||||
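// Reuse the child already stored for `first_edge` if there is one; otherwise a new child is created and appended below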
|
||||
for (edge, next_node) in &mut self.nodes {
|
||||
if edge == &first_edge {
|
||||
return next_node.insert(edges, value);
|
||||
return next_node.insert(edges);
|
||||
}
|
||||
}
|
||||
let mut rest = PathsMap::default();
|
||||
rest.insert(edges, value);
|
||||
let mut rest = PathSet::default();
|
||||
rest.insert(edges);
|
||||
self.nodes.push((first_edge, rest));
|
||||
}
|
||||
}
|
||||
}
|
||||
fn remove_first_rec(&mut self, cur: &mut Vec<u16>) -> (bool, V) {
|
||||
let Some((first_edge, rest)) = self.nodes.first_mut() else {
|
||||
// The PathsMap has to be correct by construction here, otherwise
|
||||
// the unwrap() will crash
|
||||
return (true, self.value.take().unwrap())
|
||||
};
|
||||
cur.push(*first_edge);
|
||||
let (rest_is_empty, value) = rest.remove_first_rec(cur);
|
||||
if rest_is_empty {
|
||||
self.nodes.remove(0);
|
||||
(self.nodes.is_empty(), value)
|
||||
} else {
|
||||
(false, value)
|
||||
}
|
||||
}
|
||||
pub fn remove_first(&mut self) -> Option<(Vec<u16>, V)> {
|
||||
if self.is_empty() {
|
||||
return None;
|
||||
}
|
||||
|
||||
let mut result = vec![];
|
||||
let (_, value) = self.remove_first_rec(&mut result);
|
||||
Some((result, value))
|
||||
}
|
||||
pub fn iterate_rec(&self, cur: &mut Vec<u16>, visit: &mut impl FnMut(&Vec<u16>, &V)) {
|
||||
if let Some(value) = &self.value {
|
||||
visit(cur, value);
|
||||
}
|
||||
for (first_edge, rest) in self.nodes.iter() {
|
||||
cur.push(*first_edge);
|
||||
rest.iterate_rec(cur, visit);
|
||||
cur.pop();
|
||||
}
|
||||
}
|
||||
pub fn iterate(&self, mut visit: impl FnMut(&Vec<u16>, &V)) {
|
||||
self.iterate_rec(&mut vec![], &mut visit)
|
||||
}
|
||||
|
||||
pub fn remove_prefixes<U>(&mut self, prefixes: &PathsMap<U>) {
|
||||
prefixes.iterate(|prefix, _v| {
|
||||
self.remove_prefix(prefix);
|
||||
});
|
||||
}
|
||||
pub fn remove_edges(&mut self, forbidden_edges: &SmallBitmap) {
|
||||
let mut i = 0;
|
||||
while i < self.nodes.len() {
|
||||
let should_remove = if forbidden_edges.contains(self.nodes[i].0) {
|
||||
true
|
||||
} else if !self.nodes[i].1.nodes.is_empty() {
|
||||
self.nodes[i].1.remove_edges(forbidden_edges);
|
||||
self.nodes[i].1.nodes.is_empty()
|
||||
} else {
|
||||
false
|
||||
};
|
||||
if should_remove {
|
||||
self.nodes.remove(i);
|
||||
} else {
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
pub fn remove_edge(&mut self, forbidden_edge: &u16) {
|
||||
let mut i = 0;
|
||||
while i < self.nodes.len() {
|
||||
@ -130,34 +45,11 @@ impl<V> PathsMap<V> {
|
||||
}
|
||||
}
|
||||
}
|
||||
pub fn remove_prefix(&mut self, forbidden_prefix: &[u16]) {
|
||||
let [first_edge, remaining_prefix @ ..] = forbidden_prefix else {
|
||||
self.nodes.clear();
|
||||
self.value = None;
|
||||
return;
|
||||
};
|
||||
|
||||
let mut i = 0;
|
||||
while i < self.nodes.len() {
|
||||
let edge = self.nodes[i].0;
|
||||
let should_remove = if edge == *first_edge {
|
||||
self.nodes[i].1.remove_prefix(remaining_prefix);
|
||||
self.nodes[i].1.nodes.is_empty()
|
||||
} else {
|
||||
false
|
||||
};
|
||||
if should_remove {
|
||||
self.nodes.remove(i);
|
||||
} else {
|
||||
i += 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub fn final_edges_after_prefix(&self, prefix: &[u16], visit: &mut impl FnMut(u16)) {
|
||||
let [first_edge, remaining_prefix @ ..] = prefix else {
|
||||
for node in self.nodes.iter() {
|
||||
if node.1.value.is_some() {
|
||||
if node.1.is_end {
|
||||
visit(node.0)
|
||||
}
|
||||
}
|
||||
@ -170,20 +62,8 @@ impl<V> PathsMap<V> {
|
||||
}
|
||||
}
|
||||
|
||||
pub fn edge_indices_after_prefix(&self, prefix: &[u16]) -> Vec<u16> {
|
||||
let [first_edge, remaining_prefix @ ..] = prefix else {
|
||||
return self.nodes.iter().map(|n| n.0).collect();
|
||||
};
|
||||
for (edge, rest) in self.nodes.iter() {
|
||||
if edge == first_edge {
|
||||
return rest.edge_indices_after_prefix(remaining_prefix);
|
||||
}
|
||||
}
|
||||
vec![]
|
||||
}
|
||||
|
||||
pub fn contains_prefix_of_path(&self, path: &[u16]) -> bool {
|
||||
if self.value.is_some() {
|
||||
if self.is_end {
|
||||
return true;
|
||||
}
|
||||
match path {
|
||||
|
@ -5,7 +5,7 @@ use itertools::Itertools;
|
||||
use super::ProximityEdge;
|
||||
use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
|
||||
use crate::search::new::ranking_rule_graph::proximity::WordPair;
|
||||
use crate::search::new::ranking_rule_graph::EdgeDetails;
|
||||
use crate::search::new::ranking_rule_graph::EdgeCondition;
|
||||
use crate::search::new::{QueryNode, SearchContext};
|
||||
use crate::Result;
|
||||
|
||||
@ -57,10 +57,10 @@ pub fn visit_to_node<'search, 'from_data>(
|
||||
ctx: &mut SearchContext<'search>,
|
||||
to_node: &QueryNode,
|
||||
from_node_data: &'from_data (WordDerivations, i8),
|
||||
) -> Result<Vec<(u8, EdgeDetails<ProximityEdge>)>> {
|
||||
) -> Result<Vec<(u8, EdgeCondition<ProximityEdge>)>> {
|
||||
let (derivations1, pos1) = from_node_data;
|
||||
let term2 = match &to_node {
|
||||
QueryNode::End => return Ok(vec![(0, EdgeDetails::Unconditional)]),
|
||||
QueryNode::End => return Ok(vec![(0, EdgeCondition::Unconditional)]),
|
||||
QueryNode::Deleted | QueryNode::Start => return Ok(vec![]),
|
||||
QueryNode::Term(term) => term,
|
||||
};
|
||||
@ -96,7 +96,7 @@ pub fn visit_to_node<'search, 'from_data>(
|
||||
// We want to effectively ignore this pair of terms
|
||||
// Unconditionally walk through the edge without computing the docids
|
||||
// But also what should the cost be?
|
||||
return Ok(vec![(0, EdgeDetails::Unconditional)]);
|
||||
return Ok(vec![(0, EdgeCondition::Unconditional)]);
|
||||
}
|
||||
|
||||
let updb1 = derivations1.use_prefix_db;
|
||||
@ -189,7 +189,7 @@ pub fn visit_to_node<'search, 'from_data>(
|
||||
for (proximity, word_pairs) in proximity_word_pairs {
|
||||
edges.push((
|
||||
cost,
|
||||
EdgeDetails::Data(ProximityEdge {
|
||||
EdgeCondition::Conditional(ProximityEdge {
|
||||
pairs: word_pairs.into_boxed_slice(),
|
||||
proximity,
|
||||
}),
|
||||
@ -198,6 +198,6 @@ pub fn visit_to_node<'search, 'from_data>(
|
||||
edges
|
||||
})
|
||||
.collect::<Vec<_>>();
|
||||
new_edges.push((8 + (ngram_len2 - 1) as u8, EdgeDetails::Unconditional));
|
||||
new_edges.push((8 + (ngram_len2 - 1) as u8, EdgeCondition::Unconditional));
|
||||
Ok(new_edges)
|
||||
}
|
||||
|
@ -4,7 +4,7 @@ pub mod compute_docids;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::empty_paths_cache::EmptyPathsCache;
|
||||
use super::{EdgeDetails, RankingRuleGraphTrait};
|
||||
use super::{EdgeCondition, RankingRuleGraphTrait};
|
||||
use crate::search::new::interner::Interned;
|
||||
use crate::search::new::logger::SearchLogger;
|
||||
use crate::search::new::query_term::WordDerivations;
|
||||
@ -30,34 +30,34 @@ pub struct ProximityEdge {
|
||||
pub enum ProximityGraph {}
|
||||
|
||||
impl RankingRuleGraphTrait for ProximityGraph {
|
||||
type EdgeDetails = ProximityEdge;
|
||||
type EdgeCondition = ProximityEdge;
|
||||
type BuildVisitedFromNode = (WordDerivations, i8);
|
||||
|
||||
fn graphviz_edge_details_label(edge: &Self::EdgeDetails) -> String {
|
||||
fn label_for_edge_condition(edge: &Self::EdgeCondition) -> String {
|
||||
let ProximityEdge { pairs, proximity } = edge;
|
||||
format!(", prox {proximity}, {} pairs", pairs.len())
|
||||
}
|
||||
|
||||
fn compute_docids<'search>(
|
||||
fn resolve_edge_condition<'search>(
|
||||
ctx: &mut SearchContext<'search>,
|
||||
edge: &Self::EdgeDetails,
|
||||
edge: &Self::EdgeCondition,
|
||||
universe: &RoaringBitmap,
|
||||
) -> Result<roaring::RoaringBitmap> {
|
||||
compute_docids::compute_docids(ctx, edge, universe)
|
||||
}
|
||||
|
||||
fn build_visit_from_node<'search>(
|
||||
fn build_step_visit_source_node<'search>(
|
||||
ctx: &mut SearchContext<'search>,
|
||||
from_node: &QueryNode,
|
||||
) -> Result<Option<Self::BuildVisitedFromNode>> {
|
||||
build::visit_from_node(ctx, from_node)
|
||||
}
|
||||
|
||||
fn build_visit_to_node<'from_data, 'search: 'from_data>(
|
||||
fn build_step_visit_destination_node<'from_data, 'search: 'from_data>(
|
||||
ctx: &mut SearchContext<'search>,
|
||||
to_node: &QueryNode,
|
||||
from_node_data: &'from_data Self::BuildVisitedFromNode,
|
||||
) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>> {
|
||||
) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>> {
|
||||
build::visit_to_node(ctx, to_node, from_node_data)
|
||||
}
|
||||
|
||||
|
@ -2,7 +2,7 @@ use heed::BytesDecode;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::empty_paths_cache::EmptyPathsCache;
|
||||
use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use crate::search::new::interner::Interned;
|
||||
use crate::search::new::logger::SearchLogger;
|
||||
use crate::search::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm, WordDerivations};
|
||||
@ -20,19 +20,19 @@ pub enum TypoEdge {
|
||||
pub enum TypoGraph {}
|
||||
|
||||
impl RankingRuleGraphTrait for TypoGraph {
|
||||
type EdgeDetails = TypoEdge;
|
||||
type EdgeCondition = TypoEdge;
|
||||
type BuildVisitedFromNode = ();
|
||||
|
||||
fn graphviz_edge_details_label(edge: &Self::EdgeDetails) -> String {
|
||||
fn label_for_edge_condition(edge: &Self::EdgeCondition) -> String {
|
||||
match edge {
|
||||
TypoEdge::Phrase { .. } => ", 0 typos".to_owned(),
|
||||
TypoEdge::Word { nbr_typos, .. } => format!(", {nbr_typos} typos"),
|
||||
}
|
||||
}
|
||||
|
||||
fn compute_docids<'db_cache, 'search>(
|
||||
fn resolve_edge_condition<'db_cache, 'search>(
|
||||
ctx: &mut SearchContext<'search>,
|
||||
edge: &Self::EdgeDetails,
|
||||
edge: &Self::EdgeCondition,
|
||||
universe: &RoaringBitmap,
|
||||
) -> Result<RoaringBitmap> {
|
||||
match edge {
|
||||
@ -66,29 +66,29 @@ impl RankingRuleGraphTrait for TypoGraph {
|
||||
}
|
||||
}
|
||||
|
||||
fn build_visit_from_node<'search>(
|
||||
fn build_step_visit_source_node<'search>(
|
||||
_ctx: &mut SearchContext<'search>,
|
||||
_from_node: &QueryNode,
|
||||
) -> Result<Option<Self::BuildVisitedFromNode>> {
|
||||
Ok(Some(()))
|
||||
}
|
||||
|
||||
fn build_visit_to_node<'from_data, 'search: 'from_data>(
|
||||
fn build_step_visit_destination_node<'from_data, 'search: 'from_data>(
|
||||
_ctx: &mut SearchContext<'search>,
|
||||
to_node: &QueryNode,
|
||||
_from_node_data: &'from_data Self::BuildVisitedFromNode,
|
||||
) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>> {
|
||||
) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>> {
|
||||
match to_node {
|
||||
QueryNode::Term(LocatedQueryTerm { value, .. }) => match value {
|
||||
&QueryTerm::Phrase { phrase } => {
|
||||
Ok(vec![(0, EdgeDetails::Data(TypoEdge::Phrase { phrase }))])
|
||||
Ok(vec![(0, EdgeCondition::Conditional(TypoEdge::Phrase { phrase }))])
|
||||
}
|
||||
QueryTerm::Word { derivations } => {
|
||||
let mut edges = vec![];
|
||||
if !derivations.zero_typo.is_empty() || derivations.use_prefix_db {
|
||||
edges.push((
|
||||
0,
|
||||
EdgeDetails::Data(TypoEdge::Word {
|
||||
EdgeCondition::Conditional(TypoEdge::Word {
|
||||
derivations: derivations.clone(),
|
||||
nbr_typos: 0,
|
||||
}),
|
||||
@ -97,7 +97,7 @@ impl RankingRuleGraphTrait for TypoGraph {
|
||||
if !derivations.one_typo.is_empty() {
|
||||
edges.push((
|
||||
1,
|
||||
EdgeDetails::Data(TypoEdge::Word {
|
||||
EdgeCondition::Conditional(TypoEdge::Word {
|
||||
derivations: derivations.clone(),
|
||||
nbr_typos: 1,
|
||||
}),
|
||||
@ -106,7 +106,7 @@ impl RankingRuleGraphTrait for TypoGraph {
|
||||
if !derivations.two_typos.is_empty() {
|
||||
edges.push((
|
||||
2,
|
||||
EdgeDetails::Data(TypoEdge::Word {
|
||||
EdgeCondition::Conditional(TypoEdge::Word {
|
||||
derivations: derivations.clone(),
|
||||
nbr_typos: 2,
|
||||
}),
|
||||
@ -115,7 +115,7 @@ impl RankingRuleGraphTrait for TypoGraph {
|
||||
Ok(edges)
|
||||
}
|
||||
},
|
||||
QueryNode::End => Ok(vec![(0, EdgeDetails::Unconditional)]),
|
||||
QueryNode::End => Ok(vec![(0, EdgeCondition::Unconditional)]),
|
||||
QueryNode::Deleted | QueryNode::Start => panic!(),
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user