Replace EdgeCondition with an Option<..> + other code cleanup

This commit is contained in:
Loïc Lecrenier 2023-03-16 11:52:51 +01:00
parent 7b1d8f4c6d
commit aa59c3bc2c
16 changed files with 202 additions and 204 deletions

View File

@ -20,10 +20,17 @@ pub struct DistinctOutput {
pub excluded: RoaringBitmap, pub excluded: RoaringBitmap,
} }
/// Return a [`DistinctOutput`] containing:
/// - `remaining`: a set of docids built such that exactly one element from `candidates`
/// is kept for each distinct value inside the given field. If the field does not exist, it
/// is considered unique.
/// - `excluded`: the set of document ids that contain a value for the given field that occurs
/// in the given candidates.
pub fn apply_distinct_rule<'ctx>( pub fn apply_distinct_rule<'ctx>(
ctx: &mut SearchContext<'ctx>, ctx: &mut SearchContext<'ctx>,
field_id: u16, field_id: u16,
candidates: &RoaringBitmap, candidates: &RoaringBitmap,
// TODO: add a universe here, such that the `excluded` are a subset of the universe?
) -> Result<DistinctOutput> { ) -> Result<DistinctOutput> {
let mut excluded = RoaringBitmap::new(); let mut excluded = RoaringBitmap::new();
let mut remaining = RoaringBitmap::new(); let mut remaining = RoaringBitmap::new();
@ -37,6 +44,7 @@ pub fn apply_distinct_rule<'ctx>(
Ok(DistinctOutput { remaining, excluded }) Ok(DistinctOutput { remaining, excluded })
} }
/// Apply the distinct rule defined by [`apply_distinct_rule`] for a single document id.
fn distinct_single_docid( fn distinct_single_docid(
index: &Index, index: &Index,
txn: &RoTxn, txn: &RoTxn,
@ -69,6 +77,7 @@ fn distinct_single_docid(
Ok(()) Ok(())
} }
/// Return all the docids containing the given value in the given field
fn facet_value_docids( fn facet_value_docids(
database: Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>, database: Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
txn: &RoTxn, txn: &RoTxn,
@ -79,13 +88,15 @@ fn facet_value_docids(
.get(txn, &FacetGroupKey { field_id, level: 0, left_bound: facet_value }) .get(txn, &FacetGroupKey { field_id, level: 0, left_bound: facet_value })
.map(|opt| opt.map(|v| v.bitmap)) .map(|opt| opt.map(|v| v.bitmap))
} }
/// Return an iterator over each number value in the given field of the given document.
fn facet_number_values<'a>( fn facet_number_values<'a>(
id: u32, docid: u32,
distinct: u16, field_id: u16,
index: &Index, index: &Index,
txn: &'a RoTxn, txn: &'a RoTxn,
) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<ByteSliceRefCodec>, Unit>> { ) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<ByteSliceRefCodec>, Unit>> {
let key = facet_values_prefix_key(distinct, id); let key = facet_values_prefix_key(field_id, docid);
let iter = index let iter = index
.field_id_docid_facet_f64s .field_id_docid_facet_f64s
@ -96,13 +107,14 @@ fn facet_number_values<'a>(
Ok(iter) Ok(iter)
} }
/// Return an iterator over each string value in the given field of the given document.
fn facet_string_values<'a>( fn facet_string_values<'a>(
docid: u32, docid: u32,
distinct: u16, field_id: u16,
index: &Index, index: &Index,
txn: &'a RoTxn, txn: &'a RoTxn,
) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<ByteSliceRefCodec>, Str>> { ) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<ByteSliceRefCodec>, Str>> {
let key = facet_values_prefix_key(distinct, docid); let key = facet_values_prefix_key(field_id, docid);
let iter = index let iter = index
.field_id_docid_facet_strings .field_id_docid_facet_strings

View File

@ -45,7 +45,7 @@ use super::interner::MappedInterner;
use super::logger::SearchLogger; use super::logger::SearchLogger;
use super::query_graph::QueryNode; use super::query_graph::QueryNode;
use super::ranking_rule_graph::{ use super::ranking_rule_graph::{
DeadEndPathCache, EdgeCondition, EdgeConditionDocIdsCache, ProximityGraph, RankingRuleGraph, DeadEndPathCache, EdgeConditionDocIdsCache, ProximityGraph, RankingRuleGraph,
RankingRuleGraphTrait, TypoGraph, RankingRuleGraphTrait, TypoGraph,
}; };
use super::small_bitmap::SmallBitmap; use super::small_bitmap::SmallBitmap;
@ -87,7 +87,7 @@ pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
/// Cache to retrieve the docids associated with each edge /// Cache to retrieve the docids associated with each edge
edge_conditions_cache: EdgeConditionDocIdsCache<G>, edge_conditions_cache: EdgeConditionDocIdsCache<G>,
/// Cache used to optimistically discard paths that resolve to no documents. /// Cache used to optimistically discard paths that resolve to no documents.
empty_paths_cache: DeadEndPathCache<G>, dead_end_path_cache: DeadEndPathCache<G>,
/// A structure giving the list of possible costs from each node to the end node, /// A structure giving the list of possible costs from each node to the end node,
/// along with a set of unavoidable edges that must be traversed to achieve that distance. /// along with a set of unavoidable edges that must be traversed to achieve that distance.
all_distances: MappedInterner<Vec<(u16, SmallBitmap<G::EdgeCondition>)>, QueryNode>, all_distances: MappedInterner<Vec<(u16, SmallBitmap<G::EdgeCondition>)>, QueryNode>,
@ -101,29 +101,25 @@ pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
fn remove_empty_edges<'ctx, G: RankingRuleGraphTrait>( fn remove_empty_edges<'ctx, G: RankingRuleGraphTrait>(
ctx: &mut SearchContext<'ctx>, ctx: &mut SearchContext<'ctx>,
graph: &mut RankingRuleGraph<G>, graph: &mut RankingRuleGraph<G>,
edge_docids_cache: &mut EdgeConditionDocIdsCache<G>, condition_docids_cache: &mut EdgeConditionDocIdsCache<G>,
universe: &RoaringBitmap, universe: &RoaringBitmap,
empty_paths_cache: &mut DeadEndPathCache<G>, dead_end_path_cache: &mut DeadEndPathCache<G>,
) -> Result<()> { ) -> Result<()> {
for edge_id in graph.edges_store.indexes() { for edge_id in graph.edges_store.indexes() {
let Some(edge) = graph.edges_store.get(edge_id).as_ref() else { let Some(edge) = graph.edges_store.get(edge_id).as_ref() else {
continue; continue;
}; };
let condition = edge.condition; let Some(condition) = edge.condition else { continue };
match condition { let docids =
EdgeCondition::Unconditional => continue, condition_docids_cache.get_condition_docids(ctx, condition, graph, universe)?;
EdgeCondition::Conditional(condition) => {
let docids = edge_docids_cache.get_edge_docids(ctx, condition, graph, universe)?;
if docids.is_disjoint(universe) { if docids.is_disjoint(universe) {
graph.remove_edges_with_condition(condition); graph.remove_edges_with_condition(condition);
empty_paths_cache.add_condition(condition); dead_end_path_cache.add_condition(condition);
edge_docids_cache.cache.remove(&condition); condition_docids_cache.cache.remove(&condition);
continue; continue;
} }
} }
}
}
Ok(()) Ok(())
} }
@ -139,17 +135,17 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
query_graph: &QueryGraph, query_graph: &QueryGraph,
) -> Result<()> { ) -> Result<()> {
let mut graph = RankingRuleGraph::build(ctx, query_graph.clone())?; let mut graph = RankingRuleGraph::build(ctx, query_graph.clone())?;
let mut edge_docids_cache = EdgeConditionDocIdsCache::default(); let mut condition_docids_cache = EdgeConditionDocIdsCache::default();
let mut empty_paths_cache = DeadEndPathCache::new(&graph.conditions_interner); let mut dead_end_path_cache = DeadEndPathCache::new(&graph.conditions_interner);
// First simplify the graph as much as possible, by computing the docids of the edges // First simplify the graph as much as possible, by computing the docids of all the conditions
// within the rule's universe and removing the edges that have no associated docids. // within the rule's universe and removing the edges that have no associated docids.
remove_empty_edges( remove_empty_edges(
ctx, ctx,
&mut graph, &mut graph,
&mut edge_docids_cache, &mut condition_docids_cache,
universe, universe,
&mut empty_paths_cache, &mut dead_end_path_cache,
)?; )?;
// Then pre-compute the cost of all paths from each node to the end node // Then pre-compute the cost of all paths from each node to the end node
@ -157,8 +153,8 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
let state = GraphBasedRankingRuleState { let state = GraphBasedRankingRuleState {
graph, graph,
edge_conditions_cache: edge_docids_cache, edge_conditions_cache: condition_docids_cache,
empty_paths_cache, dead_end_path_cache,
all_distances, all_distances,
cur_distance_idx: 0, cur_distance_idx: 0,
}; };
@ -187,7 +183,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
&mut state.graph, &mut state.graph,
&mut state.edge_conditions_cache, &mut state.edge_conditions_cache,
universe, universe,
&mut state.empty_paths_cache, &mut state.dead_end_path_cache,
)?; )?;
// If the cur_distance_idx does not point to a valid cost in the `all_distances` // If the cur_distance_idx does not point to a valid cost in the `all_distances`
@ -208,8 +204,8 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
let GraphBasedRankingRuleState { let GraphBasedRankingRuleState {
graph, graph,
edge_conditions_cache: edge_docids_cache, edge_conditions_cache: condition_docids_cache,
empty_paths_cache, dead_end_path_cache,
all_distances, all_distances,
cur_distance_idx: _, cur_distance_idx: _,
} = &mut state; } = &mut state;
@ -224,18 +220,18 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
// For each path of the given cost, we will compute its associated // For each path of the given cost, we will compute its associated
// document ids. // document ids.
// In case the path does not resolve to any document id, we try to figure out why // In case the path does not resolve to any document id, we try to figure out why
// and update the `empty_paths_cache` accordingly. // and update the `dead_end_path_cache` accordingly.
// For example, it may be that the path is empty because one of its edges is disjoint // For example, it may be that the path is empty because one of its edges is disjoint
// with the universe, or because a prefix of the path is disjoint with the universe, or because // with the universe, or because a prefix of the path is disjoint with the universe, or because
// the path contains two edges that are disjoint from each other within the universe. // the path contains two edges that are disjoint from each other within the universe.
// Updating the empty_paths_cache helps speed up the execution of `visit_paths_of_cost` and reduces // Updating the dead_end_path_cache helps speed up the execution of `visit_paths_of_cost` and reduces
// the number of future candidate paths given by that same function. // the number of future candidate paths given by that same function.
graph.visit_paths_of_cost( graph.visit_paths_of_cost(
graph.query_graph.root_node, graph.query_graph.root_node,
cost, cost,
all_distances, all_distances,
empty_paths_cache, dead_end_path_cache,
|path, graph, empty_paths_cache| { |path, graph, dead_end_path_cache| {
// Accumulate the path for logging purposes only // Accumulate the path for logging purposes only
paths.push(path.to_vec()); paths.push(path.to_vec());
let mut path_docids = universe.clone(); let mut path_docids = universe.clone();
@ -243,47 +239,48 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
// We store the edges and their docids in vectors in case the path turns out to be // We store the edges and their docids in vectors in case the path turns out to be
// empty and we need to figure out why it was empty. // empty and we need to figure out why it was empty.
let mut visited_conditions = vec![]; let mut visited_conditions = vec![];
let mut cached_edge_docids = vec![]; let mut cached_condition_docids = vec![];
// graph.conditions_interner.map(|_| RoaringBitmap::new()); // graph.conditions_interner.map(|_| RoaringBitmap::new());
for &condition in path { for &condition in path {
visited_conditions.push(condition); visited_conditions.push(condition);
let edge_docids = let condition_docids = condition_docids_cache
edge_docids_cache.get_edge_docids(ctx, condition, graph, &universe)?; .get_condition_docids(ctx, condition, graph, &universe)?;
cached_edge_docids.push((condition, edge_docids.clone())); // .get_mut(condition) = edge_docids.clone(); cached_condition_docids.push((condition, condition_docids.clone())); // .get_mut(condition) = condition_docids.clone();
// If the edge is empty, then the path will be empty as well, we update the graph // If the edge is empty, then the path will be empty as well, we update the graph
// and caches accordingly and skip to the next candidate path. // and caches accordingly and skip to the next candidate path.
if edge_docids.is_disjoint(&universe) { if condition_docids.is_disjoint(&universe) {
// 1. Store in the cache that this edge is empty for this universe // 1. Store in the cache that this edge is empty for this universe
empty_paths_cache.add_condition(condition); dead_end_path_cache.add_condition(condition);
// 2. remove this edge from the ranking rule graph // 2. remove this edge from the ranking rule graph
// ouch, no! :( need to link a condition to one or more ranking rule edges // ouch, no! :( need to link a condition to one or more ranking rule edges
graph.remove_edges_with_condition(condition); graph.remove_edges_with_condition(condition);
// 3. Also remove the entry from the edge_docids_cache, since we don't need it anymore // 3. Also remove the entry from the condition_docids_cache, since we don't need it anymore
edge_docids_cache.cache.remove(&condition); condition_docids_cache.cache.remove(&condition);
return Ok(ControlFlow::Continue(())); return Ok(ControlFlow::Continue(()));
} }
path_docids &= edge_docids; path_docids &= condition_docids;
// If the (sub)path is empty, we try to figure out why and update the caches accordingly. // If the (sub)path is empty, we try to figure out why and update the caches accordingly.
if path_docids.is_disjoint(&universe) { if path_docids.is_disjoint(&universe) {
// First, we know that this path is empty, and thus any path // First, we know that this path is empty, and thus any path
// that is a superset of it will also be empty. // that is a superset of it will also be empty.
empty_paths_cache.add_prefix(&visited_conditions); dead_end_path_cache.add_prefix(&visited_conditions);
// Second, if the intersection between this edge and any // Second, if the intersection between this edge and any
// previous one is disjoint with the universe, // previous one is disjoint with the universe,
// then we also know that any path containing the same couple of // then we also know that any path containing the same couple of
// edges will also be empty. // edges will also be empty.
for (past_condition, edge_docids2) in cached_edge_docids.iter() { for (past_condition, condition_docids2) in cached_condition_docids.iter() {
if *past_condition == condition { if *past_condition == condition {
continue; continue;
}; };
let intersection = edge_docids & edge_docids2; let intersection = condition_docids & condition_docids2;
if intersection.is_disjoint(&universe) { if intersection.is_disjoint(&universe) {
empty_paths_cache.add_condition_couple(*past_condition, condition); dead_end_path_cache
.add_condition_couple(*past_condition, condition);
} }
} }
// We should maybe instead try to compute: // We should maybe instead try to compute:
@ -310,7 +307,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
G::log_state( G::log_state(
&original_graph, &original_graph,
&paths, &paths,
empty_paths_cache, dead_end_path_cache,
original_universe, original_universe,
all_distances, all_distances,
cost, cost,
@ -322,8 +319,8 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
// But we only do it in case the bucket length is >1, because otherwise // But we only do it in case the bucket length is >1, because otherwise
// we know the child ranking rule won't be called anyway // we know the child ranking rule won't be called anyway
let mut next_query_graph = original_graph.query_graph; let mut next_query_graph = original_graph.query_graph;
next_query_graph.simplify();
if bucket.len() > 1 { if bucket.len() > 1 {
next_query_graph.simplify();
// 1. Gather all the words and phrases used in the computation of this bucket // 1. Gather all the words and phrases used in the computation of this bucket
let mut used_words = HashSet::new(); let mut used_words = HashSet::new();
let mut used_phrases = HashSet::new(); let mut used_phrases = HashSet::new();

View File

@ -25,6 +25,8 @@ impl<T> Interned<T> {
/// be copied, compared, and hashed efficiently. An immutable reference to the original value /// be copied, compared, and hashed efficiently. An immutable reference to the original value
/// can be retrieved using `self.get(interned)`. A set of values within the interner can be /// can be retrieved using `self.get(interned)`. A set of values within the interner can be
/// efficiently managed using [`SmallBitmap<T>`](super::small_bitmap::SmallBitmap). /// efficiently managed using [`SmallBitmap<T>`](super::small_bitmap::SmallBitmap).
///
/// A dedup-interner can contain a maximum of `u16::MAX` values.
#[derive(Clone)] #[derive(Clone)]
pub struct DedupInterner<T> { pub struct DedupInterner<T> {
stable_store: Vec<T>, stable_store: Vec<T>,
@ -36,7 +38,8 @@ impl<T> Default for DedupInterner<T> {
} }
} }
impl<T> DedupInterner<T> { impl<T> DedupInterner<T> {
/// /// Convert the dedup-interner into a fixed-size interner, such that new
/// elements cannot be added to it anymore.
pub fn freeze(self) -> FixedSizeInterner<T> { pub fn freeze(self) -> FixedSizeInterner<T> {
FixedSizeInterner { stable_store: self.stable_store } FixedSizeInterner { stable_store: self.stable_store }
} }
@ -46,6 +49,8 @@ impl<T> DedupInterner<T>
where where
T: Clone + Eq + Hash, T: Clone + Eq + Hash,
{ {
/// Insert the given value into the dedup-interner, and return
/// its index.
pub fn insert(&mut self, s: T) -> Interned<T> { pub fn insert(&mut self, s: T) -> Interned<T> {
if let Some(interned) = self.lookup.get(&s) { if let Some(interned) = self.lookup.get(&s) {
*interned *interned
@ -57,35 +62,21 @@ where
interned interned
} }
} }
/// Get a reference to the interned value.
pub fn get(&self, interned: Interned<T>) -> &T { pub fn get(&self, interned: Interned<T>) -> &T {
&self.stable_store[interned.idx as usize] &self.stable_store[interned.idx as usize]
} }
} }
#[derive(Clone)]
pub struct Interner<T> {
stable_store: Vec<T>,
}
impl<T> Default for Interner<T> {
fn default() -> Self {
Self { stable_store: Default::default() }
}
}
impl<T> Interner<T> {
pub fn freeze(self) -> FixedSizeInterner<T> {
FixedSizeInterner { stable_store: self.stable_store }
}
pub fn push(&mut self, s: T) -> Interned<T> {
assert!(self.stable_store.len() < u16::MAX as usize);
self.stable_store.push(s);
Interned::from_raw(self.stable_store.len() as u16 - 1)
}
}
/// A fixed-length store for values of type `T`, where each value is identified
/// by an index of type [`Interned<T>`].
#[derive(Clone)] #[derive(Clone)]
pub struct FixedSizeInterner<T> { pub struct FixedSizeInterner<T> {
stable_store: Vec<T>, stable_store: Vec<T>,
} }
impl<T: Clone> FixedSizeInterner<T> { impl<T: Clone> FixedSizeInterner<T> {
/// Create a fixed-size interner of the given length containing
/// clones of the given value.
pub fn new(length: u16, value: T) -> Self { pub fn new(length: u16, value: T) -> Self {
Self { stable_store: vec![value; length as usize] } Self { stable_store: vec![value; length as usize] }
} }
@ -105,7 +96,6 @@ impl<T> FixedSizeInterner<T> {
pub fn len(&self) -> u16 { pub fn len(&self) -> u16 {
self.stable_store.len() as u16 self.stable_store.len() as u16
} }
pub fn map<U>(&self, map_f: impl Fn(&T) -> U) -> MappedInterner<U, T> { pub fn map<U>(&self, map_f: impl Fn(&T) -> U) -> MappedInterner<U, T> {
MappedInterner { MappedInterner {
stable_store: self.stable_store.iter().map(map_f).collect(), stable_store: self.stable_store.iter().map(map_f).collect(),
@ -122,6 +112,12 @@ impl<T> FixedSizeInterner<T> {
self.stable_store.iter_mut().enumerate().map(|(i, x)| (Interned::from_raw(i as u16), x)) self.stable_store.iter_mut().enumerate().map(|(i, x)| (Interned::from_raw(i as u16), x))
} }
} }
/// A store of values of type `T`, each linked to a value of type `From`
/// stored in another interner. To create a mapped interner, use the
/// `map` method on [`FixedSizeInterner`] or [`MappedInterner`].
///
/// Values in this interner are indexed with [`Interned<From>`].
#[derive(Clone)] #[derive(Clone)]
pub struct MappedInterner<T, From> { pub struct MappedInterner<T, From> {
stable_store: Vec<T>, stable_store: Vec<T>,

View File

@ -10,7 +10,7 @@ use crate::search::new::interner::{Interned, MappedInterner};
use crate::search::new::query_graph::QueryNodeData; use crate::search::new::query_graph::QueryNodeData;
use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm}; use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm};
use crate::search::new::ranking_rule_graph::{ use crate::search::new::ranking_rule_graph::{
DeadEndPathCache, Edge, EdgeCondition, ProximityCondition, ProximityGraph, RankingRuleGraph, DeadEndPathCache, Edge, ProximityCondition, ProximityGraph, RankingRuleGraph,
RankingRuleGraphTrait, TypoEdge, TypoGraph, RankingRuleGraphTrait, TypoEdge, TypoGraph,
}; };
use crate::search::new::small_bitmap::SmallBitmap; use crate::search::new::small_bitmap::SmallBitmap;
@ -44,7 +44,7 @@ pub enum SearchEvents {
ProximityState { ProximityState {
graph: RankingRuleGraph<ProximityGraph>, graph: RankingRuleGraph<ProximityGraph>,
paths: Vec<Vec<Interned<ProximityCondition>>>, paths: Vec<Vec<Interned<ProximityCondition>>>,
empty_paths_cache: DeadEndPathCache<ProximityGraph>, dead_end_path_cache: DeadEndPathCache<ProximityGraph>,
universe: RoaringBitmap, universe: RoaringBitmap,
distances: MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>, distances: MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>,
cost: u16, cost: u16,
@ -52,7 +52,7 @@ pub enum SearchEvents {
TypoState { TypoState {
graph: RankingRuleGraph<TypoGraph>, graph: RankingRuleGraph<TypoGraph>,
paths: Vec<Vec<Interned<TypoEdge>>>, paths: Vec<Vec<Interned<TypoEdge>>>,
empty_paths_cache: DeadEndPathCache<TypoGraph>, dead_end_path_cache: DeadEndPathCache<TypoGraph>,
universe: RoaringBitmap, universe: RoaringBitmap,
distances: MappedInterner<Vec<(u16, SmallBitmap<TypoEdge>)>, QueryNode>, distances: MappedInterner<Vec<(u16, SmallBitmap<TypoEdge>)>, QueryNode>,
cost: u16, cost: u16,
@ -170,7 +170,7 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
&mut self, &mut self,
query_graph: &RankingRuleGraph<ProximityGraph>, query_graph: &RankingRuleGraph<ProximityGraph>,
paths_map: &[Vec<Interned<ProximityCondition>>], paths_map: &[Vec<Interned<ProximityCondition>>],
empty_paths_cache: &DeadEndPathCache<ProximityGraph>, dead_end_path_cache: &DeadEndPathCache<ProximityGraph>,
universe: &RoaringBitmap, universe: &RoaringBitmap,
distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>, distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>,
cost: u16, cost: u16,
@ -178,7 +178,7 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
self.events.push(SearchEvents::ProximityState { self.events.push(SearchEvents::ProximityState {
graph: query_graph.clone(), graph: query_graph.clone(),
paths: paths_map.to_vec(), paths: paths_map.to_vec(),
empty_paths_cache: empty_paths_cache.clone(), dead_end_path_cache: dead_end_path_cache.clone(),
universe: universe.clone(), universe: universe.clone(),
distances: distances.clone(), distances: distances.clone(),
cost, cost,
@ -189,7 +189,7 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
&mut self, &mut self,
query_graph: &RankingRuleGraph<TypoGraph>, query_graph: &RankingRuleGraph<TypoGraph>,
paths_map: &[Vec<Interned<TypoEdge>>], paths_map: &[Vec<Interned<TypoEdge>>],
empty_paths_cache: &DeadEndPathCache<TypoGraph>, dead_end_path_cache: &DeadEndPathCache<TypoGraph>,
universe: &RoaringBitmap, universe: &RoaringBitmap,
distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoEdge>)>, QueryNode>, distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoEdge>)>, QueryNode>,
cost: u16, cost: u16,
@ -197,7 +197,7 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
self.events.push(SearchEvents::TypoState { self.events.push(SearchEvents::TypoState {
graph: query_graph.clone(), graph: query_graph.clone(),
paths: paths_map.to_vec(), paths: paths_map.to_vec(),
empty_paths_cache: empty_paths_cache.clone(), dead_end_path_cache: dead_end_path_cache.clone(),
universe: universe.clone(), universe: universe.clone(),
distances: distances.clone(), distances: distances.clone(),
cost, cost,
@ -358,7 +358,7 @@ results.{random} {{
SearchEvents::ProximityState { SearchEvents::ProximityState {
graph, graph,
paths, paths,
empty_paths_cache, dead_end_path_cache,
universe, universe,
distances, distances,
cost, cost,
@ -374,7 +374,7 @@ results.{random} {{
ctx, ctx,
graph, graph,
paths, paths,
empty_paths_cache, dead_end_path_cache,
distances.clone(), distances.clone(),
&mut new_file, &mut new_file,
); );
@ -391,7 +391,7 @@ results.{random} {{
SearchEvents::TypoState { SearchEvents::TypoState {
graph, graph,
paths, paths,
empty_paths_cache, dead_end_path_cache,
universe, universe,
distances, distances,
cost, cost,
@ -407,7 +407,7 @@ results.{random} {{
ctx, ctx,
graph, graph,
paths, paths,
empty_paths_cache, dead_end_path_cache,
distances.clone(), distances.clone(),
&mut new_file, &mut new_file,
); );
@ -547,11 +547,11 @@ shape: class"
let Edge { source_node, dest_node, condition: details, cost } = edge; let Edge { source_node, dest_node, condition: details, cost } = edge;
match &details { match &details {
EdgeCondition::Unconditional => { None => {
writeln!(file, "{source_node} -> {dest_node} : \"always cost {cost}\"",) writeln!(file, "{source_node} -> {dest_node} : \"always cost {cost}\"",)
.unwrap(); .unwrap();
} }
EdgeCondition::Conditional(condition) => { Some(condition) => {
// let condition = graph.conditions_interner.get(*condition); // let condition = graph.conditions_interner.get(*condition);
writeln!( writeln!(
file, file,

View File

@ -66,7 +66,7 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
&mut self, &mut self,
query_graph: &RankingRuleGraph<ProximityGraph>, query_graph: &RankingRuleGraph<ProximityGraph>,
paths: &[Vec<Interned<ProximityCondition>>], paths: &[Vec<Interned<ProximityCondition>>],
empty_paths_cache: &DeadEndPathCache<ProximityGraph>, dead_end_path_cache: &DeadEndPathCache<ProximityGraph>,
universe: &RoaringBitmap, universe: &RoaringBitmap,
distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>, distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>,
cost: u16, cost: u16,
@ -77,7 +77,7 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
&mut self, &mut self,
query_graph: &RankingRuleGraph<TypoGraph>, query_graph: &RankingRuleGraph<TypoGraph>,
paths: &[Vec<Interned<TypoEdge>>], paths: &[Vec<Interned<TypoEdge>>],
empty_paths_cache: &DeadEndPathCache<TypoGraph>, dead_end_path_cache: &DeadEndPathCache<TypoGraph>,
universe: &RoaringBitmap, universe: &RoaringBitmap,
distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoEdge>)>, QueryNode>, distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoEdge>)>, QueryNode>,
cost: u16, cost: u16,
@ -137,7 +137,7 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
&mut self, &mut self,
_query_graph: &RankingRuleGraph<ProximityGraph>, _query_graph: &RankingRuleGraph<ProximityGraph>,
_paths_map: &[Vec<Interned<ProximityCondition>>], _paths_map: &[Vec<Interned<ProximityCondition>>],
_empty_paths_cache: &DeadEndPathCache<ProximityGraph>, _dead_end_path_cache: &DeadEndPathCache<ProximityGraph>,
_universe: &RoaringBitmap, _universe: &RoaringBitmap,
_distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>, _distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>,
_cost: u16, _cost: u16,
@ -148,7 +148,7 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
&mut self, &mut self,
_query_graph: &RankingRuleGraph<TypoGraph>, _query_graph: &RankingRuleGraph<TypoGraph>,
_paths: &[Vec<Interned<TypoEdge>>], _paths: &[Vec<Interned<TypoEdge>>],
_empty_paths_cache: &DeadEndPathCache<TypoGraph>, _dead_end_path_cache: &DeadEndPathCache<TypoGraph>,
_universe: &RoaringBitmap, _universe: &RoaringBitmap,
_distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoEdge>)>, QueryNode>, _distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoEdge>)>, QueryNode>,
_cost: u16, _cost: u16,

View File

@ -36,6 +36,7 @@ use crate::search::new::query_term::located_query_terms_from_string;
use crate::search::new::words::Words; use crate::search::new::words::Words;
use crate::{Filter, Index, Result, TermsMatchingStrategy}; use crate::{Filter, Index, Result, TermsMatchingStrategy};
/// A structure used throughout the execution of a search query.
pub struct SearchContext<'ctx> { pub struct SearchContext<'ctx> {
pub index: &'ctx Index, pub index: &'ctx Index,
pub txn: &'ctx RoTxn<'ctx>, pub txn: &'ctx RoTxn<'ctx>,
@ -59,6 +60,7 @@ impl<'ctx> SearchContext<'ctx> {
} }
} }
/// Apply the [`TermsMatchingStrategy`] to the query graph and resolve it.
#[allow(clippy::too_many_arguments)] #[allow(clippy::too_many_arguments)]
fn resolve_maximally_reduced_query_graph<'ctx>( fn resolve_maximally_reduced_query_graph<'ctx>(
ctx: &mut SearchContext<'ctx>, ctx: &mut SearchContext<'ctx>,
@ -100,6 +102,8 @@ fn resolve_maximally_reduced_query_graph<'ctx>(
Ok(docids) Ok(docids)
} }
/// Return the list of initialised ranking rules to be used for a placeholder search.
fn get_ranking_rules_for_placeholder_search<'ctx>( fn get_ranking_rules_for_placeholder_search<'ctx>(
ctx: &SearchContext<'ctx>, ctx: &SearchContext<'ctx>,
) -> Result<Vec<Box<dyn RankingRule<'ctx, PlaceholderQuery>>>> { ) -> Result<Vec<Box<dyn RankingRule<'ctx, PlaceholderQuery>>>> {
@ -123,6 +127,8 @@ fn get_ranking_rules_for_placeholder_search<'ctx>(
} }
Ok(ranking_rules) Ok(ranking_rules)
} }
/// Return the list of initialised ranking rules to be used for a query graph search.
fn get_ranking_rules_for_query_graph_search<'ctx>( fn get_ranking_rules_for_query_graph_search<'ctx>(
ctx: &SearchContext<'ctx>, ctx: &SearchContext<'ctx>,
terms_matching_strategy: TermsMatchingStrategy, terms_matching_strategy: TermsMatchingStrategy,

View File

@ -1,7 +1,7 @@
use std::collections::HashSet; use std::collections::HashSet;
use super::{Edge, RankingRuleGraph, RankingRuleGraphTrait}; use super::{Edge, RankingRuleGraph, RankingRuleGraphTrait};
use crate::search::new::interner::{DedupInterner, Interner}; use crate::search::new::interner::DedupInterner;
use crate::search::new::small_bitmap::SmallBitmap; use crate::search::new::small_bitmap::SmallBitmap;
use crate::search::new::{QueryGraph, SearchContext}; use crate::search::new::{QueryGraph, SearchContext};
use crate::Result; use crate::Result;
@ -19,7 +19,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
let mut conditions_interner = DedupInterner::default(); let mut conditions_interner = DedupInterner::default();
let mut edges_store = Interner::default(); let mut edges_store = DedupInterner::default();
let mut edges_of_node = query_graph.nodes.map(|_| HashSet::new()); let mut edges_of_node = query_graph.nodes.map(|_| HashSet::new());
for (source_id, source_node) in graph_nodes.iter() { for (source_id, source_node) in graph_nodes.iter() {
@ -33,7 +33,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
} }
for (cost, condition) in edges { for (cost, condition) in edges {
let new_edge_id = edges_store.push(Some(Edge { let new_edge_id = edges_store.insert(Some(Edge {
source_node: source_id, source_node: source_id,
dest_node: dest_idx, dest_node: dest_idx,
cost, cost,

View File

@ -4,8 +4,8 @@ use std::collections::btree_map::Entry;
use std::collections::{BTreeMap, VecDeque}; use std::collections::{BTreeMap, VecDeque};
use std::ops::ControlFlow; use std::ops::ControlFlow;
use super::empty_paths_cache::DeadEndPathCache; use super::dead_end_path_cache::DeadEndPathCache;
use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait}; use super::{RankingRuleGraph, RankingRuleGraphTrait};
use crate::search::new::interner::{Interned, MappedInterner}; use crate::search::new::interner::{Interned, MappedInterner};
use crate::search::new::query_graph::QueryNode; use crate::search::new::query_graph::QueryNode;
use crate::search::new::small_bitmap::SmallBitmap; use crate::search::new::small_bitmap::SmallBitmap;
@ -23,7 +23,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
from: Interned<QueryNode>, from: Interned<QueryNode>,
cost: u16, cost: u16,
all_distances: &MappedInterner<Vec<(u16, SmallBitmap<G::EdgeCondition>)>, QueryNode>, all_distances: &MappedInterner<Vec<(u16, SmallBitmap<G::EdgeCondition>)>, QueryNode>,
empty_paths_cache: &mut DeadEndPathCache<G>, dead_end_path_cache: &mut DeadEndPathCache<G>,
mut visit: impl FnMut( mut visit: impl FnMut(
&[Interned<G::EdgeCondition>], &[Interned<G::EdgeCondition>],
&mut Self, &mut Self,
@ -34,11 +34,11 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
from, from,
cost, cost,
all_distances, all_distances,
empty_paths_cache, dead_end_path_cache,
&mut visit, &mut visit,
&mut vec![], &mut vec![],
&mut SmallBitmap::for_interned_values_in(&self.conditions_interner), &mut SmallBitmap::for_interned_values_in(&self.conditions_interner),
&mut empty_paths_cache.conditions.clone(), &mut dead_end_path_cache.conditions.clone(),
)?; )?;
Ok(()) Ok(())
} }
@ -47,7 +47,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
from: Interned<QueryNode>, from: Interned<QueryNode>,
cost: u16, cost: u16,
all_distances: &MappedInterner<Vec<(u16, SmallBitmap<G::EdgeCondition>)>, QueryNode>, all_distances: &MappedInterner<Vec<(u16, SmallBitmap<G::EdgeCondition>)>, QueryNode>,
empty_paths_cache: &mut DeadEndPathCache<G>, dead_end_path_cache: &mut DeadEndPathCache<G>,
visit: &mut impl FnMut( visit: &mut impl FnMut(
&[Interned<G::EdgeCondition>], &[Interned<G::EdgeCondition>],
&mut Self, &mut Self,
@ -66,10 +66,10 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
continue; continue;
} }
let next_any_valid = match edge.condition { let next_any_valid = match edge.condition {
EdgeCondition::Unconditional => { None => {
if edge.dest_node == self.query_graph.end_node { if edge.dest_node == self.query_graph.end_node {
any_valid = true; any_valid = true;
let control_flow = visit(prev_conditions, self, empty_paths_cache)?; let control_flow = visit(prev_conditions, self, dead_end_path_cache)?;
match control_flow { match control_flow {
ControlFlow::Continue(_) => {} ControlFlow::Continue(_) => {}
ControlFlow::Break(_) => return Ok(true), ControlFlow::Break(_) => return Ok(true),
@ -80,7 +80,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
edge.dest_node, edge.dest_node,
cost - edge.cost as u16, cost - edge.cost as u16,
all_distances, all_distances,
empty_paths_cache, dead_end_path_cache,
visit, visit,
prev_conditions, prev_conditions,
cur_path, cur_path,
@ -88,7 +88,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
)? )?
} }
} }
EdgeCondition::Conditional(condition) => { Some(condition) => {
if forbidden_conditions.contains(condition) if forbidden_conditions.contains(condition)
|| !all_distances.get(edge.dest_node).iter().any( || !all_distances.get(edge.dest_node).iter().any(
|(next_cost, necessary_conditions)| { |(next_cost, necessary_conditions)| {
@ -104,8 +104,8 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
let mut new_forbidden_conditions = forbidden_conditions.clone(); let mut new_forbidden_conditions = forbidden_conditions.clone();
new_forbidden_conditions new_forbidden_conditions
.union(empty_paths_cache.condition_couples.get(condition)); .union(dead_end_path_cache.condition_couples.get(condition));
empty_paths_cache.prefixes.final_edges_after_prefix( dead_end_path_cache.prefixes.final_edges_after_prefix(
prev_conditions, prev_conditions,
&mut |x| { &mut |x| {
new_forbidden_conditions.insert(x); new_forbidden_conditions.insert(x);
@ -113,7 +113,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
); );
let next_any_valid = if edge.dest_node == self.query_graph.end_node { let next_any_valid = if edge.dest_node == self.query_graph.end_node {
any_valid = true; any_valid = true;
let control_flow = visit(prev_conditions, self, empty_paths_cache)?; let control_flow = visit(prev_conditions, self, dead_end_path_cache)?;
match control_flow { match control_flow {
ControlFlow::Continue(_) => {} ControlFlow::Continue(_) => {}
ControlFlow::Break(_) => return Ok(true), ControlFlow::Break(_) => return Ok(true),
@ -124,7 +124,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
edge.dest_node, edge.dest_node,
cost - edge.cost as u16, cost - edge.cost as u16,
all_distances, all_distances,
empty_paths_cache, dead_end_path_cache,
visit, visit,
prev_conditions, prev_conditions,
cur_path, cur_path,
@ -139,15 +139,15 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
any_valid |= next_any_valid; any_valid |= next_any_valid;
if next_any_valid { if next_any_valid {
if empty_paths_cache.path_is_dead_end(prev_conditions, cur_path) { if dead_end_path_cache.path_is_dead_end(prev_conditions, cur_path) {
return Ok(any_valid); return Ok(any_valid);
} }
forbidden_conditions.union(&empty_paths_cache.conditions); forbidden_conditions.union(&dead_end_path_cache.conditions);
for prev_condition in prev_conditions.iter() { for prev_condition in prev_conditions.iter() {
forbidden_conditions forbidden_conditions
.union(empty_paths_cache.condition_couples.get(*prev_condition)); .union(dead_end_path_cache.condition_couples.get(*prev_condition));
} }
empty_paths_cache.prefixes.final_edges_after_prefix(prev_conditions, &mut |x| { dead_end_path_cache.prefixes.final_edges_after_prefix(prev_conditions, &mut |x| {
forbidden_conditions.insert(x); forbidden_conditions.insert(x);
}); });
} }
@ -178,16 +178,14 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
let cur_node_edges = &self.edges_of_node.get(cur_node); let cur_node_edges = &self.edges_of_node.get(cur_node);
for edge_idx in cur_node_edges.iter() { for edge_idx in cur_node_edges.iter() {
let edge = self.edges_store.get(edge_idx).as_ref().unwrap(); let edge = self.edges_store.get(edge_idx).as_ref().unwrap();
let condition = match edge.condition {
EdgeCondition::Unconditional => None,
EdgeCondition::Conditional(condition) => Some(condition),
};
let succ_node = edge.dest_node; let succ_node = edge.dest_node;
let succ_distances = distances_to_end.get(succ_node); let succ_distances = distances_to_end.get(succ_node);
for (succ_distance, succ_necessary_conditions) in succ_distances { for (succ_distance, succ_necessary_conditions) in succ_distances {
let mut potential_necessary_edges = let mut potential_necessary_edges =
SmallBitmap::for_interned_values_in(&self.conditions_interner); SmallBitmap::for_interned_values_in(&self.conditions_interner);
for condition in condition.into_iter().chain(succ_necessary_conditions.iter()) { for condition in
edge.condition.into_iter().chain(succ_necessary_conditions.iter())
{
potential_necessary_edges.insert(condition); potential_necessary_edges.insert(condition);
} }

View File

@ -9,44 +9,43 @@ use crate::search::new::SearchContext;
use crate::Result; use crate::Result;
/// A cache storing the document ids associated with each ranking rule edge /// A cache storing the document ids associated with each ranking rule edge
pub struct EdgeConditionDocIdsCache<G: RankingRuleGraphTrait> { pub struct ConditionDocIdsCache<G: RankingRuleGraphTrait> {
// TODO: should be FxHashMap<Interned<EdgeCondition>, RoaringBitmap> // TODO: should be FxHashMap<Interned<EdgeCondition>, RoaringBitmap>
pub cache: FxHashMap<Interned<G::EdgeCondition>, RoaringBitmap>, pub cache: FxHashMap<Interned<G::Condition>, RoaringBitmap>,
_phantom: PhantomData<G>, _phantom: PhantomData<G>,
} }
impl<G: RankingRuleGraphTrait> Default for EdgeConditionDocIdsCache<G> { impl<G: RankingRuleGraphTrait> Default for ConditionDocIdsCache<G> {
fn default() -> Self { fn default() -> Self {
Self { cache: Default::default(), _phantom: Default::default() } Self { cache: Default::default(), _phantom: Default::default() }
} }
} }
impl<G: RankingRuleGraphTrait> EdgeConditionDocIdsCache<G> { impl<G: RankingRuleGraphTrait> ConditionDocIdsCache<G> {
/// Retrieve the document ids for the given edge condition. /// Retrieve the document ids for the given edge condition.
/// ///
/// If the cache does not yet contain these docids, they are computed /// If the cache does not yet contain these docids, they are computed
/// and inserted in the cache. /// and inserted in the cache.
pub fn get_edge_docids<'s, 'ctx>( pub fn get_condition_docids<'s, 'ctx>(
&'s mut self, &'s mut self,
ctx: &mut SearchContext<'ctx>, ctx: &mut SearchContext<'ctx>,
// TODO: should be Interned<EdgeCondition> interned_condition: Interned<G::Condition>,
interned_edge_condition: Interned<G::EdgeCondition>,
graph: &RankingRuleGraph<G>, graph: &RankingRuleGraph<G>,
// TODO: maybe universe doesn't belong here // TODO: maybe universe doesn't belong here
universe: &RoaringBitmap, universe: &RoaringBitmap,
) -> Result<&'s RoaringBitmap> { ) -> Result<&'s RoaringBitmap> {
if self.cache.contains_key(&interned_edge_condition) { if self.cache.contains_key(&interned_condition) {
// TODO: should we update the bitmap in the cache if the new universe // TODO: should we update the bitmap in the cache if the new universe
// reduces it? // reduces it?
// TODO: maybe have a generation: u32 to track every time the universe was // TODO: maybe have a generation: u32 to track every time the universe was
// reduced. Then only attempt to recompute the intersection when there is a chance // reduced. Then only attempt to recompute the intersection when there is a chance
// that edge_docids & universe changed // that condition_docids & universe changed
return Ok(&self.cache[&interned_edge_condition]); return Ok(&self.cache[&interned_condition]);
} }
// TODO: maybe universe doesn't belong here // TODO: maybe universe doesn't belong here
let edge_condition = graph.conditions_interner.get(interned_edge_condition); let condition = graph.conditions_interner.get(interned_condition);
// TODO: faster way to do this? // TODO: faster way to do this?
let docids = universe & G::resolve_edge_condition(ctx, edge_condition, universe)?; let docids = universe & G::resolve_condition(ctx, condition, universe)?;
let _ = self.cache.insert(interned_edge_condition, docids); let _ = self.cache.insert(interned_condition, docids);
let docids = &self.cache[&interned_edge_condition]; let docids = &self.cache[&interned_condition];
Ok(docids) Ok(docids)
} }
} }

View File

@ -9,11 +9,11 @@ use crate::search::new::{
/// universe. /// universe.
pub struct DeadEndPathCache<G: RankingRuleGraphTrait> { pub struct DeadEndPathCache<G: RankingRuleGraphTrait> {
/// The set of edge conditions that resolve to no documents. /// The set of edge conditions that resolve to no documents.
pub conditions: SmallBitmap<G::EdgeCondition>, pub conditions: SmallBitmap<G::Condition>,
/// A set of path prefixes that resolve to no documents. /// A set of path prefixes that resolve to no documents.
pub prefixes: PathSet<G::EdgeCondition>, pub prefixes: PathSet<G::Condition>,
/// A set of couples of edge conditions that, taken together, resolve to no documents. /// A set of couples of edge conditions that, taken together, resolve to no documents.
pub condition_couples: MappedInterner<SmallBitmap<G::EdgeCondition>, G::EdgeCondition>, pub condition_couples: MappedInterner<SmallBitmap<G::Condition>, G::Condition>,
} }
impl<G: RankingRuleGraphTrait> Clone for DeadEndPathCache<G> { impl<G: RankingRuleGraphTrait> Clone for DeadEndPathCache<G> {
fn clone(&self) -> Self { fn clone(&self) -> Self {
@ -27,17 +27,17 @@ impl<G: RankingRuleGraphTrait> Clone for DeadEndPathCache<G> {
impl<G: RankingRuleGraphTrait> DeadEndPathCache<G> { impl<G: RankingRuleGraphTrait> DeadEndPathCache<G> {
/// Create a new cache for a ranking rule graph whose conditions are stored in the given interner. /// Create a new cache for a ranking rule graph whose conditions are stored in the given interner.
pub fn new(all_edge_conditions: &FixedSizeInterner<G::EdgeCondition>) -> Self { pub fn new(all_conditions: &FixedSizeInterner<G::Condition>) -> Self {
Self { Self {
conditions: SmallBitmap::for_interned_values_in(all_edge_conditions), conditions: SmallBitmap::for_interned_values_in(all_conditions),
prefixes: PathSet::default(), prefixes: PathSet::default(),
condition_couples: all_edge_conditions condition_couples: all_conditions
.map(|_| SmallBitmap::for_interned_values_in(all_edge_conditions)), .map(|_| SmallBitmap::for_interned_values_in(all_conditions)),
} }
} }
/// Store in the cache that every path containing the given condition resolves to no documents. /// Store in the cache that every path containing the given condition resolves to no documents.
pub fn add_condition(&mut self, condition: Interned<G::EdgeCondition>) { pub fn add_condition(&mut self, condition: Interned<G::Condition>) {
self.conditions.insert(condition); self.conditions.insert(condition);
self.condition_couples.get_mut(condition).clear(); self.condition_couples.get_mut(condition).clear();
self.prefixes.remove_edge(condition); self.prefixes.remove_edge(condition);
@ -46,7 +46,7 @@ impl<G: RankingRuleGraphTrait> DeadEndPathCache<G> {
} }
} }
/// Store in the cache that every path containing the given prefix resolves to no documents. /// Store in the cache that every path containing the given prefix resolves to no documents.
pub fn add_prefix(&mut self, prefix: &[Interned<G::EdgeCondition>]) { pub fn add_prefix(&mut self, prefix: &[Interned<G::Condition>]) {
// TODO: typed PathSet // TODO: typed PathSet
self.prefixes.insert(prefix.iter().copied()); self.prefixes.insert(prefix.iter().copied());
} }
@ -54,8 +54,8 @@ impl<G: RankingRuleGraphTrait> DeadEndPathCache<G> {
/// Store in the cache that every path containing the two given edges resolves to no documents. /// Store in the cache that every path containing the two given edges resolves to no documents.
pub fn add_condition_couple( pub fn add_condition_couple(
&mut self, &mut self,
edge1: Interned<G::EdgeCondition>, edge1: Interned<G::Condition>,
edge2: Interned<G::EdgeCondition>, edge2: Interned<G::Condition>,
) { ) {
self.condition_couples.get_mut(edge1).insert(edge2); self.condition_couples.get_mut(edge1).insert(edge2);
} }
@ -63,8 +63,8 @@ impl<G: RankingRuleGraphTrait> DeadEndPathCache<G> {
/// Returns true if the cache can determine that the given path resolves to no documents. /// Returns true if the cache can determine that the given path resolves to no documents.
pub fn path_is_dead_end( pub fn path_is_dead_end(
&self, &self,
path: &[Interned<G::EdgeCondition>], path: &[Interned<G::Condition>],
path_bitmap: &SmallBitmap<G::EdgeCondition>, path_bitmap: &SmallBitmap<G::Condition>,
) -> bool { ) -> bool {
if path_bitmap.intersects(&self.conditions) { if path_bitmap.intersects(&self.conditions) {
return true; return true;

View File

@ -7,8 +7,8 @@ the same but the edges are replaced.
mod build; mod build;
mod cheapest_paths; mod cheapest_paths;
mod edge_docids_cache; mod condition_docids_cache;
mod empty_paths_cache; mod dead_end_path_cache;
mod path_set; mod path_set;
/// Implementation of the `proximity` ranking rule /// Implementation of the `proximity` ranking rule
@ -19,8 +19,8 @@ mod typo;
use std::collections::HashSet; use std::collections::HashSet;
use std::hash::Hash; use std::hash::Hash;
pub use edge_docids_cache::EdgeConditionDocIdsCache; pub use condition_docids_cache::EdgeConditionDocIdsCache;
pub use empty_paths_cache::DeadEndPathCache; pub use dead_end_path_cache::DeadEndPathCache;
pub use proximity::{ProximityCondition, ProximityGraph}; pub use proximity::{ProximityCondition, ProximityGraph};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
pub use typo::{TypoEdge, TypoGraph}; pub use typo::{TypoEdge, TypoGraph};
@ -32,31 +32,6 @@ use super::small_bitmap::SmallBitmap;
use super::{QueryGraph, QueryNode, SearchContext}; use super::{QueryGraph, QueryNode, SearchContext};
use crate::Result; use crate::Result;
/// The condition that is associated with an edge in the ranking rule graph.
///
/// Some edges are unconditional, which means that traversing them does not reduce
/// the set of candidates.
///
/// Most edges, however, have a condition attached to them. For example, for the
/// proximity ranking rule, the condition could be that a word is N-close to another one.
/// When the edge is traversed, some database operations are executed to retrieve the set
/// of documents that satisfy the condition, which reduces the list of candidate document ids.
///
/// The type parameter `E` is the type of the condition; it is only ever stored
/// behind an [`Interned`] handle, never by value.
pub enum EdgeCondition<E> {
/// The edge has no condition attached: traversing it does not reduce the set of candidates.
Unconditional,
/// The edge is guarded by the interned condition held in this variant.
Conditional(Interned<E>),
}
// `Copy` and `Clone` are implemented by hand rather than derived so that they
// hold for every `E`: a derive would add an `E: Copy` / `E: Clone` bound even
// though `E` is only stored behind an `Interned<E>` handle.
impl<E> Copy for EdgeCondition<E> {}

impl<E> Clone for EdgeCondition<E> {
    /// Returns a copy of `self`.
    ///
    /// Because the type is `Copy`, cloning is a plain dereference; this is the
    /// canonical `Clone` implementation for `Copy` types and keeps the two
    /// traits trivially in agreement if variants are added later.
    fn clone(&self) -> Self {
        *self
    }
}
/// An edge in the ranking rule graph. /// An edge in the ranking rule graph.
/// ///
/// It contains: /// It contains:
@ -68,7 +43,27 @@ pub struct Edge<E> {
pub source_node: Interned<QueryNode>, pub source_node: Interned<QueryNode>,
pub dest_node: Interned<QueryNode>, pub dest_node: Interned<QueryNode>,
pub cost: u8, pub cost: u8,
pub condition: EdgeCondition<E>, pub condition: Option<Interned<E>>,
}
// Implemented by hand so that no `E: Hash` bound is required: `E` only appears
// behind an `Interned<E>` handle.
impl<E> Hash for Edge<E> {
    /// Feeds the source node, destination node, cost and condition of the edge
    /// into `state`, in that order.
    fn hash<H>(&self, state: &mut H)
    where
        H: std::hash::Hasher,
    {
        // A tuple hashes its elements one after the other, and a reference
        // hashes as the value it points to, so this is the same sequence of
        // writes as hashing each field individually.
        let fields = (&self.source_node, &self.dest_node, &self.cost, &self.condition);
        fields.hash(state);
    }
}
impl<E> Eq for Edge<E> {}
impl<E> PartialEq for Edge<E> {
fn eq(&self, other: &Self) -> bool {
self.source_node == other.source_node
&& self.dest_node == other.dest_node
&& self.cost == other.cost
&& self.condition == other.condition
}
} }
/// A trait to be implemented by a marker type to build a graph-based ranking rule. /// A trait to be implemented by a marker type to build a graph-based ranking rule.
@ -113,12 +108,12 @@ pub trait RankingRuleGraphTrait: Sized {
conditions_interner: &mut DedupInterner<Self::EdgeCondition>, conditions_interner: &mut DedupInterner<Self::EdgeCondition>,
source_node: &QueryNode, source_node: &QueryNode,
dest_node: &QueryNode, dest_node: &QueryNode,
) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>>; ) -> Result<Vec<(u8, Option<Interned<Self::EdgeCondition>>)>>;
fn log_state( fn log_state(
graph: &RankingRuleGraph<Self>, graph: &RankingRuleGraph<Self>,
paths: &[Vec<Interned<Self::EdgeCondition>>], paths: &[Vec<Interned<Self::EdgeCondition>>],
empty_paths_cache: &DeadEndPathCache<Self>, dead_end_path_cache: &DeadEndPathCache<Self>,
universe: &RoaringBitmap, universe: &RoaringBitmap,
distances: &MappedInterner<Vec<(u16, SmallBitmap<Self::EdgeCondition>)>, QueryNode>, distances: &MappedInterner<Vec<(u16, SmallBitmap<Self::EdgeCondition>)>, QueryNode>,
cost: u16, cost: u16,
@ -151,9 +146,8 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
pub fn remove_edges_with_condition(&mut self, condition_to_remove: Interned<G::EdgeCondition>) { pub fn remove_edges_with_condition(&mut self, condition_to_remove: Interned<G::EdgeCondition>) {
for (edge_id, edge_opt) in self.edges_store.iter_mut() { for (edge_id, edge_opt) in self.edges_store.iter_mut() {
let Some(edge) = edge_opt.as_mut() else { continue }; let Some(edge) = edge_opt.as_mut() else { continue };
match edge.condition { let Some(condition) = edge.condition else { continue };
EdgeCondition::Unconditional => continue,
EdgeCondition::Conditional(condition) => {
if condition == condition_to_remove { if condition == condition_to_remove {
let (source_node, _dest_node) = (edge.source_node, edge.dest_node); let (source_node, _dest_node) = (edge.source_node, edge.dest_node);
*edge_opt = None; *edge_opt = None;
@ -161,6 +155,4 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
} }
} }
} }
}
}
} }

View File

@ -4,7 +4,7 @@
use crate::search::new::interner::Interned; use crate::search::new::interner::Interned;
/// A set of [`Path`] /// A set of `Vec<Interned<T>>`.
pub struct PathSet<T> { pub struct PathSet<T> {
nodes: Vec<(Interned<T>, Self)>, nodes: Vec<(Interned<T>, Self)>,
is_end: bool, is_end: bool,

View File

@ -7,7 +7,6 @@ use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_graph::QueryNodeData; use crate::search::new::query_graph::QueryNodeData;
use crate::search::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm}; use crate::search::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm};
use crate::search::new::ranking_rule_graph::proximity::WordPair; use crate::search::new::ranking_rule_graph::proximity::WordPair;
use crate::search::new::ranking_rule_graph::EdgeCondition;
use crate::search::new::{QueryNode, SearchContext}; use crate::search::new::{QueryNode, SearchContext};
use crate::Result; use crate::Result;
use heed::RoTxn; use heed::RoTxn;
@ -40,7 +39,7 @@ pub fn build_edges<'ctx>(
conditions_interner: &mut DedupInterner<ProximityCondition>, conditions_interner: &mut DedupInterner<ProximityCondition>,
from_node: &QueryNode, from_node: &QueryNode,
to_node: &QueryNode, to_node: &QueryNode,
) -> Result<Vec<(u8, EdgeCondition<ProximityCondition>)>> { ) -> Result<Vec<(u8, Option<Interned<ProximityCondition>>)>> {
let SearchContext { let SearchContext {
index, index,
txn, txn,
@ -52,7 +51,7 @@ pub fn build_edges<'ctx>(
} = ctx; } = ctx;
let right_term = match &to_node.data { let right_term = match &to_node.data {
QueryNodeData::End => return Ok(vec![(0, EdgeCondition::Unconditional)]), QueryNodeData::End => return Ok(vec![(0, None)]),
QueryNodeData::Deleted | QueryNodeData::Start => return Ok(vec![]), QueryNodeData::Deleted | QueryNodeData::Start => return Ok(vec![]),
QueryNodeData::Term(term) => term, QueryNodeData::Term(term) => term,
}; };
@ -70,7 +69,7 @@ pub fn build_edges<'ctx>(
QueryNodeData::Start => { QueryNodeData::Start => {
return Ok(vec![( return Ok(vec![(
(right_ngram_length - 1) as u8, (right_ngram_length - 1) as u8,
EdgeCondition::Conditional( Some(
conditions_interner conditions_interner
.insert(ProximityCondition::Term { term: *right_term_interned }), .insert(ProximityCondition::Term { term: *right_term_interned }),
), ),
@ -88,7 +87,7 @@ pub fn build_edges<'ctx>(
// but `sun` and `are` have no proximity condition between them // but `sun` and `are` have no proximity condition between them
return Ok(vec![( return Ok(vec![(
(right_ngram_length - 1) as u8, (right_ngram_length - 1) as u8,
EdgeCondition::Conditional( Some(
conditions_interner.insert(ProximityCondition::Term { term: *right_term_interned }), conditions_interner.insert(ProximityCondition::Term { term: *right_term_interned }),
), ),
)]); )]);
@ -140,7 +139,7 @@ pub fn build_edges<'ctx>(
.map(|(cost, word_pairs)| { .map(|(cost, word_pairs)| {
( (
cost, cost,
EdgeCondition::Conditional( Some(
conditions_interner conditions_interner
.insert(ProximityCondition::Pairs { pairs: word_pairs.into_boxed_slice() }), .insert(ProximityCondition::Pairs { pairs: word_pairs.into_boxed_slice() }),
), ),
@ -149,9 +148,7 @@ pub fn build_edges<'ctx>(
.collect::<Vec<_>>(); .collect::<Vec<_>>();
new_edges.push(( new_edges.push((
8 + (right_ngram_length - 1) as u8, 8 + (right_ngram_length - 1) as u8,
EdgeCondition::Conditional( Some(conditions_interner.insert(ProximityCondition::Term { term: *right_term_interned })),
conditions_interner.insert(ProximityCondition::Term { term: *right_term_interned }),
),
)); ));
Ok(new_edges) Ok(new_edges)
} }

View File

@ -6,8 +6,8 @@ use std::iter::FromIterator;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::empty_paths_cache::DeadEndPathCache; use super::dead_end_path_cache::DeadEndPathCache;
use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait}; use super::{RankingRuleGraph, RankingRuleGraphTrait};
use crate::search::new::interner::{DedupInterner, Interned, MappedInterner}; use crate::search::new::interner::{DedupInterner, Interned, MappedInterner};
use crate::search::new::logger::SearchLogger; use crate::search::new::logger::SearchLogger;
use crate::search::new::query_term::{Phrase, QueryTerm}; use crate::search::new::query_term::{Phrase, QueryTerm};
@ -60,20 +60,20 @@ impl RankingRuleGraphTrait for ProximityGraph {
conditions_interner: &mut DedupInterner<Self::EdgeCondition>, conditions_interner: &mut DedupInterner<Self::EdgeCondition>,
source_node: &QueryNode, source_node: &QueryNode,
dest_node: &QueryNode, dest_node: &QueryNode,
) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>> { ) -> Result<Vec<(u8, Option<Interned<Self::EdgeCondition>>)>> {
build::build_edges(ctx, conditions_interner, source_node, dest_node) build::build_edges(ctx, conditions_interner, source_node, dest_node)
} }
fn log_state( fn log_state(
graph: &RankingRuleGraph<Self>, graph: &RankingRuleGraph<Self>,
paths: &[Vec<Interned<ProximityCondition>>], paths: &[Vec<Interned<ProximityCondition>>],
empty_paths_cache: &DeadEndPathCache<Self>, dead_end_path_cache: &DeadEndPathCache<Self>,
universe: &RoaringBitmap, universe: &RoaringBitmap,
distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>, distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>,
cost: u16, cost: u16,
logger: &mut dyn SearchLogger<QueryGraph>, logger: &mut dyn SearchLogger<QueryGraph>,
) { ) {
logger.log_proximity_state(graph, paths, empty_paths_cache, universe, distances, cost); logger.log_proximity_state(graph, paths, dead_end_path_cache, universe, distances, cost);
} }
fn label_for_edge_condition<'ctx>( fn label_for_edge_condition<'ctx>(

View File

@ -1,7 +1,7 @@
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::empty_paths_cache::DeadEndPathCache; use super::dead_end_path_cache::DeadEndPathCache;
use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait}; use super::{RankingRuleGraph, RankingRuleGraphTrait};
use crate::search::new::interner::{DedupInterner, Interned, MappedInterner}; use crate::search::new::interner::{DedupInterner, Interned, MappedInterner};
use crate::search::new::logger::SearchLogger; use crate::search::new::logger::SearchLogger;
use crate::search::new::query_graph::QueryNodeData; use crate::search::new::query_graph::QueryNodeData;
@ -58,7 +58,7 @@ impl RankingRuleGraphTrait for TypoGraph {
conditions_interner: &mut DedupInterner<Self::EdgeCondition>, conditions_interner: &mut DedupInterner<Self::EdgeCondition>,
_from_node: &QueryNode, _from_node: &QueryNode,
to_node: &QueryNode, to_node: &QueryNode,
) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>> { ) -> Result<Vec<(u8, Option<Interned<Self::EdgeCondition>>)>> {
let SearchContext { term_interner, .. } = ctx; let SearchContext { term_interner, .. } = ctx;
match &to_node.data { match &to_node.data {
QueryNodeData::Term(LocatedQueryTerm { value, positions }) => { QueryNodeData::Term(LocatedQueryTerm { value, positions }) => {
@ -121,7 +121,7 @@ impl RankingRuleGraphTrait for TypoGraph {
if !new_term.is_empty() { if !new_term.is_empty() {
edges.push(( edges.push((
nbr_typos as u8 + base_cost, nbr_typos as u8 + base_cost,
EdgeCondition::Conditional(conditions_interner.insert(TypoEdge { Some(conditions_interner.insert(TypoEdge {
term: term_interner.insert(new_term), term: term_interner.insert(new_term),
nbr_typos: nbr_typos as u8, nbr_typos: nbr_typos as u8,
})), })),
@ -130,7 +130,7 @@ impl RankingRuleGraphTrait for TypoGraph {
} }
Ok(edges) Ok(edges)
} }
QueryNodeData::End => Ok(vec![(0, EdgeCondition::Unconditional)]), QueryNodeData::End => Ok(vec![(0, None)]),
QueryNodeData::Deleted | QueryNodeData::Start => panic!(), QueryNodeData::Deleted | QueryNodeData::Start => panic!(),
} }
} }
@ -138,13 +138,13 @@ impl RankingRuleGraphTrait for TypoGraph {
fn log_state( fn log_state(
graph: &RankingRuleGraph<Self>, graph: &RankingRuleGraph<Self>,
paths: &[Vec<Interned<TypoEdge>>], paths: &[Vec<Interned<TypoEdge>>],
empty_paths_cache: &DeadEndPathCache<Self>, dead_end_path_cache: &DeadEndPathCache<Self>,
universe: &RoaringBitmap, universe: &RoaringBitmap,
distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoEdge>)>, QueryNode>, distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoEdge>)>, QueryNode>,
cost: u16, cost: u16,
logger: &mut dyn SearchLogger<QueryGraph>, logger: &mut dyn SearchLogger<QueryGraph>,
) { ) {
logger.log_typo_state(graph, paths, empty_paths_cache, universe, distances, cost); logger.log_typo_state(graph, paths, dead_end_path_cache, universe, distances, cost);
} }
fn label_for_edge_condition<'ctx>( fn label_for_edge_condition<'ctx>(

View File

@ -2,6 +2,7 @@ use std::marker::PhantomData;
use super::interner::{FixedSizeInterner, Interned}; use super::interner::{FixedSizeInterner, Interned};
/// A compact set of [`Interned<T>`]
pub struct SmallBitmap<T> { pub struct SmallBitmap<T> {
internal: SmallBitmapInternal, internal: SmallBitmapInternal,
_phantom: PhantomData<T>, _phantom: PhantomData<T>,