Simplify graph-based ranking rule impl

This commit is contained in:
Loïc Lecrenier 2023-03-19 14:43:14 +01:00
parent c6ff97a220
commit dd491320e5
2 changed files with 7 additions and 60 deletions

View File

@ -87,7 +87,7 @@ pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
/// Cache to retrieve the docids associated with each edge /// Cache to retrieve the docids associated with each edge
conditions_cache: ConditionDocIdsCache<G>, conditions_cache: ConditionDocIdsCache<G>,
/// Cache used to optimistically discard paths that resolve to no documents. /// Cache used to optimistically discard paths that resolve to no documents.
dead_end_path_cache: DeadEndsCache<G::Condition>, dead_ends_cache: DeadEndsCache<G::Condition>,
/// A structure giving the list of possible costs from each node to the end node, /// A structure giving the list of possible costs from each node to the end node,
/// along with a set of unavoidable edges that must be traversed to achieve that distance. /// along with a set of unavoidable edges that must be traversed to achieve that distance.
all_distances: MappedInterner<Vec<(u16, SmallBitmap<G::Condition>)>, QueryNode>, all_distances: MappedInterner<Vec<(u16, SmallBitmap<G::Condition>)>, QueryNode>,
@ -95,34 +95,6 @@ pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
cur_distance_idx: usize, cur_distance_idx: usize,
} }
/// Prunes from the graph every condition that resolves to no document ids.
///
/// Each edge of `graph` that carries a condition has the docids of that condition
/// looked up through `condition_docids_cache` (the lookup is given `universe`).
/// When the resulting set is empty:
/// - all the edges holding this condition are removed from `graph`,
/// - the condition is registered as forbidden in `dead_end_path_cache`,
/// - the entry of the condition is evicted from `condition_docids_cache`.
///
/// Edge slots that are empty and edges without a condition are left untouched.
///
/// # Errors
///
/// Forwards any error returned while computing the docids of a condition.
fn remove_empty_edges<'ctx, G: RankingRuleGraphTrait>(
    ctx: &mut SearchContext<'ctx>,
    graph: &mut RankingRuleGraph<G>,
    condition_docids_cache: &mut ConditionDocIdsCache<G>,
    universe: &RoaringBitmap,
    dead_end_path_cache: &mut DeadEndsCache<G::Condition>,
) -> Result<()> {
    for edge_idx in graph.edges_store.indexes() {
        // Only edges that exist and that carry a condition are worth inspecting.
        let condition = match graph.edges_store.get(edge_idx).as_ref() {
            Some(edge) => match edge.condition {
                Some(condition) => condition,
                None => continue,
            },
            None => continue,
        };

        let resolves_to_nothing = condition_docids_cache
            .get_condition_docids(ctx, condition, graph, universe)?
            .is_empty();
        if !resolves_to_nothing {
            continue;
        }

        // The condition cannot match anything: drop it everywhere it is referenced.
        graph.remove_edges_with_condition(condition);
        dead_end_path_cache.forbid_condition(condition);
        condition_docids_cache.cache.remove(&condition);
    }
    Ok(())
}
impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBasedRankingRule<G> { impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBasedRankingRule<G> {
fn id(&self) -> String { fn id(&self) -> String {
self.id.clone() self.id.clone()
@ -131,22 +103,12 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
&mut self, &mut self,
ctx: &mut SearchContext<'ctx>, ctx: &mut SearchContext<'ctx>,
_logger: &mut dyn SearchLogger<QueryGraph>, _logger: &mut dyn SearchLogger<QueryGraph>,
universe: &RoaringBitmap, _universe: &RoaringBitmap,
query_graph: &QueryGraph, query_graph: &QueryGraph,
) -> Result<()> { ) -> Result<()> {
let mut graph = RankingRuleGraph::build(ctx, query_graph.clone())?; let graph = RankingRuleGraph::build(ctx, query_graph.clone())?;
let mut condition_docids_cache = ConditionDocIdsCache::default(); let condition_docids_cache = ConditionDocIdsCache::default();
let mut dead_end_path_cache = DeadEndsCache::new(&graph.conditions_interner); let dead_end_path_cache = DeadEndsCache::new(&graph.conditions_interner);
// First simplify the graph as much as possible, by computing the docids of all the conditions
// within the rule's universe and removing the edges that have no associated docids.
remove_empty_edges(
ctx,
&mut graph,
&mut condition_docids_cache,
universe,
&mut dead_end_path_cache,
)?;
// Then pre-compute the cost of all paths from each node to the end node // Then pre-compute the cost of all paths from each node to the end node
let all_distances = graph.initialize_distances_with_necessary_edges(); let all_distances = graph.initialize_distances_with_necessary_edges();
@ -154,7 +116,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
let state = GraphBasedRankingRuleState { let state = GraphBasedRankingRuleState {
graph, graph,
conditions_cache: condition_docids_cache, conditions_cache: condition_docids_cache,
dead_end_path_cache, dead_ends_cache: dead_end_path_cache,
all_distances, all_distances,
cur_distance_idx: 0, cur_distance_idx: 0,
}; };
@ -177,15 +139,6 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
// should never happen // should never happen
let mut state = self.state.take().unwrap(); let mut state = self.state.take().unwrap();
// TODO: does this have a real positive performance impact?
remove_empty_edges(
ctx,
&mut state.graph,
&mut state.conditions_cache,
universe,
&mut state.dead_end_path_cache,
)?;
// If the cur_distance_idx does not point to a valid cost in the `all_distances` // If the cur_distance_idx does not point to a valid cost in the `all_distances`
// structure, then we have computed all the buckets and can return. // structure, then we have computed all the buckets and can return.
if state.cur_distance_idx if state.cur_distance_idx
@ -205,7 +158,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
let GraphBasedRankingRuleState { let GraphBasedRankingRuleState {
graph, graph,
conditions_cache: condition_docids_cache, conditions_cache: condition_docids_cache,
dead_end_path_cache, dead_ends_cache: dead_end_path_cache,
all_distances, all_distances,
cur_distance_idx: _, cur_distance_idx: _,
} = &mut state; } = &mut state;

View File

@ -10,12 +10,6 @@ use crate::search::new::query_graph::QueryNode;
use crate::search::new::small_bitmap::SmallBitmap; use crate::search::new::small_bitmap::SmallBitmap;
use crate::Result; use crate::Result;
/// A path, described by the list of edges it goes through and its cost.
///
/// NOTE(review): presumably a path in a ranking rule graph — confirm against the
/// users of this type.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Path {
    /// The edges of the path, stored as `u16` values
    /// (presumably edge indexes, in traversal order — TODO confirm).
    pub edges: Vec<u16>,
    /// The cost associated with the path.
    pub cost: u64,
}
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
pub fn visit_paths_of_cost( pub fn visit_paths_of_cost(
&mut self, &mut self,