Speed up the graph-based ranking rule when many different costs exist

This commit is contained in:
Loïc Lecrenier 2023-05-01 15:33:28 +02:00
parent 3b2c8b9f25
commit 30fb1153cc
3 changed files with 35 additions and 46 deletions

View File

@ -309,11 +309,6 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
Ok(ControlFlow::Continue(()))
}
})?;
// if at_least_one {
// unsafe {
// println!("\n===== {id} COST: {cost} ==== PATHS: {COUNT_PATHS} ==== NODES: {COUNT_VISITED_NODES} ===== UNIVERSE: {universe}", id=self.id, universe=universe.len());
// }
// }
logger.log_internal_state(graph);
logger.log_internal_state(&good_paths);
@ -337,8 +332,14 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
let next_query_graph = QueryGraph::build_from_paths(paths);
if !nodes_with_removed_outgoing_conditions.is_empty() {
graph.update_all_costs_before_nodes(&nodes_with_removed_outgoing_conditions, all_costs);
#[allow(clippy::comparison_chain)]
if nodes_with_removed_outgoing_conditions.len() == 1 {
graph.update_all_costs_before_node(
*nodes_with_removed_outgoing_conditions.first().unwrap(),
all_costs,
);
} else if nodes_with_removed_outgoing_conditions.len() > 1 {
*all_costs = graph.find_all_costs_to_end();
}
self.state = Some(state);

View File

@ -8,7 +8,6 @@ use crate::search::new::interner::Interner;
use crate::Result;
use fxhash::{FxHashMap, FxHasher};
use std::cmp::Ordering;
use std::collections::hash_map::Entry;
use std::collections::BTreeMap;
use std::hash::{Hash, Hasher};
@ -364,8 +363,6 @@ impl QueryGraph {
b2 c2 d e2
```
But we accept the first representation as it reduces the size
of the graph and shouldn't cause many problems.
*/
pub fn build_from_paths(
paths: Vec<Vec<(Option<LocatedQueryTermSubset>, LocatedQueryTermSubset)>>,

View File

@ -1,8 +1,11 @@
#![allow(clippy::too_many_arguments)]
use std::collections::{BTreeSet, VecDeque};
use std::iter::FromIterator;
use std::ops::ControlFlow;
use fxhash::FxHashSet;
use super::{DeadEndsCache, RankingRuleGraph, RankingRuleGraphTrait};
use crate::search::new::interner::{Interned, MappedInterner};
use crate::search::new::query_graph::QueryNode;
@ -112,9 +115,6 @@ impl<G: RankingRuleGraphTrait> VisitorState<G> {
}
}
}
// if there wasn't any valid path from this node to the end node, then
// this node is a dead end **for this specific cost**.
// we could encode this in the dead-ends cache
Ok(ControlFlow::Continue(any_valid))
}
@ -126,11 +126,11 @@ impl<G: RankingRuleGraphTrait> VisitorState<G> {
visit: VisitFn<G>,
ctx: &mut VisitorContext<G>,
) -> Result<ControlFlow<(), bool>> {
if ctx
if !ctx
.all_costs_from_node
.get(dest_node)
.iter()
.all(|next_cost| *next_cost != self.remaining_cost)
.any(|next_cost| *next_cost == self.remaining_cost)
{
return Ok(ControlFlow::Continue(false));
}
@ -158,14 +158,12 @@ impl<G: RankingRuleGraphTrait> VisitorState<G> {
) -> Result<ControlFlow<(), bool>> {
assert!(dest_node != ctx.graph.query_graph.end_node);
if self.forbidden_conditions_to_nodes.contains(dest_node)
if self.forbidden_conditions.contains(condition)
|| self.forbidden_conditions_to_nodes.contains(dest_node)
|| edge_new_nodes_to_skip.intersects(&self.visited_nodes)
{
return Ok(ControlFlow::Continue(false));
}
if self.forbidden_conditions.contains(condition) {
return Ok(ControlFlow::Continue(false));
}
// Checking that from the destination node, there is at least
// one cost that we can visit that corresponds to our remaining budget.
@ -244,48 +242,41 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
costs_to_end
}
pub fn update_all_costs_before_nodes(
pub fn update_all_costs_before_node(
&self,
removed_nodes: &BTreeSet<Interned<QueryNode>>,
node_with_removed_outgoing_conditions: Interned<QueryNode>,
costs: &mut MappedInterner<QueryNode, Vec<u64>>,
) {
// unsafe {
// FIND_ALL_COSTS_INC_COUNT += 1;
// println!(
// "update_all_costs_after_removing_edge incrementally count: {}",
// FIND_ALL_COSTS_INC_COUNT
// );
// }
let mut enqueued = SmallBitmap::new(self.query_graph.nodes.len());
let mut node_stack = VecDeque::new();
for node in removed_nodes.iter() {
enqueued.insert(*node);
node_stack.push_back(*node);
}
enqueued.insert(node_with_removed_outgoing_conditions);
node_stack.push_back(node_with_removed_outgoing_conditions);
while let Some(cur_node) = node_stack.pop_front() {
let mut self_costs = BTreeSet::<u64>::new();
'main_loop: while let Some(cur_node) = node_stack.pop_front() {
let mut costs_to_remove = FxHashSet::default();
for c in costs.get(cur_node) {
costs_to_remove.insert(*c);
}
let cur_node_edges = &self.edges_of_node.get(cur_node);
for edge_idx in cur_node_edges.iter() {
let edge = self.edges_store.get(edge_idx).as_ref().unwrap();
let succ_node = edge.dest_node;
let succ_costs = costs.get(succ_node);
for succ_distance in succ_costs {
self_costs.insert(edge.cost as u64 + succ_distance);
for cost in costs.get(edge.dest_node).iter() {
costs_to_remove.remove(&(*cost + edge.cost as u64));
if costs_to_remove.is_empty() {
continue 'main_loop;
}
}
}
let costs_to_end_cur_node = costs.get_mut(cur_node);
for cost in self_costs.iter() {
costs_to_end_cur_node.push(*cost);
if costs_to_remove.is_empty() {
continue 'main_loop;
}
let self_costs = self_costs.into_iter().collect::<Vec<_>>();
if &self_costs == costs.get(cur_node) {
continue;
let mut new_costs = BTreeSet::from_iter(costs.get(cur_node).iter().copied());
for c in costs_to_remove {
new_costs.remove(&c);
}
*costs.get_mut(cur_node) = self_costs;
*costs.get_mut(cur_node) = new_costs.into_iter().collect();
for prev_node in self.query_graph.nodes.get(cur_node).predecessors.iter() {
if !enqueued.contains(prev_node) {