Fix bugs in query graph's "remove word" and "cheapest paths" algos

This commit is contained in:
Loïc Lecrenier 2023-02-27 14:16:39 +01:00
parent 6806640ef0
commit 0e1fbbf7c6
4 changed files with 9 additions and 18 deletions

View File

@ -326,7 +326,7 @@ shape: class").unwrap();
fn paths_d2_description(graph: &RankingRuleGraph<ProximityGraph>, paths_idx: &str, paths: &PathsMap<u64>, file: &mut File) {
for (edge_idx, rest) in paths.nodes.iter() {
let Some(Edge { from_node, to_node, cost, .. }) = graph.all_edges[*edge_idx as usize].as_ref() else { continue };
let Edge { from_node, to_node, cost, .. } = graph.all_edges[*edge_idx as usize].as_ref().unwrap() ;
let from_node = &graph.query_graph.nodes[*from_node as usize];
let from_node_desc = match from_node {
QueryNode::Term(term) => match &term.value {

View File

@ -192,18 +192,14 @@ impl QueryGraph {
}
pub fn remove_words_at_position(&mut self, position: i8) {
let mut nodes_to_remove_keeping_edges = vec![];
let mut nodes_to_remove = vec![];
for (node_idx, node) in self.nodes.iter().enumerate() {
let node_idx = node_idx as u32;
let QueryNode::Term(LocatedQueryTerm { value: _, positions }) = node else { continue };
if positions.contains(&position) {
if positions.start() == &position {
nodes_to_remove_keeping_edges.push(node_idx)
} else if positions.contains(&position) {
nodes_to_remove.push(node_idx)
}
}
self.remove_nodes(&nodes_to_remove);
self.remove_nodes_keep_edges(&nodes_to_remove_keeping_edges);
self.simplify();

View File

@ -74,13 +74,17 @@ impl KCheapestPathsState {
empty_paths_cache: &EmptyPathsCache,
into_map: &mut PathsMap<u64>,
) -> Option<Self> {
into_map.add_path(&self.kth_cheapest_path);
if !empty_paths_cache.path_is_empty(&self.kth_cheapest_path.edges) {
into_map.add_path(&self.kth_cheapest_path);
}
let cur_cost = self.kth_cheapest_path.cost;
while self.kth_cheapest_path.cost <= cur_cost {
if let Some(next_self) = self.compute_next_cheapest_paths(graph, empty_paths_cache) {
self = next_self;
if self.kth_cheapest_path.cost == cur_cost {
into_map.add_path(&self.kth_cheapest_path);
} else {
break;
}
} else {
return None;
@ -89,8 +93,6 @@ impl KCheapestPathsState {
Some(self)
}
// TODO: use the cache to potentially remove edges that return an empty RoaringBitmap
// TODO: return an Option<&'self Path>?
fn compute_next_cheapest_paths<G: RankingRuleGraphTrait>(
mut self,
graph: &mut RankingRuleGraph<G>,
@ -141,19 +143,12 @@ impl KCheapestPathsState {
}
while let Some(mut next_cheapest_paths_entry) = self.potential_cheapest_paths.first_entry()
{
// This could be implemented faster
// Here, maybe I should filter the potential cheapest paths so that they
// don't contain any removed edge?
let cost = *next_cheapest_paths_entry.key();
let next_cheapest_paths = next_cheapest_paths_entry.get_mut();
while let Some((next_cheapest_path, cost2)) = next_cheapest_paths.remove_first() {
assert_eq!(cost, cost2);
if next_cheapest_path
.iter()
.any(|edge_index| graph.all_edges[*edge_index as usize].is_none())
{
if empty_paths_cache.path_is_empty(&next_cheapest_path) {
continue;
} else {
self.cheapest_paths.insert(next_cheapest_path.iter().copied(), cost);

View File

@ -313,7 +313,7 @@ mod tests {
let mut db_cache = DatabaseCache::default();
let query_graph =
make_query_graph(&index, &txn, &mut db_cache, "released from prison by the government")
make_query_graph(&index, &txn, &mut db_cache, "the sun flower is facing the su")
.unwrap();
// TODO: filters + maybe distinct attributes?