From 0e1fbbf7c60e9b79a35edb66737bc2f9707d6490 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Mon, 27 Feb 2023 14:16:39 +0100 Subject: [PATCH] Fix bugs in query graph's "remove word" and "cheapest paths" algos --- milli/src/search/new/logger/detailed.rs | 2 +- milli/src/search/new/query_graph.rs | 6 +----- .../new/ranking_rule_graph/cheapest_paths.rs | 17 ++++++----------- milli/src/search/new/ranking_rules.rs | 2 +- 4 files changed, 9 insertions(+), 18 deletions(-) diff --git a/milli/src/search/new/logger/detailed.rs b/milli/src/search/new/logger/detailed.rs index 86fcacb3e..b3f5bbcce 100644 --- a/milli/src/search/new/logger/detailed.rs +++ b/milli/src/search/new/logger/detailed.rs @@ -326,7 +326,7 @@ shape: class").unwrap(); fn paths_d2_description(graph: &RankingRuleGraph, paths_idx: &str, paths: &PathsMap, file: &mut File) { for (edge_idx, rest) in paths.nodes.iter() { - let Some(Edge { from_node, to_node, cost, .. }) = graph.all_edges[*edge_idx as usize].as_ref() else { continue }; + let Edge { from_node, to_node, cost, .. } = graph.all_edges[*edge_idx as usize].as_ref().unwrap() ; let from_node = &graph.query_graph.nodes[*from_node as usize]; let from_node_desc = match from_node { QueryNode::Term(term) => match &term.value { diff --git a/milli/src/search/new/query_graph.rs b/milli/src/search/new/query_graph.rs index 5ed746c27..c07343c9b 100644 --- a/milli/src/search/new/query_graph.rs +++ b/milli/src/search/new/query_graph.rs @@ -192,18 +192,14 @@ impl QueryGraph { } pub fn remove_words_at_position(&mut self, position: i8) { let mut nodes_to_remove_keeping_edges = vec![]; - let mut nodes_to_remove = vec![]; for (node_idx, node) in self.nodes.iter().enumerate() { let node_idx = node_idx as u32; let QueryNode::Term(LocatedQueryTerm { value: _, positions }) = node else { continue }; - if positions.contains(&position) { + if positions.start() == &position { nodes_to_remove_keeping_edges.push(node_idx) - } else if positions.contains(&position) { - nodes_to_remove.push(node_idx) } } - self.remove_nodes(&nodes_to_remove); self.remove_nodes_keep_edges(&nodes_to_remove_keeping_edges); self.simplify(); diff --git a/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs b/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs index fdce85159..759780200 100644 --- a/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs +++ b/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs @@ -74,13 +74,17 @@ impl KCheapestPathsState { empty_paths_cache: &EmptyPathsCache, into_map: &mut PathsMap, ) -> Option { - into_map.add_path(&self.kth_cheapest_path); + if !empty_paths_cache.path_is_empty(&self.kth_cheapest_path.edges) { + into_map.add_path(&self.kth_cheapest_path); + } let cur_cost = self.kth_cheapest_path.cost; while self.kth_cheapest_path.cost <= cur_cost { if let Some(next_self) = self.compute_next_cheapest_paths(graph, empty_paths_cache) { self = next_self; if self.kth_cheapest_path.cost == cur_cost { into_map.add_path(&self.kth_cheapest_path); + } else { + break; } } else { return None; @@ -89,8 +93,6 @@ impl KCheapestPathsState { Some(self) } - // TODO: use the cache to potentially remove edges that return an empty RoaringBitmap - // TODO: return an Option<&'self Path>? fn compute_next_cheapest_paths( mut self, graph: &mut RankingRuleGraph, @@ -141,19 +143,12 @@ impl KCheapestPathsState { } while let Some(mut next_cheapest_paths_entry) = self.potential_cheapest_paths.first_entry() { - // This could be implemented faster - // Here, maybe I should filter the potential cheapest paths so that they - // don't contain any removed edge? - let cost = *next_cheapest_paths_entry.key(); let next_cheapest_paths = next_cheapest_paths_entry.get_mut(); while let Some((next_cheapest_path, cost2)) = next_cheapest_paths.remove_first() { assert_eq!(cost, cost2); - if next_cheapest_path - .iter() - .any(|edge_index| graph.all_edges[*edge_index as usize].is_none()) - { + if empty_paths_cache.path_is_empty(&next_cheapest_path) { continue; } else { self.cheapest_paths.insert(next_cheapest_path.iter().copied(), cost); diff --git a/milli/src/search/new/ranking_rules.rs b/milli/src/search/new/ranking_rules.rs index d8d754f21..74ded6d97 100644 --- a/milli/src/search/new/ranking_rules.rs +++ b/milli/src/search/new/ranking_rules.rs @@ -313,7 +313,7 @@ mod tests { let mut db_cache = DatabaseCache::default(); let query_graph = - make_query_graph(&index, &txn, &mut db_cache, "released from prison by the government") + make_query_graph(&index, &txn, &mut db_cache, "the sun flower is facing the su") .unwrap(); // TODO: filters + maybe distinct attributes?