From dcf3f1d18a3afad62cc52e99a5c359e4c610f1e9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Tue, 21 Feb 2023 12:55:44 +0100 Subject: [PATCH] Remove EdgeIndex and NodeIndex types, prefer u32 instead --- milli/src/search/new/db_cache.rs | 13 ++-- milli/src/search/new/query_graph.rs | 62 ++++++++---------- .../search/new/ranking_rule_graph/build.rs | 8 +-- .../new/ranking_rule_graph/cheapest_paths.rs | 63 +++++++------------ .../ranking_rule_graph/edge_docids_cache.rs | 19 +++--- .../ranking_rule_graph/empty_paths_cache.rs | 11 ++-- .../src/search/new/ranking_rule_graph/mod.rs | 53 +++++++--------- .../new/ranking_rule_graph/paths_map.rs | 52 +++++++-------- .../new/ranking_rule_graph/resolve_paths.rs | 2 +- milli/src/search/new/resolve_query_graph.rs | 21 ++++--- 10 files changed, 139 insertions(+), 165 deletions(-) diff --git a/milli/src/search/new/db_cache.rs b/milli/src/search/new/db_cache.rs index 15f9f7873..0a058d339 100644 --- a/milli/src/search/new/db_cache.rs +++ b/milli/src/search/new/db_cache.rs @@ -1,17 +1,18 @@ -use std::collections::{hash_map::Entry, HashMap}; +use std::collections::hash_map::Entry; +use fxhash::FxHashMap; use heed::{types::ByteSlice, RoTxn}; use crate::{Index, Result}; #[derive(Default)] pub struct DatabaseCache<'transaction> { - pub word_pair_proximity_docids: HashMap<(u8, String, String), Option<&'transaction [u8]>>, + pub word_pair_proximity_docids: FxHashMap<(u8, String, String), Option<&'transaction [u8]>>, pub word_prefix_pair_proximity_docids: - HashMap<(u8, String, String), Option<&'transaction [u8]>>, - pub word_docids: HashMap>, - pub exact_word_docids: HashMap>, - pub word_prefix_docids: HashMap>, + FxHashMap<(u8, String, String), Option<&'transaction [u8]>>, + pub word_docids: FxHashMap>, + pub exact_word_docids: FxHashMap>, + pub word_prefix_docids: FxHashMap>, } impl<'transaction> DatabaseCache<'transaction> { pub fn get_word_docids( diff --git a/milli/src/search/new/query_graph.rs b/milli/src/search/new/query_graph.rs index 821c1a226..cbe319af7 100644 --- a/milli/src/search/new/query_graph.rs +++ b/milli/src/search/new/query_graph.rs @@ -26,18 +26,10 @@ pub struct Edges { pub successors: RoaringBitmap, } -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct NodeIndex(pub u32); -impl fmt::Display for NodeIndex { - fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { - fmt::Display::fmt(&self.0, f) - } -} - #[derive(Debug, Clone)] pub struct QueryGraph { - pub root_node: NodeIndex, - pub end_node: NodeIndex, + pub root_node: u32, + pub end_node: u32, pub nodes: Vec, pub edges: Vec, } @@ -56,28 +48,28 @@ impl Default for QueryGraph { Edges { predecessors: RoaringBitmap::new(), successors: RoaringBitmap::new() }, ]; - Self { root_node: NodeIndex(0), end_node: NodeIndex(1), nodes, edges } + Self { root_node: 0, end_node: 1, nodes, edges } } } impl QueryGraph { - fn connect_to_node(&mut self, from_nodes: &[NodeIndex], to_node: NodeIndex) { + fn connect_to_node(&mut self, from_nodes: &[u32], to_node: u32) { for &from_node in from_nodes { - self.edges[from_node.0 as usize].successors.insert(to_node.0); - self.edges[to_node.0 as usize].predecessors.insert(from_node.0); + self.edges[from_node as usize].successors.insert(to_node); + self.edges[to_node as usize].predecessors.insert(from_node); } } - fn add_node(&mut self, from_nodes: &[NodeIndex], node: QueryNode) -> NodeIndex { + fn add_node(&mut self, from_nodes: &[u32], node: QueryNode) -> u32 { let new_node_idx = self.nodes.len() as u32; self.nodes.push(node); self.edges.push(Edges { - predecessors: from_nodes.iter().map(|x| x.0).collect(), + predecessors: from_nodes.iter().collect(), successors: RoaringBitmap::new(), }); for from_node in from_nodes { - self.edges[from_node.0 as usize].successors.insert(new_node_idx); + self.edges[*from_node as usize].successors.insert(new_node_idx); } - NodeIndex(new_node_idx) + new_node_idx } } @@ -99,7 +91,7 @@ impl QueryGraph { let word_set = index.words_fst(txn)?; let mut graph = QueryGraph::default(); - let (mut prev2, mut prev1, mut prev0): (Vec, Vec, Vec) = + let (mut prev2, mut prev1, mut prev0): (Vec, Vec, Vec) = (vec![], vec![], vec![graph.root_node]); // TODO: add all the word derivations found in the fst @@ -173,33 +165,33 @@ impl QueryGraph { Ok(graph) } - pub fn remove_nodes(&mut self, nodes: &[NodeIndex]) { + pub fn remove_nodes(&mut self, nodes: &[u32]) { for &node in nodes { - self.nodes[node.0 as usize] = QueryNode::Deleted; - let edges = self.edges[node.0 as usize].clone(); + self.nodes[node as usize] = QueryNode::Deleted; + let edges = self.edges[node as usize].clone(); for pred in edges.predecessors.iter() { - self.edges[pred as usize].successors.remove(node.0); + self.edges[pred as usize].successors.remove(node); } for succ in edges.successors { - self.edges[succ as usize].predecessors.remove(node.0); + self.edges[succ as usize].predecessors.remove(node); } - self.edges[node.0 as usize] = + self.edges[node as usize] = Edges { predecessors: RoaringBitmap::new(), successors: RoaringBitmap::new() }; } } - pub fn remove_nodes_keep_edges(&mut self, nodes: &[NodeIndex]) { + pub fn remove_nodes_keep_edges(&mut self, nodes: &[u32]) { for &node in nodes { - self.nodes[node.0 as usize] = QueryNode::Deleted; - let edges = self.edges[node.0 as usize].clone(); + self.nodes[node as usize] = QueryNode::Deleted; + let edges = self.edges[node as usize].clone(); for pred in edges.predecessors.iter() { - self.edges[pred as usize].successors.remove(node.0); + self.edges[pred as usize].successors.remove(node); self.edges[pred as usize].successors |= &edges.successors; } for succ in edges.successors { - self.edges[succ as usize].predecessors.remove(node.0); + self.edges[succ as usize].predecessors.remove(node); self.edges[succ as usize].predecessors |= &edges.predecessors; } - self.edges[node.0 as usize] = + self.edges[node as usize] = Edges { predecessors: RoaringBitmap::new(), successors: RoaringBitmap::new() }; } } @@ -207,7 +199,7 @@ impl QueryGraph { let mut nodes_to_remove_keeping_edges = vec![]; let mut nodes_to_remove = vec![]; for (node_idx, node) in self.nodes.iter().enumerate() { - let node_idx = NodeIndex(node_idx as u32); + let node_idx = node_idx as u32; let QueryNode::Term(LocatedQueryTerm { value: _, positions }) = node else { continue }; if positions.contains(&position) { nodes_to_remove_keeping_edges.push(node_idx) @@ -231,7 +223,7 @@ impl QueryGraph { || (!matches!(node, QueryNode::Start | QueryNode::Deleted) && self.edges[node_idx].predecessors.is_empty()) { - nodes_to_remove.push(NodeIndex(node_idx as u32)); + nodes_to_remove.push(node_idx as u32); } } if nodes_to_remove.is_empty() { @@ -315,9 +307,9 @@ node [shape = "record"] continue; } desc.push_str(&format!("{node} [label = {:?}]", &self.nodes[node],)); - if node == self.root_node.0 as usize { + if node == self.root_node as usize { desc.push_str("[color = blue]"); - } else if node == self.end_node.0 as usize { + } else if node == self.end_node as usize { desc.push_str("[color = red]"); } desc.push_str(";\n"); diff --git a/milli/src/search/new/ranking_rule_graph/build.rs b/milli/src/search/new/ranking_rule_graph/build.rs index 45dda3c1f..6978491cd 100644 --- a/milli/src/search/new/ranking_rule_graph/build.rs +++ b/milli/src/search/new/ranking_rule_graph/build.rs @@ -1,11 +1,11 @@ -use std::collections::{BTreeSet, HashMap, HashSet}; +use std::collections::{BTreeSet, HashSet}; use heed::RoTxn; use roaring::RoaringBitmap; use super::{Edge, RankingRuleGraph, RankingRuleGraphTrait}; use crate::new::db_cache::DatabaseCache; -use crate::new::{NodeIndex, QueryGraph}; +use crate::new::QueryGraph; use crate::{Index, Result}; impl RankingRuleGraph { @@ -36,8 +36,8 @@ impl RankingRuleGraph { edges.sort_by_key(|e| e.0); for (cost, details) in edges { ranking_rule_graph.all_edges.push(Some(Edge { - from_node: NodeIndex(node_idx as u32), - to_node: NodeIndex(successor_idx), + from_node: node_idx as u32, + to_node: successor_idx, cost, details, })); diff --git a/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs b/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs index f1c1035a3..00babb560 100644 --- a/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs +++ b/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs @@ -3,25 +3,23 @@ use std::collections::{BTreeMap, HashSet}; use itertools::Itertools; use roaring::RoaringBitmap; -use crate::new::NodeIndex; - use super::{ - empty_paths_cache::EmptyPathsCache, paths_map::PathsMap, Edge, EdgeIndex, RankingRuleGraph, + empty_paths_cache::EmptyPathsCache, paths_map::PathsMap, Edge, RankingRuleGraph, RankingRuleGraphTrait, }; #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Path { - pub edges: Vec, + pub edges: Vec, pub cost: u64, } struct DijkstraState { unvisited: RoaringBitmap, // should be a small bitset? distances: Vec, // or binary heap, or btreemap? (f64, usize) - edges: Vec, + edges: Vec, edge_costs: Vec, - paths: Vec>, + paths: Vec>, } pub struct KCheapestPathsState { @@ -104,29 +102,26 @@ impl KCheapestPathsState { .iter() .enumerate() { - let Some(edge) = graph.all_edges[edge_idx.0].as_ref() else { continue; }; + let Some(edge) = graph.all_edges[*edge_idx as usize].as_ref() else { continue; }; let Edge { from_node: spur_node, .. } = edge; - // TODO: - // Here, check that the root path is not dicarded by the empty_paths_cache - // If it is, then continue to the next spur_node let root_path = &self.kth_cheapest_path.edges[..i]; if empty_paths_cache.path_is_empty(root_path) { continue; } - let root_cost = root_path - .iter() - .fold(0, |sum, next| sum + graph.get_edge(*next).as_ref().unwrap().cost as u64); + let root_cost = root_path.iter().fold(0, |sum, next| { + sum + graph.all_edges[*next as usize].as_ref().unwrap().cost as u64 + }); let mut tmp_removed_edges = vec![]; // for all the paths already found that share a common prefix with the root path // we delete the edge from the spur node to the next one for edge_index_to_remove in self.cheapest_paths.edge_indices_after_prefix(root_path) { let was_removed = - graph.node_edges[spur_node.0 as usize].remove(edge_index_to_remove.0 as u32); + graph.node_edges[*spur_node as usize].remove(edge_index_to_remove); if was_removed { - tmp_removed_edges.push(edge_index_to_remove.0 as u32); + tmp_removed_edges.push(edge_index_to_remove); } } @@ -134,7 +129,7 @@ impl KCheapestPathsState { // we will combine it with the root path to get a potential kth cheapest path let spur_path = graph.cheapest_path_to_end(*spur_node); // restore the temporarily removed edges - graph.node_edges[spur_node.0 as usize].extend(tmp_removed_edges); + graph.node_edges[*spur_node as usize].extend(tmp_removed_edges); let Some(spur_path) = spur_path else { continue; }; let total_cost = root_cost + spur_path.cost; @@ -158,7 +153,7 @@ impl KCheapestPathsState { assert_eq!(cost, cost2); if next_cheapest_path .iter() - .any(|edge_index| graph.all_edges.get(edge_index.0).is_none()) + .any(|edge_index| graph.all_edges[*edge_index as usize].is_none()) { continue; } else { @@ -179,15 +174,15 @@ impl KCheapestPathsState { } impl RankingRuleGraph { - fn cheapest_path_to_end(&self, from: NodeIndex) -> Option { + fn cheapest_path_to_end(&self, from: u32) -> Option { let mut dijkstra = DijkstraState { unvisited: (0..self.query_graph.nodes.len() as u32).collect(), distances: vec![u64::MAX; self.query_graph.nodes.len()], - edges: vec![EdgeIndex(usize::MAX); self.query_graph.nodes.len()], + edges: vec![u32::MAX; self.query_graph.nodes.len()], edge_costs: vec![u8::MAX; self.query_graph.nodes.len()], paths: vec![None; self.query_graph.nodes.len()], }; - dijkstra.distances[from.0 as usize] = 0; + dijkstra.distances[from as usize] = 0; // TODO: could use a binary heap here to store the distances, or a btreemap while let Some(cur_node) = @@ -197,55 +192,43 @@ impl RankingRuleGraph { if cur_node_dist == u64::MAX { return None; } - if cur_node == self.query_graph.end_node.0 { + if cur_node == self.query_graph.end_node { break; } - // this is expensive, but shouldn't - // ideally I could quickly get a bitmap of all a node's successors - // then take the intersection with unvisited - let succ_cur_node: &RoaringBitmap = &self.successors[cur_node as usize]; - // .iter() - // .map(|e| self.all_edges[e as usize].as_ref().unwrap().to_node.0) - // .collect(); - // TODO: this intersection may be slow but shouldn't be, - // can use a bitmap intersection instead + let succ_cur_node = &self.successors[cur_node as usize]; let unvisited_succ_cur_node = succ_cur_node & &dijkstra.unvisited; for succ in unvisited_succ_cur_node { - // cheapest_edge() is also potentially too expensive - let Some((cheapest_edge, cheapest_edge_cost)) = self.cheapest_edge(NodeIndex(cur_node), NodeIndex(succ)) else { + let Some((cheapest_edge, cheapest_edge_cost)) = self.cheapest_edge(cur_node, succ) else { continue }; - // println!("cur node dist {cur_node_dist}"); let old_dist_succ = &mut dijkstra.distances[succ as usize]; let new_potential_distance = cur_node_dist + cheapest_edge_cost as u64; if new_potential_distance < *old_dist_succ { *old_dist_succ = new_potential_distance; dijkstra.edges[succ as usize] = cheapest_edge; dijkstra.edge_costs[succ as usize] = cheapest_edge_cost; - dijkstra.paths[succ as usize] = Some(NodeIndex(cur_node)); + dijkstra.paths[succ as usize] = Some(cur_node); } } dijkstra.unvisited.remove(cur_node); } let mut cur = self.query_graph.end_node; - // let mut edge_costs = vec![]; - // let mut distances = vec![]; let mut path_edges = vec![]; - while let Some(n) = dijkstra.paths[cur.0 as usize] { - path_edges.push(dijkstra.edges[cur.0 as usize]); + while let Some(n) = dijkstra.paths[cur as usize] { + path_edges.push(dijkstra.edges[cur as usize]); cur = n; } path_edges.reverse(); Some(Path { edges: path_edges, - cost: dijkstra.distances[self.query_graph.end_node.0 as usize], + cost: dijkstra.distances[self.query_graph.end_node as usize], }) } - pub fn cheapest_edge(&self, cur_node: NodeIndex, succ: NodeIndex) -> Option<(EdgeIndex, u8)> { + pub fn cheapest_edge(&self, cur_node: u32, succ: u32) -> Option<(u32, u8)> { self.visit_edges(cur_node, succ, |edge_idx, edge| { std::ops::ControlFlow::Break((edge_idx, edge.cost)) }) diff --git a/milli/src/search/new/ranking_rule_graph/edge_docids_cache.rs b/milli/src/search/new/ranking_rule_graph/edge_docids_cache.rs index 0c9768f04..263b78d6a 100644 --- a/milli/src/search/new/ranking_rule_graph/edge_docids_cache.rs +++ b/milli/src/search/new/ranking_rule_graph/edge_docids_cache.rs @@ -1,10 +1,11 @@ use std::collections::HashMap; use std::marker::PhantomData; +use fxhash::FxHashMap; use heed::RoTxn; use roaring::RoaringBitmap; -use super::{EdgeDetails, EdgeIndex, RankingRuleGraph, RankingRuleGraphTrait}; +use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait}; use crate::new::db_cache::DatabaseCache; use crate::new::BitmapOrAllRef; use crate::{Index, Result}; @@ -16,12 +17,12 @@ use crate::{Index, Result}; // by using a pointer (real, Rc, bumpalo, or in a vector)??? pub struct EdgeDocidsCache { - pub cache: HashMap, + pub cache: FxHashMap, // TODO: There is a big difference between `cache`, which is always valid, and // `empty_path_prefixes`, which is only accurate for a particular universe // ALSO, we should have a universe-specific `empty_edge` to use - // pub empty_path_prefixes: HashSet>, + // pub empty_path_prefixes: HashSet>, _phantom: PhantomData, } impl Default for EdgeDocidsCache { @@ -39,21 +40,21 @@ impl EdgeDocidsCache { index: &Index, txn: &'transaction RoTxn, db_cache: &mut DatabaseCache<'transaction>, - edge_index: &EdgeIndex, + edge_index: u32, graph: &RankingRuleGraph, ) -> Result> { - if self.cache.contains_key(edge_index) { - return Ok(BitmapOrAllRef::Bitmap(&self.cache[edge_index])); + if self.cache.contains_key(&edge_index) { + return Ok(BitmapOrAllRef::Bitmap(&self.cache[&edge_index])); } - let edge = graph.get_edge(*edge_index).as_ref().unwrap(); + let edge = graph.all_edges[edge_index as usize].as_ref().unwrap(); match &edge.details { EdgeDetails::Unconditional => Ok(BitmapOrAllRef::All), EdgeDetails::Data(details) => { let docids = G::compute_docids(index, txn, db_cache, details)?; - let _ = self.cache.insert(*edge_index, docids); - let docids = &self.cache[edge_index]; + let _ = self.cache.insert(edge_index, docids); + let docids = &self.cache[&edge_index]; Ok(BitmapOrAllRef::Bitmap(docids)) } } diff --git a/milli/src/search/new/ranking_rule_graph/empty_paths_cache.rs b/milli/src/search/new/ranking_rule_graph/empty_paths_cache.rs index 989a08a0d..5748dce3c 100644 --- a/milli/src/search/new/ranking_rule_graph/empty_paths_cache.rs +++ b/milli/src/search/new/ranking_rule_graph/empty_paths_cache.rs @@ -1,17 +1,18 @@ use std::collections::HashSet; -use super::{paths_map::PathsMap, EdgeIndex}; +use roaring::RoaringBitmap; + +use super::paths_map::PathsMap; #[derive(Default)] pub struct EmptyPathsCache { - pub empty_edges: HashSet, + pub empty_edges: RoaringBitmap, pub empty_prefixes: PathsMap<()>, } impl EmptyPathsCache { - pub fn path_is_empty(&self, path: &[EdgeIndex]) -> bool { + pub fn path_is_empty(&self, path: &[u32]) -> bool { for edge in path { - // TODO: should be a bitmap intersection - if self.empty_edges.contains(edge) { + if self.empty_edges.contains(*edge) { return true; } } diff --git a/milli/src/search/new/ranking_rule_graph/mod.rs b/milli/src/search/new/ranking_rule_graph/mod.rs index f7a312240..b5c928ffa 100644 --- a/milli/src/search/new/ranking_rule_graph/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/mod.rs @@ -6,14 +6,14 @@ pub mod paths_map; pub mod proximity; pub mod resolve_paths; -use std::collections::{BTreeSet, HashMap, HashSet}; +use std::collections::{BTreeSet, HashSet}; use std::ops::ControlFlow; use heed::RoTxn; use roaring::RoaringBitmap; use super::db_cache::DatabaseCache; -use super::{NodeIndex, QueryGraph, QueryNode}; +use super::{QueryGraph, QueryNode}; use crate::{Index, Result}; #[derive(Debug, Clone)] @@ -24,21 +24,18 @@ pub enum EdgeDetails { #[derive(Debug, Clone)] pub struct Edge { - from_node: NodeIndex, - to_node: NodeIndex, + from_node: u32, + to_node: u32, cost: u8, details: EdgeDetails, } #[derive(Debug, Clone)] pub struct EdgePointer<'graph, E> { - pub index: EdgeIndex, + pub index: u32, pub edge: &'graph Edge, } -#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] -pub struct EdgeIndex(pub usize); - pub trait RankingRuleGraphTrait { /// The details of an edge connecting two query nodes. These details /// should be sufficient to compute the edge's cost and associated document ids @@ -103,26 +100,22 @@ pub struct RankingRuleGraph { // TODO: // node_successors? - // pub removed_edges: HashSet, - // pub tmp_removed_edges: HashSet, + // pub removed_edges: HashSet, + // pub tmp_removed_edges: HashSet, } impl RankingRuleGraph { - pub fn get_edge(&self, edge_index: EdgeIndex) -> &Option> { - &self.all_edges[edge_index.0] - } - // Visit all edges between the two given nodes in order of increasing cost. pub fn visit_edges<'graph, O>( &'graph self, - from: NodeIndex, - to: NodeIndex, - mut visit: impl FnMut(EdgeIndex, &'graph Edge) -> ControlFlow, + from: u32, + to: u32, + mut visit: impl FnMut(u32, &'graph Edge) -> ControlFlow, ) -> Option { - let from_edges = &self.node_edges[from.0 as usize]; + let from_edges = &self.node_edges[from as usize]; for edge_idx in from_edges { let edge = self.all_edges[edge_idx as usize].as_ref().unwrap(); if edge.to_node == to { - let cf = visit(EdgeIndex(edge_idx as usize), edge); + let cf = visit(edge_idx, edge); match cf { ControlFlow::Continue(_) => continue, ControlFlow::Break(o) => return Some(o), @@ -133,21 +126,21 @@ impl RankingRuleGraph { None } - fn remove_edge(&mut self, edge_index: EdgeIndex) { - let edge_opt = &mut self.all_edges[edge_index.0]; + fn remove_edge(&mut self, edge_index: u32) { + let edge_opt = &mut self.all_edges[edge_index as usize]; let Some(edge) = &edge_opt else { return }; let (from_node, to_node) = (edge.from_node, edge.to_node); *edge_opt = None; - let from_node_edges = &mut self.node_edges[from_node.0 as usize]; - from_node_edges.remove(edge_index.0 as u32); + let from_node_edges = &mut self.node_edges[from_node as usize]; + from_node_edges.remove(edge_index); let mut new_successors_from_node = RoaringBitmap::new(); - for edge in from_node_edges.iter() { - let Edge { to_node, .. } = &self.all_edges[edge as usize].as_ref().unwrap(); - new_successors_from_node.insert(to_node.0); + for from_node_edge in from_node_edges.iter() { + let Edge { to_node, .. } = &self.all_edges[from_node_edge as usize].as_ref().unwrap(); + new_successors_from_node.insert(*to_node); } - self.successors[from_node.0 as usize] = new_successors_from_node; + self.successors[from_node as usize] = new_successors_from_node; } // pub fn remove_nodes(&mut self, nodes: &[usize]) { // for &node in nodes { @@ -190,7 +183,7 @@ impl RankingRuleGraph { // } // } // } - // fn is_removed_edge(&self, edge: EdgeIndex) -> bool { + // fn is_removed_edge(&self, edge: u32) -> bool { // self.removed_edges.contains(&edge) || self.tmp_removed_edges.contains(&edge) // } @@ -203,9 +196,9 @@ impl RankingRuleGraph { continue; } desc.push_str(&format!("{node_idx} [label = {:?}]", node)); - if node_idx == self.query_graph.root_node.0 as usize { + if node_idx == self.query_graph.root_node as usize { desc.push_str("[color = blue]"); - } else if node_idx == self.query_graph.end_node.0 as usize { + } else if node_idx == self.query_graph.end_node as usize { desc.push_str("[color = red]"); } desc.push_str(";\n"); diff --git a/milli/src/search/new/ranking_rule_graph/paths_map.rs b/milli/src/search/new/ranking_rule_graph/paths_map.rs index b1e4bb451..572cb975f 100644 --- a/milli/src/search/new/ranking_rule_graph/paths_map.rs +++ b/milli/src/search/new/ranking_rule_graph/paths_map.rs @@ -3,14 +3,16 @@ use std::collections::HashSet; use std::fmt::Write; use std::hash::{Hash, Hasher}; +use roaring::RoaringBitmap; + use super::cheapest_paths::Path; -use super::{EdgeDetails, EdgeIndex, RankingRuleGraph, RankingRuleGraphTrait, Edge}; +use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait, Edge}; use crate::new::QueryNode; #[derive(Debug)] pub struct PathsMap { - nodes: Vec<(EdgeIndex, PathsMap)>, + nodes: Vec<(u32, PathsMap)>, value: Option } impl Default for PathsMap { @@ -36,7 +38,7 @@ impl PathsMap { self.nodes.is_empty() && self.value.is_none() } - pub fn insert(&mut self, mut edges: impl Iterator, value: V) { + pub fn insert(&mut self, mut edges: impl Iterator, value: V) { match edges.next() { None => { self.value = Some(value); @@ -54,7 +56,7 @@ impl PathsMap { } } } - fn remove_first_rec(&mut self, cur: &mut Vec) -> (bool, V) { + fn remove_first_rec(&mut self, cur: &mut Vec) -> (bool, V) { let Some((first_edge, rest)) = self.nodes.first_mut() else { // The PathsMap has to be correct by construction here, otherwise // the unwrap() will crash @@ -69,7 +71,7 @@ impl PathsMap { (false, value) } } - pub fn remove_first(&mut self) -> Option<(Vec, V)> { + pub fn remove_first(&mut self) -> Option<(Vec, V)> { if self.is_empty() { return None } @@ -78,7 +80,7 @@ impl PathsMap { let (_, value) = self.remove_first_rec(&mut result); Some((result, value)) } - pub fn iterate_rec(&self, cur: &mut Vec, visit: &mut impl FnMut(&Vec, &V)) { + pub fn iterate_rec(&self, cur: &mut Vec, visit: &mut impl FnMut(&Vec, &V)) { if let Some(value) = &self.value { visit(cur, value); } @@ -88,7 +90,7 @@ impl PathsMap { cur.pop(); } } - pub fn iterate(&self, mut visit: impl FnMut(&Vec, &V)) { + pub fn iterate(&self, mut visit: impl FnMut(&Vec, &V)) { self.iterate_rec(&mut vec![], &mut visit) } @@ -97,10 +99,10 @@ impl PathsMap { self.remove_prefix(prefix); }); } - pub fn remove_edges(&mut self, forbidden_edges: &HashSet) { + pub fn remove_edges(&mut self, forbidden_edges: &RoaringBitmap) { let mut i = 0; while i < self.nodes.len() { - let should_remove = if forbidden_edges.contains(&self.nodes[i].0) { + let should_remove = if forbidden_edges.contains(self.nodes[i].0) { true } else if !self.nodes[i].1.nodes.is_empty() { self.nodes[i].1.remove_edges(forbidden_edges); @@ -115,7 +117,7 @@ impl PathsMap { } } } - pub fn remove_edge(&mut self, forbidden_edge: &EdgeIndex) { + pub fn remove_edge(&mut self, forbidden_edge: &u32) { let mut i = 0; while i < self.nodes.len() { let should_remove = if &self.nodes[i].0 == forbidden_edge { @@ -133,7 +135,7 @@ impl PathsMap { } } } - pub fn remove_prefix(&mut self, forbidden_prefix: &[EdgeIndex]) { + pub fn remove_prefix(&mut self, forbidden_prefix: &[u32]) { let [first_edge, remaining_prefix @ ..] = forbidden_prefix else { self.nodes.clear(); self.value = None; @@ -157,7 +159,7 @@ impl PathsMap { } } - pub fn edge_indices_after_prefix(&self, prefix: &[EdgeIndex]) -> Vec { + pub fn edge_indices_after_prefix(&self, prefix: &[u32]) -> Vec { let [first_edge, remaining_prefix @ ..] = prefix else { return self.nodes.iter().map(|n| n.0).collect(); }; @@ -169,7 +171,7 @@ impl PathsMap { vec![] } - pub fn contains_prefix_of_path(&self, path: &[EdgeIndex]) -> bool { + pub fn contains_prefix_of_path(&self, path: &[u32]) -> bool { if self.value.is_some() { return true } @@ -202,7 +204,7 @@ impl PathsMap { h.finish() }; for (edge_idx, rest) in self.nodes.iter() { - let Some(Edge { from_node, to_node, cost, details }) = graph.get_edge(*edge_idx).as_ref() else { + let Some(Edge { from_node, to_node, cost, details }) = graph.all_edges[*edge_idx as usize].as_ref() else { continue; }; let mut path_to = path_from.clone(); @@ -235,9 +237,9 @@ impl RankingRuleGraph { continue; } desc.push_str(&format!("{node_idx} [label = {:?}]", node)); - if node_idx == self.query_graph.root_node.0 as usize { + if node_idx == self.query_graph.root_node as usize { desc.push_str("[color = blue]"); - } else if node_idx == self.query_graph.end_node.0 as usize { + } else if node_idx == self.query_graph.end_node as usize { desc.push_str("[color = red]"); } desc.push_str(";\n"); @@ -246,7 +248,7 @@ impl RankingRuleGraph { for (edge_idx, edge) in self.all_edges.iter().enumerate() { let Some(edge) = edge else { continue }; let Edge { from_node, to_node, cost, details } = edge; - let color = if path.edges.contains(&EdgeIndex(edge_idx)) { + let color = if path.edges.contains(&(edge_idx as u32)) { "red" } else { "green" @@ -283,7 +285,7 @@ mod tests { use crate::new::ranking_rule_graph::cheapest_paths::KCheapestPathsState; use crate::new::ranking_rule_graph::empty_paths_cache::EmptyPathsCache; use crate::new::ranking_rule_graph::proximity::ProximityGraph; - use crate::new::ranking_rule_graph::{RankingRuleGraph, EdgeIndex}; + use crate::new::ranking_rule_graph::RankingRuleGraph; use crate::search::new::query_term::{word_derivations, LocatedQueryTerm}; use crate::search::new::QueryGraph; use charabia::Tokenize; @@ -358,11 +360,11 @@ mod tests { let desc = path_tree.graphviz(&prox_graph); println!("{desc}"); - // let path = vec![EdgeIndex { from: 0, to: 2, edge_idx: 0 }, EdgeIndex { from: 2, to: 3, edge_idx: 0 }, EdgeIndex { from: 3, to: 4, edge_idx: 0 }, EdgeIndex { from: 4, to: 5, edge_idx: 0 }, EdgeIndex { from: 5, to: 8, edge_idx: 0 }, EdgeIndex { from: 8, to: 1, edge_idx: 0 }, EdgeIndex { from: 1, to: 10, edge_idx: 0 }]; + // let path = vec![u32 { from: 0, to: 2, edge_idx: 0 }, u32 { from: 2, to: 3, edge_idx: 0 }, u32 { from: 3, to: 4, edge_idx: 0 }, u32 { from: 4, to: 5, edge_idx: 0 }, u32 { from: 5, to: 8, edge_idx: 0 }, u32 { from: 8, to: 1, edge_idx: 0 }, u32 { from: 1, to: 10, edge_idx: 0 }]; // println!("{}", psath_tree.contains_prefix_of_path(&path)); - // let path = vec![EdgeIndex { from: 0, to: 2, edge_idx: 0 }, EdgeIndex { from: 2, to: 3, edge_idx: 0 }, EdgeIndex { from: 3, to: 4, edge_idx: 0 }, EdgeIndex { from: 4, to: 5, edge_idx: 0 }, EdgeIndex { from: 5, to: 6, edge_idx: 0 }, EdgeIndex { from: 6, to: 7, edge_idx: 0 }, EdgeIndex { from: 7, to: 1, edge_idx: 0 }]; + // let path = vec![u32 { from: 0, to: 2, edge_idx: 0 }, u32 { from: 2, to: 3, edge_idx: 0 }, u32 { from: 3, to: 4, edge_idx: 0 }, u32 { from: 4, to: 5, edge_idx: 0 }, u32 { from: 5, to: 6, edge_idx: 0 }, u32 { from: 6, to: 7, edge_idx: 0 }, u32 { from: 7, to: 1, edge_idx: 0 }]; // path_tree.iterate(|path, cost| { @@ -370,18 +372,18 @@ mod tests { // }); // path_tree.remove_forbidden_prefix(&[ - // EdgeIndex { from: 0, to: 2, edge_idx: 0 }, - // EdgeIndex { from: 2, to: 3, edge_idx: 2 }, + // u32 { from: 0, to: 2, edge_idx: 0 }, + // u32 { from: 2, to: 3, edge_idx: 2 }, // ]); // let desc = path_tree.graphviz(); // println!("{desc}"); - // path_tree.remove_forbidden_edge(&EdgeIndex { from: 5, to: 6, cost: 1 }); + // path_tree.remove_forbidden_edge(&u32 { from: 5, to: 6, cost: 1 }); // let desc = path_tree.graphviz(); // println!("AFTER REMOVING 5-6 [1]:\n{desc}"); - // path_tree.remove_forbidden_edge(&EdgeIndex { from: 3, to: 4, cost: 1 }); + // path_tree.remove_forbidden_edge(&u32 { from: 3, to: 4, cost: 1 }); // let desc = path_tree.graphviz(); // println!("AFTER REMOVING 3-4 [1]:\n{desc}"); @@ -396,7 +398,7 @@ mod tests { // let desc = path_tree.graphviz(); // println!("AFTER REMOVING: {desc}"); - // path_tree.remove_all_containing_edge(&EdgeIndex { from: 5, to: 6, cost: 2 }); + // path_tree.remove_all_containing_edge(&u32 { from: 5, to: 6, cost: 2 }); // let desc = path_tree.graphviz(); // println!("{desc}"); diff --git a/milli/src/search/new/ranking_rule_graph/resolve_paths.rs b/milli/src/search/new/ranking_rule_graph/resolve_paths.rs index 76823a32a..95cf4629b 100644 --- a/milli/src/search/new/ranking_rule_graph/resolve_paths.rs +++ b/milli/src/search/new/ranking_rule_graph/resolve_paths.rs @@ -35,7 +35,7 @@ impl RankingRuleGraph { 'edge_loop: for edge_index in edge_indexes { processed_edges.push(edge_index); let edge_docids = - edge_docids_cache.get_edge_docids(index, txn, db_cache, &edge_index, self)?; + edge_docids_cache.get_edge_docids(index, txn, db_cache, edge_index, self)?; match edge_docids { BitmapOrAllRef::Bitmap(edge_docids) => { if edge_docids.is_disjoint(universe) { diff --git a/milli/src/search/new/resolve_query_graph.rs b/milli/src/search/new/resolve_query_graph.rs index 8bc56bb23..e0e3c5321 100644 --- a/milli/src/search/new/resolve_query_graph.rs +++ b/milli/src/search/new/resolve_query_graph.rs @@ -1,10 +1,11 @@ +use fxhash::FxHashMap; use heed::{BytesDecode, RoTxn}; use roaring::{MultiOps, RoaringBitmap}; use std::collections::{HashMap, HashSet, VecDeque}; use super::db_cache::DatabaseCache; use super::query_term::{QueryTerm, WordDerivations}; -use super::{NodeIndex, QueryGraph}; +use super::QueryGraph; use crate::{Index, Result, RoaringBitmapCodec}; // TODO: manual performance metrics: access to DB, bitmap deserializations/operations, etc. @@ -12,7 +13,7 @@ use crate::{Index, Result, RoaringBitmapCodec}; // TODO: reuse NodeDocidsCache in between calls to resolve_query_graph #[derive(Default)] pub struct NodeDocIdsCache { - pub cache: HashMap, + pub cache: FxHashMap, } pub fn resolve_query_graph<'transaction>( @@ -35,7 +36,7 @@ pub fn resolve_query_graph<'transaction>( next_nodes_to_visit.push_front(q.root_node); while let Some(node) = next_nodes_to_visit.pop_front() { - let predecessors = &q.edges[node.0 as usize].predecessors; + let predecessors = &q.edges[node as usize].predecessors; if !predecessors.is_subset(&nodes_resolved) { next_nodes_to_visit.push_back(node); continue; @@ -44,7 +45,7 @@ pub fn resolve_query_graph<'transaction>( let predecessors_iter = predecessors.iter().map(|p| &nodes_docids[p as usize]); let predecessors_docids = MultiOps::union(predecessors_iter); - let n = &q.nodes[node.0 as usize]; + let n = &q.nodes[node as usize]; // println!("resolving {node} {n:?}, predecessors: {predecessors:?}, their docids: {predecessors_docids:?}"); let node_docids = match n { super::QueryNode::Term(located_term) => { @@ -96,16 +97,16 @@ pub fn resolve_query_graph<'transaction>( return Ok(predecessors_docids); } }; - nodes_resolved.insert(node.0); - nodes_docids[node.0 as usize] = node_docids; + nodes_resolved.insert(node); + nodes_docids[node as usize] = node_docids; - for succ in q.edges[node.0 as usize].successors.iter() { - if !next_nodes_to_visit.contains(&NodeIndex(succ)) && !nodes_resolved.contains(succ) { - next_nodes_to_visit.push_back(NodeIndex(succ)); + for succ in q.edges[node as usize].successors.iter() { + if !next_nodes_to_visit.contains(&succ) && !nodes_resolved.contains(succ) { + next_nodes_to_visit.push_back(succ); } } // This is currently slow but could easily be implemented very efficiently - for prec in q.edges[node.0 as usize].predecessors.iter() { + for prec in q.edges[node as usize].predecessors.iter() { if q.edges[prec as usize].successors.is_subset(&nodes_resolved) { nodes_docids[prec as usize].clear(); }