diff --git a/milli/src/search/new/graph_based_ranking_rule.rs b/milli/src/search/new/graph_based_ranking_rule.rs index 211fce736..4a96855ce 100644 --- a/milli/src/search/new/graph_based_ranking_rule.rs +++ b/milli/src/search/new/graph_based_ranking_rule.rs @@ -88,9 +88,8 @@ pub struct GraphBasedRankingRuleState { conditions_cache: ConditionDocIdsCache, /// Cache used to optimistically discard paths that resolve to no documents. dead_ends_cache: DeadEndsCache, - /// A structure giving the list of possible costs from each node to the end node, - /// along with a set of unavoidable edges that must be traversed to achieve that distance. - all_distances: MappedInterner)>, QueryNode>, + /// A structure giving the list of possible costs from each node to the end node + all_distances: MappedInterner, QueryNode>, /// An index in the first element of `all_distances`, giving the cost of the next bucket cur_distance_idx: usize, } @@ -108,7 +107,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase ) -> Result<()> { let graph = RankingRuleGraph::build(ctx, query_graph.clone())?; let condition_docids_cache = ConditionDocIdsCache::default(); - let dead_end_path_cache = DeadEndsCache::new(&graph.conditions_interner); + let dead_ends_cache = DeadEndsCache::new(&graph.conditions_interner); // Then pre-compute the cost of all paths from each node to the end node let all_distances = graph.initialize_distances_with_necessary_edges(); @@ -116,7 +115,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase let state = GraphBasedRankingRuleState { graph, conditions_cache: condition_docids_cache, - dead_ends_cache: dead_end_path_cache, + dead_ends_cache, all_distances, cur_distance_idx: 0, }; @@ -149,7 +148,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase } // Retrieve the cost of the paths to compute - let (cost, _) = + let cost = state.all_distances.get(state.graph.query_graph.root_node)[state.cur_distance_idx]; state.cur_distance_idx += 1; @@ -158,7 +157,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase let GraphBasedRankingRuleState { graph, conditions_cache: condition_docids_cache, - dead_ends_cache: dead_end_path_cache, + dead_ends_cache, all_distances, cur_distance_idx: _, } = &mut state; @@ -174,15 +173,15 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase // For each path of the given cost, we will compute its associated // document ids. // In case the path does not resolve to any document id, we try to figure out why - // and update the `dead_end_path_cache` accordingly. - // Updating the dead_end_path_cache helps speed up the execution of `visit_paths_of_cost` and reduces + // and update the `dead_ends_cache` accordingly. + // Updating the dead_ends_cache helps speed up the execution of `visit_paths_of_cost` and reduces // the number of future candidate paths given by that same function. graph.visit_paths_of_cost( graph.query_graph.root_node, cost, all_distances, - dead_end_path_cache, - |path, graph, dead_end_path_cache| { + dead_ends_cache, + |path, graph, dead_ends_cache| { if universe.is_empty() { return Ok(ControlFlow::Break(())); } @@ -211,7 +210,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase // and caches accordingly and skip to the next candidate path. if condition_docids.is_empty() { // 1. Store in the cache that this edge is empty for this universe - dead_end_path_cache.forbid_condition(latest_condition); + dead_ends_cache.forbid_condition(latest_condition); // 2. remove all the edges with this condition from the ranking rule graph graph.remove_edges_with_condition(latest_condition); // 3. Also remove the entry from the condition_docids_cache, since we don't need it anymore @@ -226,7 +225,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase let len_prefix = subpath_docids.len() - 1; // First, we know that this path is empty, and thus any path // that is a superset of it will also be empty. - dead_end_path_cache.forbid_condition_after_prefix( + dead_ends_cache.forbid_condition_after_prefix( visited_conditions[..len_prefix].iter().copied(), latest_condition, ); @@ -244,7 +243,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase }; subprefix.push(*past_condition); if condition_docids.is_disjoint(subpath_docids) { - dead_end_path_cache.forbid_condition_after_prefix( + dead_ends_cache.forbid_condition_after_prefix( subprefix.iter().copied(), latest_condition, ); @@ -253,8 +252,8 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase // keep the same prefix and check the intersection with // all the remaining conditions - let mut forbidden = dead_end_path_cache.forbidden.clone(); - let mut cursor = dead_end_path_cache; + let mut forbidden = dead_ends_cache.forbidden.clone(); + let mut cursor = dead_ends_cache; for &c in visited_conditions[..len_prefix].iter() { cursor = cursor.advance(c).unwrap(); forbidden.union(&cursor.forbidden); @@ -301,7 +300,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase G::log_state( &original_graph, &good_paths, - dead_end_path_cache, + dead_ends_cache, original_universe, all_distances, cost, diff --git a/milli/src/search/new/logger/detailed.rs b/milli/src/search/new/logger/detailed.rs index a46e63005..23134c113 100644 --- a/milli/src/search/new/logger/detailed.rs +++ b/milli/src/search/new/logger/detailed.rs @@ -44,17 +44,17 @@ pub enum SearchEvents { ProximityState { graph: RankingRuleGraph, paths: Vec>>, - dead_end_path_cache: DeadEndsCache, + dead_ends_cache: DeadEndsCache, universe: RoaringBitmap, - distances: MappedInterner)>, QueryNode>, + distances: MappedInterner, QueryNode>, cost: u16, }, TypoState { graph: RankingRuleGraph, paths: Vec>>, - dead_end_path_cache: DeadEndsCache, + dead_ends_cache: DeadEndsCache, universe: RoaringBitmap, - distances: MappedInterner)>, QueryNode>, + distances: MappedInterner, QueryNode>, cost: u16, }, RankingRuleSkipBucket { @@ -170,15 +170,15 @@ impl SearchLogger for DetailedSearchLogger { &mut self, query_graph: &RankingRuleGraph, paths_map: &[Vec>], - dead_end_path_cache: &DeadEndsCache, + dead_ends_cache: &DeadEndsCache, universe: &RoaringBitmap, - distances: &MappedInterner)>, QueryNode>, + distances: &MappedInterner, QueryNode>, cost: u16, ) { self.events.push(SearchEvents::ProximityState { graph: query_graph.clone(), paths: paths_map.to_vec(), - dead_end_path_cache: dead_end_path_cache.clone(), + dead_ends_cache: dead_ends_cache.clone(), universe: universe.clone(), distances: distances.clone(), cost, @@ -189,15 +189,15 @@ impl SearchLogger for DetailedSearchLogger { &mut self, query_graph: &RankingRuleGraph, paths_map: &[Vec>], - dead_end_path_cache: &DeadEndsCache, + dead_ends_cache: &DeadEndsCache, universe: &RoaringBitmap, - distances: &MappedInterner)>, QueryNode>, + distances: &MappedInterner, QueryNode>, cost: u16, ) { self.events.push(SearchEvents::TypoState { graph: query_graph.clone(), paths: paths_map.to_vec(), - dead_end_path_cache: dead_end_path_cache.clone(), + dead_ends_cache: dead_ends_cache.clone(), universe: universe.clone(), distances: distances.clone(), cost, @@ -357,7 +357,7 @@ results.{cur_ranking_rule}{cur_activated_id} {{ SearchEvents::ProximityState { graph, paths, - dead_end_path_cache, + dead_ends_cache, universe, distances, cost, @@ -373,7 +373,7 @@ results.{cur_ranking_rule}{cur_activated_id} {{ ctx, graph, paths, - dead_end_path_cache, + dead_ends_cache, distances.clone(), &mut new_file, ); @@ -390,7 +390,7 @@ results.{cur_ranking_rule}{cur_activated_id} {{ SearchEvents::TypoState { graph, paths, - dead_end_path_cache, + dead_ends_cache, universe, distances, cost, @@ -406,7 +406,7 @@ results.{cur_ranking_rule}{cur_activated_id} {{ ctx, graph, paths, - dead_end_path_cache, + dead_ends_cache, distances.clone(), &mut new_file, ); @@ -429,7 +429,7 @@ results.{cur_ranking_rule}{cur_activated_id} {{ ctx: &mut SearchContext, node_idx: Interned, node: &QueryNode, - distances: &[(u16, SmallBitmap)], + distances: &[u16], file: &mut File, ) { match &node.data { @@ -490,9 +490,8 @@ shape: class" let p = ctx.word_interner.get(*use_prefix_db); writeln!(file, "use prefix DB : {p}").unwrap(); } - for (d, edges) in distances.iter() { - writeln!(file, "\"distance {d}\" : {:?}", edges.iter().collect::>()) - .unwrap(); + for d in distances.iter() { + writeln!(file, "\"d_{d}\" : distance").unwrap(); } writeln!(file, "}}").unwrap(); @@ -527,8 +526,8 @@ shape: class" ctx: &mut SearchContext, graph: &RankingRuleGraph, paths: &[Vec>], - dead_end_paths_cache: &DeadEndsCache, - distances: MappedInterner)>, QueryNode>, + _dead_ends_cache: &DeadEndsCache, + distances: MappedInterner, QueryNode>, file: &mut File, ) { writeln!(file, "direction: right").unwrap(); diff --git a/milli/src/search/new/logger/mod.rs b/milli/src/search/new/logger/mod.rs index f8ab89cbf..203ac7b56 100644 --- a/milli/src/search/new/logger/mod.rs +++ b/milli/src/search/new/logger/mod.rs @@ -8,7 +8,6 @@ use super::query_graph::QueryNode; use super::ranking_rule_graph::{ DeadEndsCache, ProximityCondition, ProximityGraph, RankingRuleGraph, TypoCondition, TypoGraph, }; -use super::small_bitmap::SmallBitmap; use super::{RankingRule, RankingRuleQueryTrait}; /// Trait for structure logging the execution of a search query. @@ -66,9 +65,9 @@ pub trait SearchLogger { &mut self, query_graph: &RankingRuleGraph, paths: &[Vec>], - dead_end_path_cache: &DeadEndsCache, + dead_ends_cache: &DeadEndsCache, universe: &RoaringBitmap, - distances: &MappedInterner)>, QueryNode>, + distances: &MappedInterner, QueryNode>, cost: u16, ); @@ -77,9 +76,9 @@ pub trait SearchLogger { &mut self, query_graph: &RankingRuleGraph, paths: &[Vec>], - dead_end_path_cache: &DeadEndsCache, + dead_ends_cache: &DeadEndsCache, universe: &RoaringBitmap, - distances: &MappedInterner)>, QueryNode>, + distances: &MappedInterner, QueryNode>, cost: u16, ); } @@ -137,9 +136,9 @@ impl SearchLogger for DefaultSearchLogger { &mut self, _query_graph: &RankingRuleGraph, _paths_map: &[Vec>], - _dead_end_path_cache: &DeadEndsCache, + _dead_ends_cache: &DeadEndsCache, _universe: &RoaringBitmap, - _distances: &MappedInterner)>, QueryNode>, + _distances: &MappedInterner, QueryNode>, _cost: u16, ) { } @@ -148,9 +147,9 @@ impl SearchLogger for DefaultSearchLogger { &mut self, _query_graph: &RankingRuleGraph, _paths: &[Vec>], - _dead_end_path_cache: &DeadEndsCache, + _dead_ends_cache: &DeadEndsCache, _universe: &RoaringBitmap, - _distances: &MappedInterner)>, QueryNode>, + _distances: &MappedInterner, QueryNode>, _cost: u16, ) { } diff --git a/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs b/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs index 017663443..c340ef8c7 100644 --- a/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs +++ b/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs @@ -1,7 +1,6 @@ #![allow(clippy::too_many_arguments)] -use std::collections::btree_map::Entry; -use std::collections::{BTreeMap, VecDeque}; +use std::collections::{BTreeSet, VecDeque}; use std::ops::ControlFlow; use super::{DeadEndsCache, RankingRuleGraph, RankingRuleGraphTrait}; @@ -15,8 +14,8 @@ impl RankingRuleGraph { &mut self, from: Interned, cost: u16, - all_distances: &MappedInterner)>, QueryNode>, - dead_end_path_cache: &mut DeadEndsCache, + all_distances: &MappedInterner, QueryNode>, + dead_ends_cache: &mut DeadEndsCache, mut visit: impl FnMut( &[Interned], &mut Self, @@ -27,11 +26,11 @@ impl RankingRuleGraph { from, cost, all_distances, - dead_end_path_cache, + dead_ends_cache, &mut visit, &mut vec![], &mut SmallBitmap::for_interned_values_in(&self.conditions_interner), - dead_end_path_cache.forbidden.clone(), + dead_ends_cache.forbidden.clone(), )?; Ok(()) } @@ -39,8 +38,8 @@ impl RankingRuleGraph { &mut self, from: Interned, cost: u16, - all_distances: &MappedInterner)>, QueryNode>, - dead_end_path_cache: &mut DeadEndsCache, + all_distances: &MappedInterner, QueryNode>, + dead_ends_cache: &mut DeadEndsCache, visit: &mut impl FnMut( &[Interned], &mut Self, @@ -62,7 +61,7 @@ impl RankingRuleGraph { None => { if edge.dest_node == self.query_graph.end_node { any_valid = true; - let control_flow = visit(prev_conditions, self, dead_end_path_cache)?; + let control_flow = visit(prev_conditions, self, dead_ends_cache)?; match control_flow { ControlFlow::Continue(_) => {} ControlFlow::Break(_) => return Ok(true), @@ -73,7 +72,7 @@ impl RankingRuleGraph { edge.dest_node, cost - edge.cost as u16, all_distances, - dead_end_path_cache, + dead_ends_cache, visit, prev_conditions, cur_path, @@ -83,12 +82,10 @@ impl RankingRuleGraph { } Some(condition) => { if forbidden_conditions.contains(condition) - || !all_distances.get(edge.dest_node).iter().any( - |(next_cost, necessary_conditions)| { - (*next_cost == cost - edge.cost as u16) - && !forbidden_conditions.intersects(necessary_conditions) - }, - ) + || all_distances + .get(edge.dest_node) + .iter() + .all(|next_cost| *next_cost != cost - edge.cost as u16) { continue; } @@ -96,14 +93,14 @@ impl RankingRuleGraph { prev_conditions.push(condition); let mut new_forbidden_conditions = forbidden_conditions.clone(); if let Some(next_forbidden) = - dead_end_path_cache.forbidden_conditions_after_prefix(prev_conditions) + dead_ends_cache.forbidden_conditions_after_prefix(prev_conditions) { new_forbidden_conditions.union(&next_forbidden); } let next_any_valid = if edge.dest_node == self.query_graph.end_node { any_valid = true; - let control_flow = visit(prev_conditions, self, dead_end_path_cache)?; + let control_flow = visit(prev_conditions, self, dead_ends_cache)?; match control_flow { ControlFlow::Continue(_) => {} ControlFlow::Break(_) => return Ok(true), @@ -114,7 +111,7 @@ impl RankingRuleGraph { edge.dest_node, cost - edge.cost as u16, all_distances, - dead_end_path_cache, + dead_ends_cache, visit, prev_conditions, cur_path, @@ -129,8 +126,8 @@ impl RankingRuleGraph { any_valid |= next_any_valid; if next_any_valid { - forbidden_conditions = dead_end_path_cache - .forbidden_conditions_for_all_prefixes_up_to(prev_conditions); + forbidden_conditions = + dead_ends_cache.forbidden_conditions_for_all_prefixes_up_to(prev_conditions); if cur_path.intersects(&forbidden_conditions) { break 'edges_loop; } @@ -140,16 +137,13 @@ impl RankingRuleGraph { Ok(any_valid) } - pub fn initialize_distances_with_necessary_edges( - &self, - ) -> MappedInterner)>, QueryNode> { + pub fn initialize_distances_with_necessary_edges(&self) -> MappedInterner, QueryNode> { let mut distances_to_end = self.query_graph.nodes.map(|_| vec![]); let mut enqueued = SmallBitmap::new(self.query_graph.nodes.len()); let mut node_stack = VecDeque::new(); - *distances_to_end.get_mut(self.query_graph.end_node) = - vec![(0, SmallBitmap::for_interned_values_in(&self.conditions_interner))]; + *distances_to_end.get_mut(self.query_graph.end_node) = vec![0]; for prev_node in self.query_graph.nodes.get(self.query_graph.end_node).predecessors.iter() { node_stack.push_back(prev_node); @@ -157,35 +151,20 @@ impl RankingRuleGraph { } while let Some(cur_node) = node_stack.pop_front() { - let mut self_distances = BTreeMap::>::new(); + let mut self_distances = BTreeSet::::new(); let cur_node_edges = &self.edges_of_node.get(cur_node); for edge_idx in cur_node_edges.iter() { let edge = self.edges_store.get(edge_idx).as_ref().unwrap(); let succ_node = edge.dest_node; let succ_distances = distances_to_end.get(succ_node); - for (succ_distance, succ_necessary_conditions) in succ_distances { - let mut potential_necessary_edges = - SmallBitmap::for_interned_values_in(&self.conditions_interner); - for condition in - edge.condition.into_iter().chain(succ_necessary_conditions.iter()) - { - potential_necessary_edges.insert(condition); - } - - match self_distances.entry(edge.cost as u16 + succ_distance) { - Entry::Occupied(mut prev_necessary_edges) => { - prev_necessary_edges.get_mut().intersection(&potential_necessary_edges); - } - Entry::Vacant(entry) => { - entry.insert(potential_necessary_edges); - } - } + for succ_distance in succ_distances { + self_distances.insert(edge.cost as u16 + succ_distance); } } let distances_to_end_cur_node = distances_to_end.get_mut(cur_node); - for (cost, necessary_edges) in self_distances.iter() { - distances_to_end_cur_node.push((*cost, necessary_edges.clone())); + for cost in self_distances.iter() { + distances_to_end_cur_node.push(*cost); } *distances_to_end.get_mut(cur_node) = self_distances.into_iter().collect(); for prev_node in self.query_graph.nodes.get(cur_node).predecessors.iter() { diff --git a/milli/src/search/new/ranking_rule_graph/mod.rs b/milli/src/search/new/ranking_rule_graph/mod.rs index b01c82969..129590088 100644 --- a/milli/src/search/new/ranking_rule_graph/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/mod.rs @@ -112,9 +112,9 @@ pub trait RankingRuleGraphTrait: Sized { fn log_state( graph: &RankingRuleGraph, paths: &[Vec>], - dead_end_path_cache: &DeadEndsCache, + dead_ends_cache: &DeadEndsCache, universe: &RoaringBitmap, - distances: &MappedInterner)>, QueryNode>, + distances: &MappedInterner, QueryNode>, cost: u16, logger: &mut dyn SearchLogger, ); diff --git a/milli/src/search/new/ranking_rule_graph/proximity/mod.rs b/milli/src/search/new/ranking_rule_graph/proximity/mod.rs index 690200773..4c058ac8e 100644 --- a/milli/src/search/new/ranking_rule_graph/proximity/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/proximity/mod.rs @@ -6,11 +6,10 @@ use std::iter::FromIterator; use roaring::RoaringBitmap; -use super::{RankingRuleGraph, RankingRuleGraphTrait, DeadEndsCache}; +use super::{DeadEndsCache, RankingRuleGraph, RankingRuleGraphTrait}; use crate::search::new::interner::{DedupInterner, Interned, MappedInterner}; use crate::search::new::logger::SearchLogger; use crate::search::new::query_term::{Phrase, QueryTerm}; -use crate::search::new::small_bitmap::SmallBitmap; use crate::search::new::{QueryGraph, QueryNode, SearchContext}; use crate::Result; @@ -66,13 +65,13 @@ impl RankingRuleGraphTrait for ProximityGraph { fn log_state( graph: &RankingRuleGraph, paths: &[Vec>], - dead_end_path_cache: &DeadEndsCache, + dead_ends_cache: &DeadEndsCache, universe: &RoaringBitmap, - distances: &MappedInterner)>, QueryNode>, + distances: &MappedInterner, QueryNode>, cost: u16, logger: &mut dyn SearchLogger, ) { - logger.log_proximity_state(graph, paths, dead_end_path_cache, universe, distances, cost); + logger.log_proximity_state(graph, paths, dead_ends_cache, universe, distances, cost); } fn label_for_condition<'ctx>( diff --git a/milli/src/search/new/ranking_rule_graph/typo/mod.rs b/milli/src/search/new/ranking_rule_graph/typo/mod.rs index e1e01d6b1..854bd589b 100644 --- a/milli/src/search/new/ranking_rule_graph/typo/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/typo/mod.rs @@ -5,7 +5,6 @@ use crate::search::new::interner::{DedupInterner, Interned, MappedInterner}; use crate::search::new::logger::SearchLogger; use crate::search::new::query_graph::QueryNodeData; use crate::search::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm}; -use crate::search::new::small_bitmap::SmallBitmap; use crate::search::new::{QueryGraph, QueryNode, SearchContext}; use crate::Result; use std::collections::HashSet; @@ -136,13 +135,13 @@ impl RankingRuleGraphTrait for TypoGraph { fn log_state( graph: &RankingRuleGraph, paths: &[Vec>], - dead_end_path_cache: &DeadEndsCache, + dead_ends_cache: &DeadEndsCache, universe: &RoaringBitmap, - distances: &MappedInterner)>, QueryNode>, + distances: &MappedInterner, QueryNode>, cost: u16, logger: &mut dyn SearchLogger, ) { - logger.log_typo_state(graph, paths, dead_end_path_cache, universe, distances, cost); + logger.log_typo_state(graph, paths, dead_ends_cache, universe, distances, cost); } fn label_for_condition<'ctx>(