mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 21:04:27 +01:00
Rewrite the dead-ends cache to detect more dead-ends
This commit is contained in:
parent
49240c367a
commit
c6ff97a220
@ -135,7 +135,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
||||
query_graph: &QueryGraph,
|
||||
) -> Result<()> {
|
||||
let mut graph = RankingRuleGraph::build(ctx, query_graph.clone())?;
|
||||
let mut condition_docids_cache = ConditionDocIdsCache::new(universe);
|
||||
let mut condition_docids_cache = ConditionDocIdsCache::default();
|
||||
let mut dead_end_path_cache = DeadEndsCache::new(&graph.conditions_interner);
|
||||
|
||||
// First simplify the graph as much as possible, by computing the docids of all the conditions
|
||||
@ -215,36 +215,36 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
||||
|
||||
let original_graph = graph.clone();
|
||||
let mut used_conditions = SmallBitmap::for_interned_values_in(&graph.conditions_interner);
|
||||
let mut paths = vec![];
|
||||
let mut considered_paths = vec![];
|
||||
let mut good_paths = vec![];
|
||||
|
||||
// For each path of the given cost, we will compute its associated
|
||||
// document ids.
|
||||
// In case the path does not resolve to any document id, we try to figure out why
|
||||
// and update the `dead_end_path_cache` accordingly.
|
||||
// For example, it may be that the path is empty because one of its edges is disjoint
|
||||
// with the universe, or because a prefix of the path is disjoint with the universe, or because
|
||||
// the path contains two edges that are disjoint from each other within the universe.
|
||||
// Updating the dead_end_path_cache helps speed up the execution of `visit_paths_of_cost` and reduces
|
||||
// the number of future candidate paths given by that same function.
|
||||
graph.visit_paths_of_cost(
|
||||
graph.query_graph.root_node,
|
||||
cost,
|
||||
all_distances,
|
||||
dead_end_path_cache.forbidden.clone(),
|
||||
|condition, forbidden_conditions| {},
|
||||
dead_end_path_cache,
|
||||
|path, graph, dead_end_path_cache| {
|
||||
if universe.is_empty() {
|
||||
return Ok(ControlFlow::Break(()));
|
||||
}
|
||||
// Accumulate the path for logging purposes only
|
||||
paths.push(path.to_vec());
|
||||
considered_paths.push(path.to_vec());
|
||||
|
||||
let mut path_docids = universe.clone();
|
||||
|
||||
// We store the edges and their docids in vectors in case the path turns out to be
|
||||
// empty and we need to figure out why it was empty.
|
||||
let mut visited_conditions = vec![];
|
||||
let mut cached_condition_docids = vec![];
|
||||
// let mut cached_condition_docids = vec![];
|
||||
let mut subpath_docids = vec![];
|
||||
|
||||
for &latest_condition in path {
|
||||
for (latest_condition_path_idx, &latest_condition) in path.iter().enumerate() {
|
||||
visited_conditions.push(latest_condition);
|
||||
|
||||
let condition_docids = condition_docids_cache.get_condition_docids(
|
||||
@ -254,11 +254,9 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
||||
&universe,
|
||||
)?;
|
||||
|
||||
cached_condition_docids.push((latest_condition, condition_docids.clone()));
|
||||
|
||||
// If the edge is empty, then the path will be empty as well, we update the graph
|
||||
// and caches accordingly and skip to the next candidate path.
|
||||
if condition_docids.is_disjoint(&universe) {
|
||||
if condition_docids.is_empty() {
|
||||
// 1. Store in the cache that this edge is empty for this universe
|
||||
dead_end_path_cache.forbid_condition(latest_condition);
|
||||
// 2. remove all the edges with this condition from the ranking rule graph
|
||||
@ -267,45 +265,71 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
||||
condition_docids_cache.cache.remove(&latest_condition);
|
||||
return Ok(ControlFlow::Continue(()));
|
||||
}
|
||||
path_docids &= condition_docids;
|
||||
subpath_docids.push(path_docids.clone());
|
||||
|
||||
// If the (sub)path is empty, we try to figure out why and update the caches accordingly.
|
||||
if path_docids.is_disjoint(condition_docids) {
|
||||
if path_docids.is_empty() {
|
||||
let len_prefix = subpath_docids.len() - 1;
|
||||
// First, we know that this path is empty, and thus any path
|
||||
// that is a superset of it will also be empty.
|
||||
dead_end_path_cache.forbid_condition_after_prefix(
|
||||
visited_conditions[..visited_conditions.len() - 1].iter().copied(),
|
||||
visited_conditions[..len_prefix].iter().copied(),
|
||||
latest_condition,
|
||||
);
|
||||
|
||||
let mut dead_end_cache_cursor = dead_end_path_cache;
|
||||
if visited_conditions.len() > 1 {
|
||||
let mut subprefix = vec![];
|
||||
// Deadend if the intersection between this edge and any
|
||||
// previous prefix is disjoint with the universe
|
||||
for (past_condition, subpath_docids) in visited_conditions[..len_prefix]
|
||||
.iter()
|
||||
.zip(subpath_docids[..len_prefix].iter())
|
||||
{
|
||||
if *past_condition == latest_condition {
|
||||
todo!();
|
||||
};
|
||||
subprefix.push(*past_condition);
|
||||
if condition_docids.is_disjoint(subpath_docids) {
|
||||
dead_end_path_cache.forbid_condition_after_prefix(
|
||||
subprefix.iter().copied(),
|
||||
latest_condition,
|
||||
);
|
||||
}
|
||||
}
|
||||
|
||||
// Second, if the intersection between this edge and any
|
||||
// previous prefix is disjoint with the universe, then... TODO
|
||||
for (past_condition, past_condition_docids) in
|
||||
cached_condition_docids.iter()
|
||||
{
|
||||
// TODO: should ensure that it is simply not possible to have twice
|
||||
// the same condition in the cached_condition_docids. Maybe it is
|
||||
// already the case?
|
||||
dead_end_cache_cursor =
|
||||
dead_end_cache_cursor.advance(*past_condition).unwrap();
|
||||
// TODO: check how that interacts with the dead end cache?
|
||||
if *past_condition == latest_condition {
|
||||
// TODO: should we break instead?
|
||||
// Is it even possible?
|
||||
continue;
|
||||
};
|
||||
if condition_docids.is_disjoint(past_condition_docids) {
|
||||
dead_end_cache_cursor.forbid_condition(latest_condition);
|
||||
// keep the same prefix and check the intersection with
|
||||
// all the remaining conditions
|
||||
let mut forbidden = dead_end_path_cache.forbidden.clone();
|
||||
let mut cursor = dead_end_path_cache;
|
||||
for &c in visited_conditions[..len_prefix].iter() {
|
||||
cursor = cursor.advance(c).unwrap();
|
||||
forbidden.union(&cursor.forbidden);
|
||||
}
|
||||
|
||||
let past_path_docids = &subpath_docids[subpath_docids.len() - 2];
|
||||
|
||||
let remaining_conditions =
|
||||
path[latest_condition_path_idx..].iter().skip(1);
|
||||
for next_condition in remaining_conditions {
|
||||
if forbidden.contains(*next_condition) {
|
||||
continue;
|
||||
}
|
||||
let next_condition_docids = condition_docids_cache
|
||||
.get_condition_docids(ctx, *next_condition, graph, &universe)?;
|
||||
|
||||
if past_path_docids.is_disjoint(next_condition_docids) {
|
||||
cursor.forbid_condition(*next_condition);
|
||||
}
|
||||
}
|
||||
}
|
||||
// We should maybe instead try to compute:
|
||||
// 0th & nth & 1st & n-1th & 2nd & etc...
|
||||
|
||||
return Ok(ControlFlow::Continue(()));
|
||||
} else {
|
||||
path_docids &= condition_docids;
|
||||
}
|
||||
}
|
||||
assert!(!path_docids.is_empty());
|
||||
// Accumulate the path for logging purposes only
|
||||
good_paths.push(path.to_vec());
|
||||
for condition in path {
|
||||
used_conditions.insert(*condition);
|
||||
}
|
||||
@ -323,7 +347,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
||||
// println!(" {} paths of cost {} in {}", paths.len(), cost, self.id);
|
||||
G::log_state(
|
||||
&original_graph,
|
||||
&paths,
|
||||
&good_paths,
|
||||
dead_end_path_cache,
|
||||
original_universe,
|
||||
all_distances,
|
||||
|
@ -10,7 +10,7 @@ use crate::search::new::interner::{Interned, MappedInterner};
|
||||
use crate::search::new::query_graph::QueryNodeData;
|
||||
use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm};
|
||||
use crate::search::new::ranking_rule_graph::{
|
||||
DeadEndPathCache, Edge, ProximityCondition, ProximityGraph, RankingRuleGraph,
|
||||
DeadEndsCache, Edge, ProximityCondition, ProximityGraph, RankingRuleGraph,
|
||||
RankingRuleGraphTrait, TypoCondition, TypoGraph,
|
||||
};
|
||||
use crate::search::new::small_bitmap::SmallBitmap;
|
||||
@ -44,7 +44,7 @@ pub enum SearchEvents {
|
||||
ProximityState {
|
||||
graph: RankingRuleGraph<ProximityGraph>,
|
||||
paths: Vec<Vec<Interned<ProximityCondition>>>,
|
||||
dead_end_path_cache: DeadEndPathCache<ProximityGraph>,
|
||||
dead_end_path_cache: DeadEndsCache<ProximityCondition>,
|
||||
universe: RoaringBitmap,
|
||||
distances: MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>,
|
||||
cost: u16,
|
||||
@ -52,7 +52,7 @@ pub enum SearchEvents {
|
||||
TypoState {
|
||||
graph: RankingRuleGraph<TypoGraph>,
|
||||
paths: Vec<Vec<Interned<TypoCondition>>>,
|
||||
dead_end_path_cache: DeadEndPathCache<TypoGraph>,
|
||||
dead_end_path_cache: DeadEndsCache<TypoCondition>,
|
||||
universe: RoaringBitmap,
|
||||
distances: MappedInterner<Vec<(u16, SmallBitmap<TypoCondition>)>, QueryNode>,
|
||||
cost: u16,
|
||||
@ -170,7 +170,7 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
|
||||
&mut self,
|
||||
query_graph: &RankingRuleGraph<ProximityGraph>,
|
||||
paths_map: &[Vec<Interned<ProximityCondition>>],
|
||||
dead_end_path_cache: &DeadEndPathCache<ProximityGraph>,
|
||||
dead_end_path_cache: &DeadEndsCache<ProximityCondition>,
|
||||
universe: &RoaringBitmap,
|
||||
distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>,
|
||||
cost: u16,
|
||||
@ -189,7 +189,7 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
|
||||
&mut self,
|
||||
query_graph: &RankingRuleGraph<TypoGraph>,
|
||||
paths_map: &[Vec<Interned<TypoCondition>>],
|
||||
dead_end_path_cache: &DeadEndPathCache<TypoGraph>,
|
||||
dead_end_path_cache: &DeadEndsCache<TypoCondition>,
|
||||
universe: &RoaringBitmap,
|
||||
distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoCondition>)>, QueryNode>,
|
||||
cost: u16,
|
||||
@ -527,7 +527,7 @@ shape: class"
|
||||
ctx: &mut SearchContext,
|
||||
graph: &RankingRuleGraph<R>,
|
||||
paths: &[Vec<Interned<R::Condition>>],
|
||||
dead_end_paths_cache: &DeadEndPathCache<R>,
|
||||
dead_end_paths_cache: &DeadEndsCache<R::Condition>,
|
||||
distances: MappedInterner<Vec<(u16, SmallBitmap<R::Condition>)>, QueryNode>,
|
||||
file: &mut File,
|
||||
) {
|
||||
@ -583,11 +583,11 @@ shape: class"
|
||||
// }
|
||||
// writeln!(file, "}}").unwrap();
|
||||
|
||||
writeln!(file, "Dead-end edges {{").unwrap();
|
||||
for condition in dead_end_paths_cache.conditions.iter() {
|
||||
writeln!(file, "{condition}").unwrap();
|
||||
}
|
||||
writeln!(file, "}}").unwrap();
|
||||
// writeln!(file, "Dead-end edges {{").unwrap();
|
||||
// for condition in dead_end_paths_cache.conditions.iter() {
|
||||
// writeln!(file, "{condition}").unwrap();
|
||||
// }
|
||||
// writeln!(file, "}}").unwrap();
|
||||
|
||||
// writeln!(file, "Dead-end prefixes {{").unwrap();
|
||||
// writeln!(file, "}}").unwrap();
|
||||
|
@ -37,7 +37,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
&mut visit,
|
||||
&mut vec![],
|
||||
&mut SmallBitmap::for_interned_values_in(&self.conditions_interner),
|
||||
&mut dead_end_path_cache.forbidden.clone(),
|
||||
dead_end_path_cache.forbidden.clone(),
|
||||
)?;
|
||||
Ok(())
|
||||
}
|
||||
@ -54,12 +54,12 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
) -> Result<ControlFlow<()>>,
|
||||
prev_conditions: &mut Vec<Interned<G::Condition>>,
|
||||
cur_path: &mut SmallBitmap<G::Condition>,
|
||||
forbidden_conditions: &mut SmallBitmap<G::Condition>,
|
||||
mut forbidden_conditions: SmallBitmap<G::Condition>,
|
||||
) -> Result<bool> {
|
||||
let mut any_valid = false;
|
||||
|
||||
let edges = self.edges_of_node.get(from).clone();
|
||||
for edge_idx in edges.iter() {
|
||||
'edges_loop: for edge_idx in edges.iter() {
|
||||
let Some(edge) = self.edges_store.get(edge_idx).as_ref() else { continue };
|
||||
if cost < edge.cost as u16 {
|
||||
continue;
|
||||
@ -73,6 +73,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
ControlFlow::Continue(_) => {}
|
||||
ControlFlow::Break(_) => return Ok(true),
|
||||
}
|
||||
true
|
||||
} else {
|
||||
self.visit_paths_of_cost_rec(
|
||||
edge.dest_node,
|
||||
@ -82,8 +83,8 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
visit,
|
||||
prev_conditions,
|
||||
cur_path,
|
||||
forbidden_conditions,
|
||||
)?;
|
||||
forbidden_conditions.clone(),
|
||||
)?
|
||||
}
|
||||
}
|
||||
Some(condition) => {
|
||||
@ -101,18 +102,19 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
prev_conditions.push(condition);
|
||||
let mut new_forbidden_conditions = forbidden_conditions.clone();
|
||||
if let Some(next_forbidden) =
|
||||
dead_end_path_cache.forbidden_conditions_after_prefix(&prev_conditions)
|
||||
dead_end_path_cache.forbidden_conditions_after_prefix(prev_conditions)
|
||||
{
|
||||
new_forbidden_conditions.union(&next_forbidden);
|
||||
}
|
||||
|
||||
if edge.dest_node == self.query_graph.end_node {
|
||||
let next_any_valid = if edge.dest_node == self.query_graph.end_node {
|
||||
any_valid = true;
|
||||
let control_flow = visit(prev_conditions, self, dead_end_path_cache)?;
|
||||
match control_flow {
|
||||
ControlFlow::Continue(_) => {}
|
||||
ControlFlow::Break(_) => return Ok(true),
|
||||
}
|
||||
true
|
||||
} else {
|
||||
self.visit_paths_of_cost_rec(
|
||||
edge.dest_node,
|
||||
@ -122,13 +124,23 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
visit,
|
||||
prev_conditions,
|
||||
cur_path,
|
||||
&mut new_forbidden_conditions,
|
||||
)?;
|
||||
}
|
||||
new_forbidden_conditions,
|
||||
)?
|
||||
};
|
||||
cur_path.remove(condition);
|
||||
prev_conditions.pop();
|
||||
next_any_valid
|
||||
}
|
||||
};
|
||||
any_valid |= next_any_valid;
|
||||
|
||||
if next_any_valid {
|
||||
forbidden_conditions = dead_end_path_cache
|
||||
.forbidden_conditions_for_all_prefixes_up_to(prev_conditions);
|
||||
if cur_path.intersects(&forbidden_conditions) {
|
||||
break 'edges_loop;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
Ok(any_valid)
|
||||
|
@ -8,20 +8,17 @@ use crate::search::new::interner::Interned;
|
||||
use crate::search::new::SearchContext;
|
||||
use crate::Result;
|
||||
|
||||
// TODO: give a generation to each universe, then be able to get the exact
|
||||
// delta of docids between two universes of different generations!
|
||||
|
||||
/// A cache storing the document ids associated with each ranking rule edge
|
||||
pub struct ConditionDocIdsCache<G: RankingRuleGraphTrait> {
|
||||
// TODO: should be FxHashMap<Interned<EdgeCondition>, RoaringBitmap>
|
||||
pub cache: FxHashMap<Interned<G::Condition>, RoaringBitmap>,
|
||||
pub universe_length: u64,
|
||||
pub cache: FxHashMap<Interned<G::Condition>, (u64, RoaringBitmap)>,
|
||||
_phantom: PhantomData<G>,
|
||||
}
|
||||
impl<G: RankingRuleGraphTrait> ConditionDocIdsCache<G> {
|
||||
pub fn new(universe: &RoaringBitmap) -> Self {
|
||||
Self {
|
||||
cache: Default::default(),
|
||||
_phantom: Default::default(),
|
||||
universe_length: universe.len(),
|
||||
}
|
||||
impl<G: RankingRuleGraphTrait> Default for ConditionDocIdsCache<G> {
|
||||
fn default() -> Self {
|
||||
Self { cache: Default::default(), _phantom: Default::default() }
|
||||
}
|
||||
}
|
||||
impl<G: RankingRuleGraphTrait> ConditionDocIdsCache<G> {
|
||||
@ -40,20 +37,21 @@ impl<G: RankingRuleGraphTrait> ConditionDocIdsCache<G> {
|
||||
if self.cache.contains_key(&interned_condition) {
|
||||
// TODO compare length of universe compared to the one in self
|
||||
// if it is smaller, then update the value
|
||||
|
||||
// TODO: should we update the bitmap in the cache if the new universe
|
||||
// reduces it?
|
||||
// TODO: maybe have a generation: u32 to track every time the universe was
|
||||
// reduced. Then only attempt to recompute the intersection when there is a chance
|
||||
// that condition_docids & universe changed
|
||||
return Ok(&self.cache[&interned_condition]);
|
||||
let (universe_len, docids) = self.cache.entry(interned_condition).or_default();
|
||||
if *universe_len == universe.len() {
|
||||
return Ok(docids);
|
||||
} else {
|
||||
*docids &= universe;
|
||||
*universe_len = universe.len();
|
||||
return Ok(docids);
|
||||
}
|
||||
}
|
||||
// TODO: maybe universe doesn't belong here
|
||||
let condition = graph.conditions_interner.get(interned_condition);
|
||||
// TODO: faster way to do this?
|
||||
let docids = universe & G::resolve_condition(ctx, condition, universe)?;
|
||||
let _ = self.cache.insert(interned_condition, docids);
|
||||
let docids = &self.cache[&interned_condition];
|
||||
let docids = G::resolve_condition(ctx, condition, universe)?;
|
||||
let _ = self.cache.insert(interned_condition, (universe.len(), docids));
|
||||
let (_, docids) = &self.cache[&interned_condition];
|
||||
Ok(docids)
|
||||
}
|
||||
}
|
||||
|
@ -1,83 +0,0 @@
|
||||
// use super::{path_set::PathSet, RankingRuleGraphTrait};
|
||||
// use crate::search::new::{
|
||||
// interner::{FixedSizeInterner, Interned, MappedInterner},
|
||||
// small_bitmap::SmallBitmap,
|
||||
// };
|
||||
|
||||
// /// A cache which stores sufficient conditions for a path
|
||||
// /// to resolve to an empty set of candidates within the current
|
||||
// /// universe.
|
||||
// pub struct DeadEndPathCache<G: RankingRuleGraphTrait> {
|
||||
// /// The set of edge conditions that resolve to no documents.
|
||||
// pub conditions: SmallBitmap<G::Condition>,
|
||||
// /// A set of path prefixes that resolve to no documents.
|
||||
// pub prefixes: PathSet<G::Condition>,
|
||||
// /// A set of empty couples of edge conditions that resolve to no documents.
|
||||
// pub condition_couples: MappedInterner<SmallBitmap<G::Condition>, G::Condition>,
|
||||
// }
|
||||
// impl<G: RankingRuleGraphTrait> Clone for DeadEndPathCache<G> {
|
||||
// fn clone(&self) -> Self {
|
||||
// Self {
|
||||
// conditions: self.conditions.clone(),
|
||||
// prefixes: self.prefixes.clone(),
|
||||
// condition_couples: self.condition_couples.clone(),
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
// impl<G: RankingRuleGraphTrait> DeadEndPathCache<G> {
|
||||
// /// Create a new cache sized for the conditions interned in `all_conditions`.
|
||||
// pub fn new(all_conditions: &FixedSizeInterner<G::Condition>) -> Self {
|
||||
// Self {
|
||||
// conditions: SmallBitmap::for_interned_values_in(all_conditions),
|
||||
// prefixes: PathSet::default(),
|
||||
// condition_couples: all_conditions
|
||||
// .map(|_| SmallBitmap::for_interned_values_in(all_conditions)),
|
||||
// }
|
||||
// }
|
||||
|
||||
// /// Store in the cache that every path containing the given edge resolves to no documents.
|
||||
// pub fn add_condition(&mut self, condition: Interned<G::Condition>) {
|
||||
// self.conditions.insert(condition);
|
||||
// self.condition_couples.get_mut(condition).clear();
|
||||
// self.prefixes.remove_edge(condition);
|
||||
// for (_, edges2) in self.condition_couples.iter_mut() {
|
||||
// edges2.remove(condition);
|
||||
// }
|
||||
// }
|
||||
// /// Store in the cache that every path starting with the given prefix resolves to no documents.
|
||||
// pub fn add_prefix(&mut self, prefix: &[Interned<G::Condition>]) {
|
||||
// // TODO: typed PathSet
|
||||
// self.prefixes.insert(prefix.iter().copied());
|
||||
// }
|
||||
|
||||
// /// Store in the cache that every path containing the two given edges resolves to no documents.
|
||||
// pub fn add_condition_couple(
|
||||
// &mut self,
|
||||
// edge1: Interned<G::Condition>,
|
||||
// edge2: Interned<G::Condition>,
|
||||
// ) {
|
||||
// self.condition_couples.get_mut(edge1).insert(edge2);
|
||||
// }
|
||||
|
||||
// /// Returns true if the cache can determine that the given path resolves to no documents.
|
||||
// pub fn path_is_dead_end(
|
||||
// &self,
|
||||
// path: &[Interned<G::Condition>],
|
||||
// path_bitmap: &SmallBitmap<G::Condition>,
|
||||
// ) -> bool {
|
||||
// if path_bitmap.intersects(&self.conditions) {
|
||||
// return true;
|
||||
// }
|
||||
// for condition in path.iter() {
|
||||
// let forbidden_other_edges = self.condition_couples.get(*condition);
|
||||
// if path_bitmap.intersects(forbidden_other_edges) {
|
||||
// return true;
|
||||
// }
|
||||
// }
|
||||
// if self.prefixes.contains_prefix_of_path(path) {
|
||||
// return true;
|
||||
// }
|
||||
// false
|
||||
// }
|
||||
// }
|
@ -8,8 +8,7 @@ the same but the edges are replaced.
|
||||
mod build;
|
||||
mod cheapest_paths;
|
||||
mod condition_docids_cache;
|
||||
mod dead_end_path_cache;
|
||||
mod path_set;
|
||||
mod dead_ends_cache;
|
||||
|
||||
/// Implementation of the `proximity` ranking rule
|
||||
mod proximity;
|
||||
@ -20,8 +19,7 @@ use std::collections::HashSet;
|
||||
use std::hash::Hash;
|
||||
|
||||
pub use condition_docids_cache::ConditionDocIdsCache;
|
||||
// pub use dead_end_path_cache::DeadEndPathCache;
|
||||
pub use path_set::DeadEndsCache;
|
||||
pub use dead_ends_cache::DeadEndsCache;
|
||||
pub use proximity::{ProximityCondition, ProximityGraph};
|
||||
use roaring::RoaringBitmap;
|
||||
pub use typo::{TypoCondition, TypoGraph};
|
||||
|
@ -1,166 +0,0 @@
|
||||
// What is PathSet used for?
|
||||
// For the empty_prefixes field in the EmptyPathsCache only :/
|
||||
// but it could be used for more, like efficient computing of a set of paths
|
||||
|
||||
use crate::search::new::{
|
||||
interner::{FixedSizeInterner, Interned},
|
||||
small_bitmap::SmallBitmap,
|
||||
};
|
||||
|
||||
pub struct DeadEndsCache<T> {
|
||||
nodes: Vec<(Interned<T>, Self)>,
|
||||
pub forbidden: SmallBitmap<T>,
|
||||
}
|
||||
impl<T> DeadEndsCache<T> {
|
||||
pub fn new(for_interner: &FixedSizeInterner<T>) -> Self {
|
||||
Self { nodes: vec![], forbidden: SmallBitmap::for_interned_values_in(for_interner) }
|
||||
}
|
||||
pub fn forbid_condition(&mut self, condition: Interned<T>) {
|
||||
self.forbidden.insert(condition);
|
||||
}
|
||||
fn advance(&mut self, condition: Interned<T>) -> Option<&mut Self> {
|
||||
for (e, next_node) in &mut self.nodes {
|
||||
if condition == *e {
|
||||
return Some(next_node);
|
||||
}
|
||||
}
|
||||
None
|
||||
}
|
||||
pub fn forbidden_conditions_after_prefix(
|
||||
&mut self,
|
||||
mut prefix: &[Interned<T>],
|
||||
) -> Option<SmallBitmap<T>> {
|
||||
let mut cursor = self;
|
||||
for c in prefix.iter() {
|
||||
if let Some(next) = cursor.advance(*c) {
|
||||
cursor = next;
|
||||
} else {
|
||||
return None;
|
||||
}
|
||||
}
|
||||
Some(cursor.forbidden.clone())
|
||||
}
|
||||
pub fn forbid_condition_after_prefix(
|
||||
&mut self,
|
||||
mut prefix: impl Iterator<Item = Interned<T>>,
|
||||
forbidden: Interned<T>,
|
||||
) {
|
||||
match prefix.next() {
|
||||
None => {
|
||||
self.forbidden.insert(forbidden);
|
||||
}
|
||||
Some(first_condition) => {
|
||||
for (condition, next_node) in &mut self.nodes {
|
||||
if condition == &first_condition {
|
||||
return next_node.forbid_condition_after_prefix(prefix, forbidden);
|
||||
}
|
||||
}
|
||||
let mut rest = DeadEndsCache {
|
||||
nodes: vec![],
|
||||
forbidden: SmallBitmap::new(self.forbidden.universe_length()),
|
||||
};
|
||||
rest.forbid_condition_after_prefix(prefix, forbidden);
|
||||
self.nodes.push((first_condition, rest));
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
// /// A set of `Vec<Interned<T>>` implemented as a prefix tree.
|
||||
// pub struct PathSet<T> {
|
||||
// nodes: Vec<(Interned<T>, Self)>,
|
||||
// is_end: bool,
|
||||
// }
|
||||
|
||||
// impl<T> Clone for PathSet<T> {
|
||||
// fn clone(&self) -> Self {
|
||||
// Self { nodes: self.nodes.clone(), is_end: self.is_end }
|
||||
// }
|
||||
// }
|
||||
|
||||
// impl<T> std::fmt::Debug for PathSet<T> {
|
||||
// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||
// f.debug_struct("PathSet").field("nodes", &self.nodes).field("is_end", &self.is_end).finish()
|
||||
// }
|
||||
// }
|
||||
|
||||
// impl<T> Default for PathSet<T> {
|
||||
// fn default() -> Self {
|
||||
// Self { nodes: Default::default(), is_end: Default::default() }
|
||||
// }
|
||||
// }
|
||||
|
||||
// impl<T> PathSet<T> {
|
||||
// pub fn insert(&mut self, mut conditions: impl Iterator<Item = Interned<T>>) {
|
||||
// match conditions.next() {
|
||||
// None => {
|
||||
// self.is_end = true;
|
||||
// }
|
||||
// Some(first_condition) => {
|
||||
// for (condition, next_node) in &mut self.nodes {
|
||||
// if condition == &first_condition {
|
||||
// return next_node.insert(conditions);
|
||||
// }
|
||||
// }
|
||||
// let mut rest = PathSet::default();
|
||||
// rest.insert(conditions);
|
||||
// self.nodes.push((first_condition, rest));
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
// pub fn remove_condition(&mut self, forbidden_condition: Interned<T>) {
|
||||
// let mut i = 0;
|
||||
// while i < self.nodes.len() {
|
||||
// let should_remove = if self.nodes[i].0 == forbidden_condition {
|
||||
// true
|
||||
// } else if !self.nodes[i].1.nodes.is_empty() {
|
||||
// self.nodes[i].1.remove_condition(forbidden_condition);
|
||||
// self.nodes[i].1.nodes.is_empty()
|
||||
// } else {
|
||||
// false
|
||||
// };
|
||||
// if should_remove {
|
||||
// self.nodes.remove(i);
|
||||
// } else {
|
||||
// i += 1;
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
// pub fn final_conditions_after_prefix(
|
||||
// &self,
|
||||
// prefix: &[Interned<T>],
|
||||
// visit: &mut impl FnMut(Interned<T>),
|
||||
// ) {
|
||||
// let [first_condition, remaining_prefix @ ..] = prefix else {
|
||||
// for node in self.nodes.iter() {
|
||||
// if node.1.is_end {
|
||||
// visit(node.0)
|
||||
// }
|
||||
// }
|
||||
// return
|
||||
// };
|
||||
// for (condition, rest) in self.nodes.iter() {
|
||||
// if condition == first_condition {
|
||||
// return rest.final_conditions_after_prefix(remaining_prefix, visit);
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
|
||||
// pub fn contains_prefix_of_path(&self, path: &[Interned<T>]) -> bool {
|
||||
// if self.is_end {
|
||||
// return true;
|
||||
// }
|
||||
// match path {
|
||||
// [] => false,
|
||||
// [first_condition, remaining_path @ ..] => {
|
||||
// for (condition, rest) in self.nodes.iter() {
|
||||
// if condition == first_condition {
|
||||
// return rest.contains_prefix_of_path(remaining_path);
|
||||
// }
|
||||
// }
|
||||
// false
|
||||
// }
|
||||
// }
|
||||
// }
|
||||
// }
|
Loading…
Reference in New Issue
Block a user