mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-23 21:20:24 +01:00
Replace EdgeCondition with an Option<..> + other code cleanup
This commit is contained in:
parent
7b1d8f4c6d
commit
aa59c3bc2c
@ -20,10 +20,17 @@ pub struct DistinctOutput {
|
||||
pub excluded: RoaringBitmap,
|
||||
}
|
||||
|
||||
/// Return a [`DistinctOutput`] containing:
|
||||
/// - `remaining`: a set of docids built such that exactly one element from `candidates`
|
||||
/// is kept for each distinct value inside the given field. If the field does not exist, it
|
||||
/// is considered unique.
|
||||
/// - `excluded`: the set of document ids that contain a value for the given field that occurs
|
||||
/// in the given candidates.
|
||||
pub fn apply_distinct_rule<'ctx>(
|
||||
ctx: &mut SearchContext<'ctx>,
|
||||
field_id: u16,
|
||||
candidates: &RoaringBitmap,
|
||||
// TODO: add a universe here, such that the `excluded` are a subset of the universe?
|
||||
) -> Result<DistinctOutput> {
|
||||
let mut excluded = RoaringBitmap::new();
|
||||
let mut remaining = RoaringBitmap::new();
|
||||
@ -37,6 +44,7 @@ pub fn apply_distinct_rule<'ctx>(
|
||||
Ok(DistinctOutput { remaining, excluded })
|
||||
}
|
||||
|
||||
/// Apply the distinct rule defined by [`apply_distinct_rule`] for a single document id.
|
||||
fn distinct_single_docid(
|
||||
index: &Index,
|
||||
txn: &RoTxn,
|
||||
@ -69,6 +77,7 @@ fn distinct_single_docid(
|
||||
Ok(())
|
||||
}
|
||||
|
||||
/// Return all the docids containing the given value in the given field
|
||||
fn facet_value_docids(
|
||||
database: Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||
txn: &RoTxn,
|
||||
@ -79,13 +88,15 @@ fn facet_value_docids(
|
||||
.get(txn, &FacetGroupKey { field_id, level: 0, left_bound: facet_value })
|
||||
.map(|opt| opt.map(|v| v.bitmap))
|
||||
}
|
||||
|
||||
/// Return an iterator over each number value in the given field of the given document.
|
||||
fn facet_number_values<'a>(
|
||||
id: u32,
|
||||
distinct: u16,
|
||||
docid: u32,
|
||||
field_id: u16,
|
||||
index: &Index,
|
||||
txn: &'a RoTxn,
|
||||
) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<ByteSliceRefCodec>, Unit>> {
|
||||
let key = facet_values_prefix_key(distinct, id);
|
||||
let key = facet_values_prefix_key(field_id, docid);
|
||||
|
||||
let iter = index
|
||||
.field_id_docid_facet_f64s
|
||||
@ -96,13 +107,14 @@ fn facet_number_values<'a>(
|
||||
Ok(iter)
|
||||
}
|
||||
|
||||
/// Return an iterator over each string value in the given field of the given document.
|
||||
fn facet_string_values<'a>(
|
||||
docid: u32,
|
||||
distinct: u16,
|
||||
field_id: u16,
|
||||
index: &Index,
|
||||
txn: &'a RoTxn,
|
||||
) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<ByteSliceRefCodec>, Str>> {
|
||||
let key = facet_values_prefix_key(distinct, docid);
|
||||
let key = facet_values_prefix_key(field_id, docid);
|
||||
|
||||
let iter = index
|
||||
.field_id_docid_facet_strings
|
||||
|
@ -45,7 +45,7 @@ use super::interner::MappedInterner;
|
||||
use super::logger::SearchLogger;
|
||||
use super::query_graph::QueryNode;
|
||||
use super::ranking_rule_graph::{
|
||||
DeadEndPathCache, EdgeCondition, EdgeConditionDocIdsCache, ProximityGraph, RankingRuleGraph,
|
||||
DeadEndPathCache, EdgeConditionDocIdsCache, ProximityGraph, RankingRuleGraph,
|
||||
RankingRuleGraphTrait, TypoGraph,
|
||||
};
|
||||
use super::small_bitmap::SmallBitmap;
|
||||
@ -87,7 +87,7 @@ pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
|
||||
/// Cache to retrieve the docids associated with each edge
|
||||
edge_conditions_cache: EdgeConditionDocIdsCache<G>,
|
||||
/// Cache used to optimistically discard paths that resolve to no documents.
|
||||
empty_paths_cache: DeadEndPathCache<G>,
|
||||
dead_end_path_cache: DeadEndPathCache<G>,
|
||||
/// A structure giving the list of possible costs from each node to the end node,
|
||||
/// along with a set of unavoidable edges that must be traversed to achieve that distance.
|
||||
all_distances: MappedInterner<Vec<(u16, SmallBitmap<G::EdgeCondition>)>, QueryNode>,
|
||||
@ -101,27 +101,23 @@ pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
|
||||
fn remove_empty_edges<'ctx, G: RankingRuleGraphTrait>(
|
||||
ctx: &mut SearchContext<'ctx>,
|
||||
graph: &mut RankingRuleGraph<G>,
|
||||
edge_docids_cache: &mut EdgeConditionDocIdsCache<G>,
|
||||
condition_docids_cache: &mut EdgeConditionDocIdsCache<G>,
|
||||
universe: &RoaringBitmap,
|
||||
empty_paths_cache: &mut DeadEndPathCache<G>,
|
||||
dead_end_path_cache: &mut DeadEndPathCache<G>,
|
||||
) -> Result<()> {
|
||||
for edge_id in graph.edges_store.indexes() {
|
||||
let Some(edge) = graph.edges_store.get(edge_id).as_ref() else {
|
||||
continue;
|
||||
};
|
||||
let condition = edge.condition;
|
||||
let Some(condition) = edge.condition else { continue };
|
||||
|
||||
match condition {
|
||||
EdgeCondition::Unconditional => continue,
|
||||
EdgeCondition::Conditional(condition) => {
|
||||
let docids = edge_docids_cache.get_edge_docids(ctx, condition, graph, universe)?;
|
||||
if docids.is_disjoint(universe) {
|
||||
graph.remove_edges_with_condition(condition);
|
||||
empty_paths_cache.add_condition(condition);
|
||||
edge_docids_cache.cache.remove(&condition);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
let docids =
|
||||
condition_docids_cache.get_condition_docids(ctx, condition, graph, universe)?;
|
||||
if docids.is_disjoint(universe) {
|
||||
graph.remove_edges_with_condition(condition);
|
||||
dead_end_path_cache.add_condition(condition);
|
||||
condition_docids_cache.cache.remove(&condition);
|
||||
continue;
|
||||
}
|
||||
}
|
||||
Ok(())
|
||||
@ -139,17 +135,17 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
||||
query_graph: &QueryGraph,
|
||||
) -> Result<()> {
|
||||
let mut graph = RankingRuleGraph::build(ctx, query_graph.clone())?;
|
||||
let mut edge_docids_cache = EdgeConditionDocIdsCache::default();
|
||||
let mut empty_paths_cache = DeadEndPathCache::new(&graph.conditions_interner);
|
||||
let mut condition_docids_cache = EdgeConditionDocIdsCache::default();
|
||||
let mut dead_end_path_cache = DeadEndPathCache::new(&graph.conditions_interner);
|
||||
|
||||
// First simplify the graph as much as possible, by computing the docids of the edges
|
||||
// First simplify the graph as much as possible, by computing the docids of all the conditions
|
||||
// within the rule's universe and removing the edges that have no associated docids.
|
||||
remove_empty_edges(
|
||||
ctx,
|
||||
&mut graph,
|
||||
&mut edge_docids_cache,
|
||||
&mut condition_docids_cache,
|
||||
universe,
|
||||
&mut empty_paths_cache,
|
||||
&mut dead_end_path_cache,
|
||||
)?;
|
||||
|
||||
// Then pre-compute the cost of all paths from each node to the end node
|
||||
@ -157,8 +153,8 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
||||
|
||||
let state = GraphBasedRankingRuleState {
|
||||
graph,
|
||||
edge_conditions_cache: edge_docids_cache,
|
||||
empty_paths_cache,
|
||||
edge_conditions_cache: condition_docids_cache,
|
||||
dead_end_path_cache,
|
||||
all_distances,
|
||||
cur_distance_idx: 0,
|
||||
};
|
||||
@ -187,7 +183,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
||||
&mut state.graph,
|
||||
&mut state.edge_conditions_cache,
|
||||
universe,
|
||||
&mut state.empty_paths_cache,
|
||||
&mut state.dead_end_path_cache,
|
||||
)?;
|
||||
|
||||
// If the cur_distance_idx does not point to a valid cost in the `all_distances`
|
||||
@ -208,8 +204,8 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
||||
|
||||
let GraphBasedRankingRuleState {
|
||||
graph,
|
||||
edge_conditions_cache: edge_docids_cache,
|
||||
empty_paths_cache,
|
||||
edge_conditions_cache: condition_docids_cache,
|
||||
dead_end_path_cache,
|
||||
all_distances,
|
||||
cur_distance_idx: _,
|
||||
} = &mut state;
|
||||
@ -224,18 +220,18 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
||||
// For each path of the given cost, we will compute its associated
|
||||
// document ids.
|
||||
// In case the path does not resolve to any document id, we try to figure out why
|
||||
// and update the `empty_paths_cache` accordingly.
|
||||
// and update the `dead_end_path_cache` accordingly.
|
||||
// For example, it may be that the path is empty because one of its edges is disjoint
|
||||
// with the universe, or because a prefix of the path is disjoint with the universe, or because
|
||||
// the path contains two edges that are disjoint from each other within the universe.
|
||||
// Updating the empty_paths_cache helps speed up the execution of `visit_paths_of_cost` and reduces
|
||||
// Updating the dead_end_path_cache helps speed up the execution of `visit_paths_of_cost` and reduces
|
||||
// the number of future candidate paths given by that same function.
|
||||
graph.visit_paths_of_cost(
|
||||
graph.query_graph.root_node,
|
||||
cost,
|
||||
all_distances,
|
||||
empty_paths_cache,
|
||||
|path, graph, empty_paths_cache| {
|
||||
dead_end_path_cache,
|
||||
|path, graph, dead_end_path_cache| {
|
||||
// Accumulate the path for logging purposes only
|
||||
paths.push(path.to_vec());
|
||||
let mut path_docids = universe.clone();
|
||||
@ -243,47 +239,48 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
||||
// We store the edges and their docids in vectors in case the path turns out to be
|
||||
// empty and we need to figure out why it was empty.
|
||||
let mut visited_conditions = vec![];
|
||||
let mut cached_edge_docids = vec![];
|
||||
let mut cached_condition_docids = vec![];
|
||||
// graph.conditions_interner.map(|_| RoaringBitmap::new());
|
||||
|
||||
for &condition in path {
|
||||
visited_conditions.push(condition);
|
||||
|
||||
let edge_docids =
|
||||
edge_docids_cache.get_edge_docids(ctx, condition, graph, &universe)?;
|
||||
let condition_docids = condition_docids_cache
|
||||
.get_condition_docids(ctx, condition, graph, &universe)?;
|
||||
|
||||
cached_edge_docids.push((condition, edge_docids.clone())); // .get_mut(condition) = edge_docids.clone();
|
||||
cached_condition_docids.push((condition, condition_docids.clone())); // .get_mut(condition) = condition_docids.clone();
|
||||
|
||||
// If the edge is empty, then the path will be empty as well, so we update the graph
|
||||
// and caches accordingly and skip to the next candidate path.
|
||||
if edge_docids.is_disjoint(&universe) {
|
||||
if condition_docids.is_disjoint(&universe) {
|
||||
// 1. Store in the cache that this edge is empty for this universe
|
||||
empty_paths_cache.add_condition(condition);
|
||||
dead_end_path_cache.add_condition(condition);
|
||||
// 2. remove this edge from the ranking rule graph
|
||||
// ouch, no! :( need to link a condition to one or more ranking rule edges
|
||||
graph.remove_edges_with_condition(condition);
|
||||
// 3. Also remove the entry from the edge_docids_cache, since we don't need it anymore
|
||||
edge_docids_cache.cache.remove(&condition);
|
||||
// 3. Also remove the entry from the condition_docids_cache, since we don't need it anymore
|
||||
condition_docids_cache.cache.remove(&condition);
|
||||
return Ok(ControlFlow::Continue(()));
|
||||
}
|
||||
path_docids &= edge_docids;
|
||||
path_docids &= condition_docids;
|
||||
|
||||
// If the (sub)path is empty, we try to figure out why and update the caches accordingly.
|
||||
if path_docids.is_disjoint(&universe) {
|
||||
// First, we know that this path is empty, and thus any path
|
||||
// that is a superset of it will also be empty.
|
||||
empty_paths_cache.add_prefix(&visited_conditions);
|
||||
dead_end_path_cache.add_prefix(&visited_conditions);
|
||||
// Second, if the intersection between this edge and any
|
||||
// previous one is disjoint with the universe,
|
||||
// then we also know that any path containing the same couple of
|
||||
// edges will also be empty.
|
||||
for (past_condition, edge_docids2) in cached_edge_docids.iter() {
|
||||
for (past_condition, condition_docids2) in cached_condition_docids.iter() {
|
||||
if *past_condition == condition {
|
||||
continue;
|
||||
};
|
||||
let intersection = edge_docids & edge_docids2;
|
||||
let intersection = condition_docids & condition_docids2;
|
||||
if intersection.is_disjoint(&universe) {
|
||||
empty_paths_cache.add_condition_couple(*past_condition, condition);
|
||||
dead_end_path_cache
|
||||
.add_condition_couple(*past_condition, condition);
|
||||
}
|
||||
}
|
||||
// We should maybe instead try to compute:
|
||||
@ -310,7 +307,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
||||
G::log_state(
|
||||
&original_graph,
|
||||
&paths,
|
||||
empty_paths_cache,
|
||||
dead_end_path_cache,
|
||||
original_universe,
|
||||
all_distances,
|
||||
cost,
|
||||
@ -322,8 +319,8 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
||||
// But we only do it in case the bucket length is >1, because otherwise
|
||||
// we know the child ranking rule won't be called anyway
|
||||
let mut next_query_graph = original_graph.query_graph;
|
||||
next_query_graph.simplify();
|
||||
if bucket.len() > 1 {
|
||||
next_query_graph.simplify();
|
||||
// 1. Gather all the words and phrases used in the computation of this bucket
|
||||
let mut used_words = HashSet::new();
|
||||
let mut used_phrases = HashSet::new();
|
||||
|
@ -25,6 +25,8 @@ impl<T> Interned<T> {
|
||||
/// be copied, compared, and hashed efficiently. An immutable reference to the original value
|
||||
/// can be retrieved using `self.get(interned)`. A set of values within the interner can be
|
||||
/// efficiently managed using [`SmallBitmap<T>`](super::small_bitmap::SmallBitmap).
|
||||
///
|
||||
/// A dedup-interner can contain a maximum of `u16::MAX` values.
|
||||
#[derive(Clone)]
|
||||
pub struct DedupInterner<T> {
|
||||
stable_store: Vec<T>,
|
||||
@ -36,7 +38,8 @@ impl<T> Default for DedupInterner<T> {
|
||||
}
|
||||
}
|
||||
impl<T> DedupInterner<T> {
|
||||
/// Consume this dedup-interner and return a [`FixedSizeInterner`] holding the same values.
///
|
||||
/// Convert the dedup-interner into a fixed-size interner, such that new
|
||||
/// elements cannot be added to it anymore.
///
/// The backing `stable_store` is moved into the result, not copied; only that
/// field is carried over to the returned interner.
|
||||
pub fn freeze(self) -> FixedSizeInterner<T> {
|
||||
FixedSizeInterner { stable_store: self.stable_store }
|
||||
}
|
||||
@ -46,6 +49,8 @@ impl<T> DedupInterner<T>
|
||||
where
|
||||
T: Clone + Eq + Hash,
|
||||
{
|
||||
/// Insert the given value into the dedup-interner, and return
|
||||
/// its index.
|
||||
pub fn insert(&mut self, s: T) -> Interned<T> {
|
||||
if let Some(interned) = self.lookup.get(&s) {
|
||||
*interned
|
||||
@ -57,35 +62,21 @@ where
|
||||
interned
|
||||
}
|
||||
}
|
||||
/// Get a reference to the interned value.
///
/// # Panics
///
/// The backing store is indexed directly with `interned.idx`, so this panics
/// if that index is not smaller than the number of stored values.
// NOTE(review): presumably this can only happen when `interned` was produced
// by a different interner — confirm against callers.
|
||||
pub fn get(&self, interned: Interned<T>) -> &T {
|
||||
&self.stable_store[interned.idx as usize]
|
||||
}
|
||||
}
|
||||
/// A growable store of values of type `T` backed by a `Vec<T>`.
///
/// Values are appended with `push`, which returns the [`Interned<T>`] index of
/// the new value. `push` does not look for an existing equal value, so pushing
/// the same value twice stores it twice.
#[derive(Clone)]
|
||||
pub struct Interner<T> {
|
||||
// Backing storage; the index handed out by `push` is the position in this vector.
stable_store: Vec<T>,
|
||||
}
|
||||
// Implemented by hand rather than derived, so the impl carries no `T: Default` bound.
impl<T> Default for Interner<T> {
|
||||
/// Create an interner whose backing store is the default (empty) `Vec`.
fn default() -> Self {
|
||||
Self { stable_store: Default::default() }
|
||||
}
|
||||
}
|
||||
impl<T> Interner<T> {
|
||||
/// Consume the interner and return a [`FixedSizeInterner`] holding the same
/// values. The backing store is moved, not copied.
pub fn freeze(self) -> FixedSizeInterner<T> {
|
||||
FixedSizeInterner { stable_store: self.stable_store }
|
||||
}
|
||||
/// Append `s` to the store and return the index of the newly stored value.
///
/// # Panics
///
/// Panics if the store already contains `u16::MAX` or more values.
pub fn push(&mut self, s: T) -> Interned<T> {
|
||||
// Checked before pushing: guarantees the length after the push still fits in a `u16`.
assert!(self.stable_store.len() < u16::MAX as usize);
|
||||
self.stable_store.push(s);
|
||||
// The value just pushed sits at the last position, i.e. `len - 1`.
Interned::from_raw(self.stable_store.len() as u16 - 1)
|
||||
}
|
||||
}
|
||||
|
||||
/// A fixed-length store for values of type `T`, where each value is identified
|
||||
/// by an index of type [`Interned<T>`].
|
||||
#[derive(Clone)]
|
||||
pub struct FixedSizeInterner<T> {
|
||||
// Backing storage for the values.
// NOTE(review): presumably an `Interned<T>` index is a position in this vector — confirm.
stable_store: Vec<T>,
|
||||
}
|
||||
impl<T: Clone> FixedSizeInterner<T> {
|
||||
/// Create a fixed-size interner of the given length containing
|
||||
/// clones of the given value.
///
/// The resulting store holds exactly `length` elements; a `length` of `0`
/// yields an empty interner.
|
||||
pub fn new(length: u16, value: T) -> Self {
|
||||
Self { stable_store: vec![value; length as usize] }
|
||||
}
|
||||
@ -105,7 +96,6 @@ impl<T> FixedSizeInterner<T> {
|
||||
/// Return the number of values in the interner, as a `u16`.
// NOTE(review): the `as u16` cast would silently truncate if the backing store
// ever held more than `u16::MAX` values. `new` takes a `u16` length, so it
// cannot exceed that; confirm the other constructors are bounded as well.
pub fn len(&self) -> u16 {
|
||||
self.stable_store.len() as u16
|
||||
}
|
||||
|
||||
pub fn map<U>(&self, map_f: impl Fn(&T) -> U) -> MappedInterner<U, T> {
|
||||
MappedInterner {
|
||||
stable_store: self.stable_store.iter().map(map_f).collect(),
|
||||
@ -122,6 +112,12 @@ impl<T> FixedSizeInterner<T> {
|
||||
self.stable_store.iter_mut().enumerate().map(|(i, x)| (Interned::from_raw(i as u16), x))
|
||||
}
|
||||
}
|
||||
|
||||
/// A store of values of type `T`, each linked to a value of type `From`
|
||||
/// stored in another interner. To create a mapped interner, use the
|
||||
/// `map` method on [`FixedSizeInterner`] or [`MappedInterner`].
|
||||
///
|
||||
/// Values in this interner are indexed with [`Interned<From>`].
|
||||
#[derive(Clone)]
|
||||
pub struct MappedInterner<T, From> {
|
||||
stable_store: Vec<T>,
|
||||
|
@ -10,7 +10,7 @@ use crate::search::new::interner::{Interned, MappedInterner};
|
||||
use crate::search::new::query_graph::QueryNodeData;
|
||||
use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm};
|
||||
use crate::search::new::ranking_rule_graph::{
|
||||
DeadEndPathCache, Edge, EdgeCondition, ProximityCondition, ProximityGraph, RankingRuleGraph,
|
||||
DeadEndPathCache, Edge, ProximityCondition, ProximityGraph, RankingRuleGraph,
|
||||
RankingRuleGraphTrait, TypoEdge, TypoGraph,
|
||||
};
|
||||
use crate::search::new::small_bitmap::SmallBitmap;
|
||||
@ -44,7 +44,7 @@ pub enum SearchEvents {
|
||||
ProximityState {
|
||||
graph: RankingRuleGraph<ProximityGraph>,
|
||||
paths: Vec<Vec<Interned<ProximityCondition>>>,
|
||||
empty_paths_cache: DeadEndPathCache<ProximityGraph>,
|
||||
dead_end_path_cache: DeadEndPathCache<ProximityGraph>,
|
||||
universe: RoaringBitmap,
|
||||
distances: MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>,
|
||||
cost: u16,
|
||||
@ -52,7 +52,7 @@ pub enum SearchEvents {
|
||||
TypoState {
|
||||
graph: RankingRuleGraph<TypoGraph>,
|
||||
paths: Vec<Vec<Interned<TypoEdge>>>,
|
||||
empty_paths_cache: DeadEndPathCache<TypoGraph>,
|
||||
dead_end_path_cache: DeadEndPathCache<TypoGraph>,
|
||||
universe: RoaringBitmap,
|
||||
distances: MappedInterner<Vec<(u16, SmallBitmap<TypoEdge>)>, QueryNode>,
|
||||
cost: u16,
|
||||
@ -170,7 +170,7 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
|
||||
&mut self,
|
||||
query_graph: &RankingRuleGraph<ProximityGraph>,
|
||||
paths_map: &[Vec<Interned<ProximityCondition>>],
|
||||
empty_paths_cache: &DeadEndPathCache<ProximityGraph>,
|
||||
dead_end_path_cache: &DeadEndPathCache<ProximityGraph>,
|
||||
universe: &RoaringBitmap,
|
||||
distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>,
|
||||
cost: u16,
|
||||
@ -178,7 +178,7 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
|
||||
self.events.push(SearchEvents::ProximityState {
|
||||
graph: query_graph.clone(),
|
||||
paths: paths_map.to_vec(),
|
||||
empty_paths_cache: empty_paths_cache.clone(),
|
||||
dead_end_path_cache: dead_end_path_cache.clone(),
|
||||
universe: universe.clone(),
|
||||
distances: distances.clone(),
|
||||
cost,
|
||||
@ -189,7 +189,7 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
|
||||
&mut self,
|
||||
query_graph: &RankingRuleGraph<TypoGraph>,
|
||||
paths_map: &[Vec<Interned<TypoEdge>>],
|
||||
empty_paths_cache: &DeadEndPathCache<TypoGraph>,
|
||||
dead_end_path_cache: &DeadEndPathCache<TypoGraph>,
|
||||
universe: &RoaringBitmap,
|
||||
distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoEdge>)>, QueryNode>,
|
||||
cost: u16,
|
||||
@ -197,7 +197,7 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
|
||||
self.events.push(SearchEvents::TypoState {
|
||||
graph: query_graph.clone(),
|
||||
paths: paths_map.to_vec(),
|
||||
empty_paths_cache: empty_paths_cache.clone(),
|
||||
dead_end_path_cache: dead_end_path_cache.clone(),
|
||||
universe: universe.clone(),
|
||||
distances: distances.clone(),
|
||||
cost,
|
||||
@ -358,7 +358,7 @@ results.{random} {{
|
||||
SearchEvents::ProximityState {
|
||||
graph,
|
||||
paths,
|
||||
empty_paths_cache,
|
||||
dead_end_path_cache,
|
||||
universe,
|
||||
distances,
|
||||
cost,
|
||||
@ -374,7 +374,7 @@ results.{random} {{
|
||||
ctx,
|
||||
graph,
|
||||
paths,
|
||||
empty_paths_cache,
|
||||
dead_end_path_cache,
|
||||
distances.clone(),
|
||||
&mut new_file,
|
||||
);
|
||||
@ -391,7 +391,7 @@ results.{random} {{
|
||||
SearchEvents::TypoState {
|
||||
graph,
|
||||
paths,
|
||||
empty_paths_cache,
|
||||
dead_end_path_cache,
|
||||
universe,
|
||||
distances,
|
||||
cost,
|
||||
@ -407,7 +407,7 @@ results.{random} {{
|
||||
ctx,
|
||||
graph,
|
||||
paths,
|
||||
empty_paths_cache,
|
||||
dead_end_path_cache,
|
||||
distances.clone(),
|
||||
&mut new_file,
|
||||
);
|
||||
@ -547,11 +547,11 @@ shape: class"
|
||||
let Edge { source_node, dest_node, condition: details, cost } = edge;
|
||||
|
||||
match &details {
|
||||
EdgeCondition::Unconditional => {
|
||||
None => {
|
||||
writeln!(file, "{source_node} -> {dest_node} : \"always cost {cost}\"",)
|
||||
.unwrap();
|
||||
}
|
||||
EdgeCondition::Conditional(condition) => {
|
||||
Some(condition) => {
|
||||
// let condition = graph.conditions_interner.get(*condition);
|
||||
writeln!(
|
||||
file,
|
||||
|
@ -66,7 +66,7 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
|
||||
&mut self,
|
||||
query_graph: &RankingRuleGraph<ProximityGraph>,
|
||||
paths: &[Vec<Interned<ProximityCondition>>],
|
||||
empty_paths_cache: &DeadEndPathCache<ProximityGraph>,
|
||||
dead_end_path_cache: &DeadEndPathCache<ProximityGraph>,
|
||||
universe: &RoaringBitmap,
|
||||
distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>,
|
||||
cost: u16,
|
||||
@ -77,7 +77,7 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
|
||||
&mut self,
|
||||
query_graph: &RankingRuleGraph<TypoGraph>,
|
||||
paths: &[Vec<Interned<TypoEdge>>],
|
||||
empty_paths_cache: &DeadEndPathCache<TypoGraph>,
|
||||
dead_end_path_cache: &DeadEndPathCache<TypoGraph>,
|
||||
universe: &RoaringBitmap,
|
||||
distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoEdge>)>, QueryNode>,
|
||||
cost: u16,
|
||||
@ -137,7 +137,7 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
|
||||
&mut self,
|
||||
_query_graph: &RankingRuleGraph<ProximityGraph>,
|
||||
_paths_map: &[Vec<Interned<ProximityCondition>>],
|
||||
_empty_paths_cache: &DeadEndPathCache<ProximityGraph>,
|
||||
_dead_end_path_cache: &DeadEndPathCache<ProximityGraph>,
|
||||
_universe: &RoaringBitmap,
|
||||
_distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>,
|
||||
_cost: u16,
|
||||
@ -148,7 +148,7 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
|
||||
&mut self,
|
||||
_query_graph: &RankingRuleGraph<TypoGraph>,
|
||||
_paths: &[Vec<Interned<TypoEdge>>],
|
||||
_empty_paths_cache: &DeadEndPathCache<TypoGraph>,
|
||||
_dead_end_path_cache: &DeadEndPathCache<TypoGraph>,
|
||||
_universe: &RoaringBitmap,
|
||||
_distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoEdge>)>, QueryNode>,
|
||||
_cost: u16,
|
||||
|
@ -36,6 +36,7 @@ use crate::search::new::query_term::located_query_terms_from_string;
|
||||
use crate::search::new::words::Words;
|
||||
use crate::{Filter, Index, Result, TermsMatchingStrategy};
|
||||
|
||||
/// A structure used throughout the execution of a search query.
|
||||
pub struct SearchContext<'ctx> {
|
||||
pub index: &'ctx Index,
|
||||
pub txn: &'ctx RoTxn<'ctx>,
|
||||
@ -59,6 +60,7 @@ impl<'ctx> SearchContext<'ctx> {
|
||||
}
|
||||
}
|
||||
|
||||
/// Apply the [`TermsMatchingStrategy`] to the query graph and resolve it.
|
||||
#[allow(clippy::too_many_arguments)]
|
||||
fn resolve_maximally_reduced_query_graph<'ctx>(
|
||||
ctx: &mut SearchContext<'ctx>,
|
||||
@ -100,6 +102,8 @@ fn resolve_maximally_reduced_query_graph<'ctx>(
|
||||
|
||||
Ok(docids)
|
||||
}
|
||||
|
||||
/// Return the list of initialised ranking rules to be used for a placeholder search.
|
||||
fn get_ranking_rules_for_placeholder_search<'ctx>(
|
||||
ctx: &SearchContext<'ctx>,
|
||||
) -> Result<Vec<Box<dyn RankingRule<'ctx, PlaceholderQuery>>>> {
|
||||
@ -123,6 +127,8 @@ fn get_ranking_rules_for_placeholder_search<'ctx>(
|
||||
}
|
||||
Ok(ranking_rules)
|
||||
}
|
||||
|
||||
/// Return the list of initialised ranking rules to be used for a query graph search.
|
||||
fn get_ranking_rules_for_query_graph_search<'ctx>(
|
||||
ctx: &SearchContext<'ctx>,
|
||||
terms_matching_strategy: TermsMatchingStrategy,
|
||||
|
@ -1,7 +1,7 @@
|
||||
use std::collections::HashSet;
|
||||
|
||||
use super::{Edge, RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use crate::search::new::interner::{DedupInterner, Interner};
|
||||
use crate::search::new::interner::DedupInterner;
|
||||
use crate::search::new::small_bitmap::SmallBitmap;
|
||||
use crate::search::new::{QueryGraph, SearchContext};
|
||||
use crate::Result;
|
||||
@ -19,7 +19,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
|
||||
let mut conditions_interner = DedupInterner::default();
|
||||
|
||||
let mut edges_store = Interner::default();
|
||||
let mut edges_store = DedupInterner::default();
|
||||
let mut edges_of_node = query_graph.nodes.map(|_| HashSet::new());
|
||||
|
||||
for (source_id, source_node) in graph_nodes.iter() {
|
||||
@ -33,7 +33,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
}
|
||||
|
||||
for (cost, condition) in edges {
|
||||
let new_edge_id = edges_store.push(Some(Edge {
|
||||
let new_edge_id = edges_store.insert(Some(Edge {
|
||||
source_node: source_id,
|
||||
dest_node: dest_idx,
|
||||
cost,
|
||||
|
@ -4,8 +4,8 @@ use std::collections::btree_map::Entry;
|
||||
use std::collections::{BTreeMap, VecDeque};
|
||||
use std::ops::ControlFlow;
|
||||
|
||||
use super::empty_paths_cache::DeadEndPathCache;
|
||||
use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use super::dead_end_path_cache::DeadEndPathCache;
|
||||
use super::{RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use crate::search::new::interner::{Interned, MappedInterner};
|
||||
use crate::search::new::query_graph::QueryNode;
|
||||
use crate::search::new::small_bitmap::SmallBitmap;
|
||||
@ -23,7 +23,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
from: Interned<QueryNode>,
|
||||
cost: u16,
|
||||
all_distances: &MappedInterner<Vec<(u16, SmallBitmap<G::EdgeCondition>)>, QueryNode>,
|
||||
empty_paths_cache: &mut DeadEndPathCache<G>,
|
||||
dead_end_path_cache: &mut DeadEndPathCache<G>,
|
||||
mut visit: impl FnMut(
|
||||
&[Interned<G::EdgeCondition>],
|
||||
&mut Self,
|
||||
@ -34,11 +34,11 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
from,
|
||||
cost,
|
||||
all_distances,
|
||||
empty_paths_cache,
|
||||
dead_end_path_cache,
|
||||
&mut visit,
|
||||
&mut vec![],
|
||||
&mut SmallBitmap::for_interned_values_in(&self.conditions_interner),
|
||||
&mut empty_paths_cache.conditions.clone(),
|
||||
&mut dead_end_path_cache.conditions.clone(),
|
||||
)?;
|
||||
Ok(())
|
||||
}
|
||||
@ -47,7 +47,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
from: Interned<QueryNode>,
|
||||
cost: u16,
|
||||
all_distances: &MappedInterner<Vec<(u16, SmallBitmap<G::EdgeCondition>)>, QueryNode>,
|
||||
empty_paths_cache: &mut DeadEndPathCache<G>,
|
||||
dead_end_path_cache: &mut DeadEndPathCache<G>,
|
||||
visit: &mut impl FnMut(
|
||||
&[Interned<G::EdgeCondition>],
|
||||
&mut Self,
|
||||
@ -66,10 +66,10 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
continue;
|
||||
}
|
||||
let next_any_valid = match edge.condition {
|
||||
EdgeCondition::Unconditional => {
|
||||
None => {
|
||||
if edge.dest_node == self.query_graph.end_node {
|
||||
any_valid = true;
|
||||
let control_flow = visit(prev_conditions, self, empty_paths_cache)?;
|
||||
let control_flow = visit(prev_conditions, self, dead_end_path_cache)?;
|
||||
match control_flow {
|
||||
ControlFlow::Continue(_) => {}
|
||||
ControlFlow::Break(_) => return Ok(true),
|
||||
@ -80,7 +80,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
edge.dest_node,
|
||||
cost - edge.cost as u16,
|
||||
all_distances,
|
||||
empty_paths_cache,
|
||||
dead_end_path_cache,
|
||||
visit,
|
||||
prev_conditions,
|
||||
cur_path,
|
||||
@ -88,7 +88,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
)?
|
||||
}
|
||||
}
|
||||
EdgeCondition::Conditional(condition) => {
|
||||
Some(condition) => {
|
||||
if forbidden_conditions.contains(condition)
|
||||
|| !all_distances.get(edge.dest_node).iter().any(
|
||||
|(next_cost, necessary_conditions)| {
|
||||
@ -104,8 +104,8 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
|
||||
let mut new_forbidden_conditions = forbidden_conditions.clone();
|
||||
new_forbidden_conditions
|
||||
.union(empty_paths_cache.condition_couples.get(condition));
|
||||
empty_paths_cache.prefixes.final_edges_after_prefix(
|
||||
.union(dead_end_path_cache.condition_couples.get(condition));
|
||||
dead_end_path_cache.prefixes.final_edges_after_prefix(
|
||||
prev_conditions,
|
||||
&mut |x| {
|
||||
new_forbidden_conditions.insert(x);
|
||||
@ -113,7 +113,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
);
|
||||
let next_any_valid = if edge.dest_node == self.query_graph.end_node {
|
||||
any_valid = true;
|
||||
let control_flow = visit(prev_conditions, self, empty_paths_cache)?;
|
||||
let control_flow = visit(prev_conditions, self, dead_end_path_cache)?;
|
||||
match control_flow {
|
||||
ControlFlow::Continue(_) => {}
|
||||
ControlFlow::Break(_) => return Ok(true),
|
||||
@ -124,7 +124,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
edge.dest_node,
|
||||
cost - edge.cost as u16,
|
||||
all_distances,
|
||||
empty_paths_cache,
|
||||
dead_end_path_cache,
|
||||
visit,
|
||||
prev_conditions,
|
||||
cur_path,
|
||||
@ -139,15 +139,15 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
any_valid |= next_any_valid;
|
||||
|
||||
if next_any_valid {
|
||||
if empty_paths_cache.path_is_dead_end(prev_conditions, cur_path) {
|
||||
if dead_end_path_cache.path_is_dead_end(prev_conditions, cur_path) {
|
||||
return Ok(any_valid);
|
||||
}
|
||||
forbidden_conditions.union(&empty_paths_cache.conditions);
|
||||
forbidden_conditions.union(&dead_end_path_cache.conditions);
|
||||
for prev_condition in prev_conditions.iter() {
|
||||
forbidden_conditions
|
||||
.union(empty_paths_cache.condition_couples.get(*prev_condition));
|
||||
.union(dead_end_path_cache.condition_couples.get(*prev_condition));
|
||||
}
|
||||
empty_paths_cache.prefixes.final_edges_after_prefix(prev_conditions, &mut |x| {
|
||||
dead_end_path_cache.prefixes.final_edges_after_prefix(prev_conditions, &mut |x| {
|
||||
forbidden_conditions.insert(x);
|
||||
});
|
||||
}
|
||||
@ -178,16 +178,14 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
let cur_node_edges = &self.edges_of_node.get(cur_node);
|
||||
for edge_idx in cur_node_edges.iter() {
|
||||
let edge = self.edges_store.get(edge_idx).as_ref().unwrap();
|
||||
let condition = match edge.condition {
|
||||
EdgeCondition::Unconditional => None,
|
||||
EdgeCondition::Conditional(condition) => Some(condition),
|
||||
};
|
||||
let succ_node = edge.dest_node;
|
||||
let succ_distances = distances_to_end.get(succ_node);
|
||||
for (succ_distance, succ_necessary_conditions) in succ_distances {
|
||||
let mut potential_necessary_edges =
|
||||
SmallBitmap::for_interned_values_in(&self.conditions_interner);
|
||||
for condition in condition.into_iter().chain(succ_necessary_conditions.iter()) {
|
||||
for condition in
|
||||
edge.condition.into_iter().chain(succ_necessary_conditions.iter())
|
||||
{
|
||||
potential_necessary_edges.insert(condition);
|
||||
}
|
||||
|
||||
|
@ -9,44 +9,43 @@ use crate::search::new::SearchContext;
|
||||
use crate::Result;
|
||||
|
||||
/// A cache storing the document ids associated with each ranking rule edge
|
||||
pub struct EdgeConditionDocIdsCache<G: RankingRuleGraphTrait> {
|
||||
pub struct ConditionDocIdsCache<G: RankingRuleGraphTrait> {
|
||||
// TODO: should be FxHashMap<Interned<EdgeCondition>, RoaringBitmap>
|
||||
pub cache: FxHashMap<Interned<G::EdgeCondition>, RoaringBitmap>,
|
||||
pub cache: FxHashMap<Interned<G::Condition>, RoaringBitmap>,
|
||||
_phantom: PhantomData<G>,
|
||||
}
|
||||
impl<G: RankingRuleGraphTrait> Default for EdgeConditionDocIdsCache<G> {
|
||||
impl<G: RankingRuleGraphTrait> Default for ConditionDocIdsCache<G> {
|
||||
fn default() -> Self {
|
||||
Self { cache: Default::default(), _phantom: Default::default() }
|
||||
}
|
||||
}
|
||||
impl<G: RankingRuleGraphTrait> EdgeConditionDocIdsCache<G> {
|
||||
impl<G: RankingRuleGraphTrait> ConditionDocIdsCache<G> {
|
||||
/// Retrieve the document ids for the given edge condition.
|
||||
///
|
||||
/// If the cache does not yet contain these docids, they are computed
|
||||
/// and inserted in the cache.
|
||||
pub fn get_edge_docids<'s, 'ctx>(
|
||||
pub fn get_condition_docids<'s, 'ctx>(
|
||||
&'s mut self,
|
||||
ctx: &mut SearchContext<'ctx>,
|
||||
// TODO: should be Interned<EdgeCondition>
|
||||
interned_edge_condition: Interned<G::EdgeCondition>,
|
||||
interned_condition: Interned<G::Condition>,
|
||||
graph: &RankingRuleGraph<G>,
|
||||
// TODO: maybe universe doesn't belong here
|
||||
universe: &RoaringBitmap,
|
||||
) -> Result<&'s RoaringBitmap> {
|
||||
if self.cache.contains_key(&interned_edge_condition) {
|
||||
if self.cache.contains_key(&interned_condition) {
|
||||
// TODO: should we update the bitmap in the cache if the new universe
|
||||
// reduces it?
|
||||
// TODO: maybe have a generation: u32 to track every time the universe was
|
||||
// reduced. Then only attempt to recompute the intersection when there is a chance
|
||||
// that edge_docids & universe changed
|
||||
return Ok(&self.cache[&interned_edge_condition]);
|
||||
// that condition_docids & universe changed
|
||||
return Ok(&self.cache[&interned_condition]);
|
||||
}
|
||||
// TODO: maybe universe doesn't belong here
|
||||
let edge_condition = graph.conditions_interner.get(interned_edge_condition);
|
||||
let condition = graph.conditions_interner.get(interned_condition);
|
||||
// TODO: faster way to do this?
|
||||
let docids = universe & G::resolve_edge_condition(ctx, edge_condition, universe)?;
|
||||
let _ = self.cache.insert(interned_edge_condition, docids);
|
||||
let docids = &self.cache[&interned_edge_condition];
|
||||
let docids = universe & G::resolve_condition(ctx, condition, universe)?;
|
||||
let _ = self.cache.insert(interned_condition, docids);
|
||||
let docids = &self.cache[&interned_condition];
|
||||
Ok(docids)
|
||||
}
|
||||
}
|
@ -9,11 +9,11 @@ use crate::search::new::{
|
||||
/// universe.
|
||||
pub struct DeadEndPathCache<G: RankingRuleGraphTrait> {
|
||||
/// The set of edge conditions that resolve to no documents.
|
||||
pub conditions: SmallBitmap<G::EdgeCondition>,
|
||||
pub conditions: SmallBitmap<G::Condition>,
|
||||
/// A set of path prefixes that resolve to no documents.
|
||||
pub prefixes: PathSet<G::EdgeCondition>,
|
||||
pub prefixes: PathSet<G::Condition>,
|
||||
/// A set of empty couples of edge conditions that resolve to no documents.
|
||||
pub condition_couples: MappedInterner<SmallBitmap<G::EdgeCondition>, G::EdgeCondition>,
|
||||
pub condition_couples: MappedInterner<SmallBitmap<G::Condition>, G::Condition>,
|
||||
}
|
||||
impl<G: RankingRuleGraphTrait> Clone for DeadEndPathCache<G> {
|
||||
fn clone(&self) -> Self {
|
||||
@ -27,17 +27,17 @@ impl<G: RankingRuleGraphTrait> Clone for DeadEndPathCache<G> {
|
||||
|
||||
impl<G: RankingRuleGraphTrait> DeadEndPathCache<G> {
|
||||
/// Create a new cache for a ranking rule graph containing at most `all_edges_len` edges.
|
||||
pub fn new(all_edge_conditions: &FixedSizeInterner<G::EdgeCondition>) -> Self {
|
||||
pub fn new(all_conditions: &FixedSizeInterner<G::Condition>) -> Self {
|
||||
Self {
|
||||
conditions: SmallBitmap::for_interned_values_in(all_edge_conditions),
|
||||
conditions: SmallBitmap::for_interned_values_in(all_conditions),
|
||||
prefixes: PathSet::default(),
|
||||
condition_couples: all_edge_conditions
|
||||
.map(|_| SmallBitmap::for_interned_values_in(all_edge_conditions)),
|
||||
condition_couples: all_conditions
|
||||
.map(|_| SmallBitmap::for_interned_values_in(all_conditions)),
|
||||
}
|
||||
}
|
||||
|
||||
/// Store in the cache that every path containing the given edge resolves to no documents.
|
||||
pub fn add_condition(&mut self, condition: Interned<G::EdgeCondition>) {
|
||||
pub fn add_condition(&mut self, condition: Interned<G::Condition>) {
|
||||
self.conditions.insert(condition);
|
||||
self.condition_couples.get_mut(condition).clear();
|
||||
self.prefixes.remove_edge(condition);
|
||||
@ -46,7 +46,7 @@ impl<G: RankingRuleGraphTrait> DeadEndPathCache<G> {
|
||||
}
|
||||
}
|
||||
/// Store in the cache that every path containing the given prefix resolves to no documents.
|
||||
pub fn add_prefix(&mut self, prefix: &[Interned<G::EdgeCondition>]) {
|
||||
pub fn add_prefix(&mut self, prefix: &[Interned<G::Condition>]) {
|
||||
// TODO: typed PathSet
|
||||
self.prefixes.insert(prefix.iter().copied());
|
||||
}
|
||||
@ -54,8 +54,8 @@ impl<G: RankingRuleGraphTrait> DeadEndPathCache<G> {
|
||||
/// Store in the cache that every path containing the two given edges resolves to no documents.
|
||||
pub fn add_condition_couple(
|
||||
&mut self,
|
||||
edge1: Interned<G::EdgeCondition>,
|
||||
edge2: Interned<G::EdgeCondition>,
|
||||
edge1: Interned<G::Condition>,
|
||||
edge2: Interned<G::Condition>,
|
||||
) {
|
||||
self.condition_couples.get_mut(edge1).insert(edge2);
|
||||
}
|
||||
@ -63,8 +63,8 @@ impl<G: RankingRuleGraphTrait> DeadEndPathCache<G> {
|
||||
/// Returns true if the cache can determine that the given path resolves to no documents.
|
||||
pub fn path_is_dead_end(
|
||||
&self,
|
||||
path: &[Interned<G::EdgeCondition>],
|
||||
path_bitmap: &SmallBitmap<G::EdgeCondition>,
|
||||
path: &[Interned<G::Condition>],
|
||||
path_bitmap: &SmallBitmap<G::Condition>,
|
||||
) -> bool {
|
||||
if path_bitmap.intersects(&self.conditions) {
|
||||
return true;
|
@ -7,8 +7,8 @@ the same but the edges are replaced.
|
||||
|
||||
mod build;
|
||||
mod cheapest_paths;
|
||||
mod edge_docids_cache;
|
||||
mod empty_paths_cache;
|
||||
mod condition_docids_cache;
|
||||
mod dead_end_path_cache;
|
||||
mod path_set;
|
||||
|
||||
/// Implementation of the `proximity` ranking rule
|
||||
@ -19,8 +19,8 @@ mod typo;
|
||||
use std::collections::HashSet;
|
||||
use std::hash::Hash;
|
||||
|
||||
pub use edge_docids_cache::EdgeConditionDocIdsCache;
|
||||
pub use empty_paths_cache::DeadEndPathCache;
|
||||
pub use condition_docids_cache::EdgeConditionDocIdsCache;
|
||||
pub use dead_end_path_cache::DeadEndPathCache;
|
||||
pub use proximity::{ProximityCondition, ProximityGraph};
|
||||
use roaring::RoaringBitmap;
|
||||
pub use typo::{TypoEdge, TypoGraph};
|
||||
@ -32,31 +32,6 @@ use super::small_bitmap::SmallBitmap;
|
||||
use super::{QueryGraph, QueryNode, SearchContext};
|
||||
use crate::Result;
|
||||
|
||||
/// The condition that is associated with an edge in the ranking rule graph.
|
||||
///
|
||||
/// Some edges are unconditional, which means that traversing them does not reduce
|
||||
/// the set of candidates.
|
||||
///
|
||||
/// Most edges, however, have a condition attached to them. For example, for the
|
||||
/// proximity ranking rule, the condition could be that a word is N-close to another one.
|
||||
/// When the edge is traversed, some database operations are executed to retrieve the set
|
||||
/// of documents that satisfy the condition, which reduces the list of candidate document ids.
|
||||
pub enum EdgeCondition<E> {
|
||||
Unconditional,
|
||||
Conditional(Interned<E>),
|
||||
}
|
||||
|
||||
impl<E> Copy for EdgeCondition<E> {}
|
||||
|
||||
impl<E> Clone for EdgeCondition<E> {
|
||||
fn clone(&self) -> Self {
|
||||
match self {
|
||||
Self::Unconditional => Self::Unconditional,
|
||||
Self::Conditional(arg0) => Self::Conditional(*arg0),
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// An edge in the ranking rule graph.
|
||||
///
|
||||
/// It contains:
|
||||
@ -68,7 +43,27 @@ pub struct Edge<E> {
|
||||
pub source_node: Interned<QueryNode>,
|
||||
pub dest_node: Interned<QueryNode>,
|
||||
pub cost: u8,
|
||||
pub condition: EdgeCondition<E>,
|
||||
pub condition: Option<Interned<E>>,
|
||||
}
|
||||
|
||||
impl<E> Hash for Edge<E> {
|
||||
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
|
||||
self.source_node.hash(state);
|
||||
self.dest_node.hash(state);
|
||||
self.cost.hash(state);
|
||||
self.condition.hash(state);
|
||||
}
|
||||
}
|
||||
|
||||
impl<E> Eq for Edge<E> {}
|
||||
|
||||
impl<E> PartialEq for Edge<E> {
|
||||
fn eq(&self, other: &Self) -> bool {
|
||||
self.source_node == other.source_node
|
||||
&& self.dest_node == other.dest_node
|
||||
&& self.cost == other.cost
|
||||
&& self.condition == other.condition
|
||||
}
|
||||
}
|
||||
|
||||
/// A trait to be implemented by a marker type to build a graph-based ranking rule.
|
||||
@ -113,12 +108,12 @@ pub trait RankingRuleGraphTrait: Sized {
|
||||
conditions_interner: &mut DedupInterner<Self::EdgeCondition>,
|
||||
source_node: &QueryNode,
|
||||
dest_node: &QueryNode,
|
||||
) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>>;
|
||||
) -> Result<Vec<(u8, Option<Interned<Self::EdgeCondition>>)>>;
|
||||
|
||||
fn log_state(
|
||||
graph: &RankingRuleGraph<Self>,
|
||||
paths: &[Vec<Interned<Self::EdgeCondition>>],
|
||||
empty_paths_cache: &DeadEndPathCache<Self>,
|
||||
dead_end_path_cache: &DeadEndPathCache<Self>,
|
||||
universe: &RoaringBitmap,
|
||||
distances: &MappedInterner<Vec<(u16, SmallBitmap<Self::EdgeCondition>)>, QueryNode>,
|
||||
cost: u16,
|
||||
@ -151,15 +146,12 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
pub fn remove_edges_with_condition(&mut self, condition_to_remove: Interned<G::EdgeCondition>) {
|
||||
for (edge_id, edge_opt) in self.edges_store.iter_mut() {
|
||||
let Some(edge) = edge_opt.as_mut() else { continue };
|
||||
match edge.condition {
|
||||
EdgeCondition::Unconditional => continue,
|
||||
EdgeCondition::Conditional(condition) => {
|
||||
if condition == condition_to_remove {
|
||||
let (source_node, _dest_node) = (edge.source_node, edge.dest_node);
|
||||
*edge_opt = None;
|
||||
self.edges_of_node.get_mut(source_node).remove(edge_id);
|
||||
}
|
||||
}
|
||||
let Some(condition) = edge.condition else { continue };
|
||||
|
||||
if condition == condition_to_remove {
|
||||
let (source_node, _dest_node) = (edge.source_node, edge.dest_node);
|
||||
*edge_opt = None;
|
||||
self.edges_of_node.get_mut(source_node).remove(edge_id);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -4,7 +4,7 @@
|
||||
|
||||
use crate::search::new::interner::Interned;
|
||||
|
||||
/// A set of [`Path`]
|
||||
/// A set of `Vec<Interned<T>>`.
|
||||
pub struct PathSet<T> {
|
||||
nodes: Vec<(Interned<T>, Self)>,
|
||||
is_end: bool,
|
||||
|
@ -7,7 +7,6 @@ use crate::search::new::interner::{DedupInterner, Interned};
|
||||
use crate::search::new::query_graph::QueryNodeData;
|
||||
use crate::search::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm};
|
||||
use crate::search::new::ranking_rule_graph::proximity::WordPair;
|
||||
use crate::search::new::ranking_rule_graph::EdgeCondition;
|
||||
use crate::search::new::{QueryNode, SearchContext};
|
||||
use crate::Result;
|
||||
use heed::RoTxn;
|
||||
@ -40,7 +39,7 @@ pub fn build_edges<'ctx>(
|
||||
conditions_interner: &mut DedupInterner<ProximityCondition>,
|
||||
from_node: &QueryNode,
|
||||
to_node: &QueryNode,
|
||||
) -> Result<Vec<(u8, EdgeCondition<ProximityCondition>)>> {
|
||||
) -> Result<Vec<(u8, Option<Interned<ProximityCondition>>)>> {
|
||||
let SearchContext {
|
||||
index,
|
||||
txn,
|
||||
@ -52,7 +51,7 @@ pub fn build_edges<'ctx>(
|
||||
} = ctx;
|
||||
|
||||
let right_term = match &to_node.data {
|
||||
QueryNodeData::End => return Ok(vec![(0, EdgeCondition::Unconditional)]),
|
||||
QueryNodeData::End => return Ok(vec![(0, None)]),
|
||||
QueryNodeData::Deleted | QueryNodeData::Start => return Ok(vec![]),
|
||||
QueryNodeData::Term(term) => term,
|
||||
};
|
||||
@ -70,7 +69,7 @@ pub fn build_edges<'ctx>(
|
||||
QueryNodeData::Start => {
|
||||
return Ok(vec![(
|
||||
(right_ngram_length - 1) as u8,
|
||||
EdgeCondition::Conditional(
|
||||
Some(
|
||||
conditions_interner
|
||||
.insert(ProximityCondition::Term { term: *right_term_interned }),
|
||||
),
|
||||
@ -88,7 +87,7 @@ pub fn build_edges<'ctx>(
|
||||
// but `sun` and `are` have no proximity condition between them
|
||||
return Ok(vec![(
|
||||
(right_ngram_length - 1) as u8,
|
||||
EdgeCondition::Conditional(
|
||||
Some(
|
||||
conditions_interner.insert(ProximityCondition::Term { term: *right_term_interned }),
|
||||
),
|
||||
)]);
|
||||
@ -140,7 +139,7 @@ pub fn build_edges<'ctx>(
|
||||
.map(|(cost, word_pairs)| {
|
||||
(
|
||||
cost,
|
||||
EdgeCondition::Conditional(
|
||||
Some(
|
||||
conditions_interner
|
||||
.insert(ProximityCondition::Pairs { pairs: word_pairs.into_boxed_slice() }),
|
||||
),
|
||||
@ -149,9 +148,7 @@ pub fn build_edges<'ctx>(
|
||||
.collect::<Vec<_>>();
|
||||
new_edges.push((
|
||||
8 + (right_ngram_length - 1) as u8,
|
||||
EdgeCondition::Conditional(
|
||||
conditions_interner.insert(ProximityCondition::Term { term: *right_term_interned }),
|
||||
),
|
||||
Some(conditions_interner.insert(ProximityCondition::Term { term: *right_term_interned })),
|
||||
));
|
||||
Ok(new_edges)
|
||||
}
|
||||
|
@ -6,8 +6,8 @@ use std::iter::FromIterator;
|
||||
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::empty_paths_cache::DeadEndPathCache;
|
||||
use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use super::dead_end_path_cache::DeadEndPathCache;
|
||||
use super::{RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use crate::search::new::interner::{DedupInterner, Interned, MappedInterner};
|
||||
use crate::search::new::logger::SearchLogger;
|
||||
use crate::search::new::query_term::{Phrase, QueryTerm};
|
||||
@ -60,20 +60,20 @@ impl RankingRuleGraphTrait for ProximityGraph {
|
||||
conditions_interner: &mut DedupInterner<Self::EdgeCondition>,
|
||||
source_node: &QueryNode,
|
||||
dest_node: &QueryNode,
|
||||
) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>> {
|
||||
) -> Result<Vec<(u8, Option<Interned<Self::EdgeCondition>>)>> {
|
||||
build::build_edges(ctx, conditions_interner, source_node, dest_node)
|
||||
}
|
||||
|
||||
fn log_state(
|
||||
graph: &RankingRuleGraph<Self>,
|
||||
paths: &[Vec<Interned<ProximityCondition>>],
|
||||
empty_paths_cache: &DeadEndPathCache<Self>,
|
||||
dead_end_path_cache: &DeadEndPathCache<Self>,
|
||||
universe: &RoaringBitmap,
|
||||
distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>,
|
||||
cost: u16,
|
||||
logger: &mut dyn SearchLogger<QueryGraph>,
|
||||
) {
|
||||
logger.log_proximity_state(graph, paths, empty_paths_cache, universe, distances, cost);
|
||||
logger.log_proximity_state(graph, paths, dead_end_path_cache, universe, distances, cost);
|
||||
}
|
||||
|
||||
fn label_for_edge_condition<'ctx>(
|
||||
|
@ -1,7 +1,7 @@
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::empty_paths_cache::DeadEndPathCache;
|
||||
use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use super::dead_end_path_cache::DeadEndPathCache;
|
||||
use super::{RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use crate::search::new::interner::{DedupInterner, Interned, MappedInterner};
|
||||
use crate::search::new::logger::SearchLogger;
|
||||
use crate::search::new::query_graph::QueryNodeData;
|
||||
@ -58,7 +58,7 @@ impl RankingRuleGraphTrait for TypoGraph {
|
||||
conditions_interner: &mut DedupInterner<Self::EdgeCondition>,
|
||||
_from_node: &QueryNode,
|
||||
to_node: &QueryNode,
|
||||
) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>> {
|
||||
) -> Result<Vec<(u8, Option<Interned<Self::EdgeCondition>>)>> {
|
||||
let SearchContext { term_interner, .. } = ctx;
|
||||
match &to_node.data {
|
||||
QueryNodeData::Term(LocatedQueryTerm { value, positions }) => {
|
||||
@ -121,7 +121,7 @@ impl RankingRuleGraphTrait for TypoGraph {
|
||||
if !new_term.is_empty() {
|
||||
edges.push((
|
||||
nbr_typos as u8 + base_cost,
|
||||
EdgeCondition::Conditional(conditions_interner.insert(TypoEdge {
|
||||
Some(conditions_interner.insert(TypoEdge {
|
||||
term: term_interner.insert(new_term),
|
||||
nbr_typos: nbr_typos as u8,
|
||||
})),
|
||||
@ -130,7 +130,7 @@ impl RankingRuleGraphTrait for TypoGraph {
|
||||
}
|
||||
Ok(edges)
|
||||
}
|
||||
QueryNodeData::End => Ok(vec![(0, EdgeCondition::Unconditional)]),
|
||||
QueryNodeData::End => Ok(vec![(0, None)]),
|
||||
QueryNodeData::Deleted | QueryNodeData::Start => panic!(),
|
||||
}
|
||||
}
|
||||
@ -138,13 +138,13 @@ impl RankingRuleGraphTrait for TypoGraph {
|
||||
fn log_state(
|
||||
graph: &RankingRuleGraph<Self>,
|
||||
paths: &[Vec<Interned<TypoEdge>>],
|
||||
empty_paths_cache: &DeadEndPathCache<Self>,
|
||||
dead_end_path_cache: &DeadEndPathCache<Self>,
|
||||
universe: &RoaringBitmap,
|
||||
distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoEdge>)>, QueryNode>,
|
||||
cost: u16,
|
||||
logger: &mut dyn SearchLogger<QueryGraph>,
|
||||
) {
|
||||
logger.log_typo_state(graph, paths, empty_paths_cache, universe, distances, cost);
|
||||
logger.log_typo_state(graph, paths, dead_end_path_cache, universe, distances, cost);
|
||||
}
|
||||
|
||||
fn label_for_edge_condition<'ctx>(
|
||||
|
@ -2,6 +2,7 @@ use std::marker::PhantomData;
|
||||
|
||||
use super::interner::{FixedSizeInterner, Interned};
|
||||
|
||||
/// A compact set of [`Interned<T>`]
|
||||
pub struct SmallBitmap<T> {
|
||||
internal: SmallBitmapInternal,
|
||||
_phantom: PhantomData<T>,
|
||||
|
Loading…
x
Reference in New Issue
Block a user