mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-24 21:50:07 +01:00
Replace EdgeCondition with an Option<..> + other code cleanup
This commit is contained in:
parent
7b1d8f4c6d
commit
aa59c3bc2c
@ -20,10 +20,17 @@ pub struct DistinctOutput {
|
|||||||
pub excluded: RoaringBitmap,
|
pub excluded: RoaringBitmap,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return a [`DistinctOutput`] containing:
|
||||||
|
/// - `remaining`: a set of docids built such that exactly one element from `candidates`
|
||||||
|
/// is kept for each distinct value inside the given field. If the field does not exist, it
|
||||||
|
/// is considered unique.
|
||||||
|
/// - `excluded`: the set of document ids that contain a value for the given field that occurs
|
||||||
|
/// in the given candidates.
|
||||||
pub fn apply_distinct_rule<'ctx>(
|
pub fn apply_distinct_rule<'ctx>(
|
||||||
ctx: &mut SearchContext<'ctx>,
|
ctx: &mut SearchContext<'ctx>,
|
||||||
field_id: u16,
|
field_id: u16,
|
||||||
candidates: &RoaringBitmap,
|
candidates: &RoaringBitmap,
|
||||||
|
// TODO: add a universe here, such that the `excluded` are a subset of the universe?
|
||||||
) -> Result<DistinctOutput> {
|
) -> Result<DistinctOutput> {
|
||||||
let mut excluded = RoaringBitmap::new();
|
let mut excluded = RoaringBitmap::new();
|
||||||
let mut remaining = RoaringBitmap::new();
|
let mut remaining = RoaringBitmap::new();
|
||||||
@ -37,6 +44,7 @@ pub fn apply_distinct_rule<'ctx>(
|
|||||||
Ok(DistinctOutput { remaining, excluded })
|
Ok(DistinctOutput { remaining, excluded })
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Apply the distinct rule defined by [`apply_distinct_rule`] for a single document id.
|
||||||
fn distinct_single_docid(
|
fn distinct_single_docid(
|
||||||
index: &Index,
|
index: &Index,
|
||||||
txn: &RoTxn,
|
txn: &RoTxn,
|
||||||
@ -69,6 +77,7 @@ fn distinct_single_docid(
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return all the docids containing the given value in the given field
|
||||||
fn facet_value_docids(
|
fn facet_value_docids(
|
||||||
database: Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
database: Database<FacetGroupKeyCodec<ByteSliceRefCodec>, FacetGroupValueCodec>,
|
||||||
txn: &RoTxn,
|
txn: &RoTxn,
|
||||||
@ -79,13 +88,15 @@ fn facet_value_docids(
|
|||||||
.get(txn, &FacetGroupKey { field_id, level: 0, left_bound: facet_value })
|
.get(txn, &FacetGroupKey { field_id, level: 0, left_bound: facet_value })
|
||||||
.map(|opt| opt.map(|v| v.bitmap))
|
.map(|opt| opt.map(|v| v.bitmap))
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return an iterator over each number value in the given field of the given document.
|
||||||
fn facet_number_values<'a>(
|
fn facet_number_values<'a>(
|
||||||
id: u32,
|
docid: u32,
|
||||||
distinct: u16,
|
field_id: u16,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
txn: &'a RoTxn,
|
txn: &'a RoTxn,
|
||||||
) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<ByteSliceRefCodec>, Unit>> {
|
) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<ByteSliceRefCodec>, Unit>> {
|
||||||
let key = facet_values_prefix_key(distinct, id);
|
let key = facet_values_prefix_key(field_id, docid);
|
||||||
|
|
||||||
let iter = index
|
let iter = index
|
||||||
.field_id_docid_facet_f64s
|
.field_id_docid_facet_f64s
|
||||||
@ -96,13 +107,14 @@ fn facet_number_values<'a>(
|
|||||||
Ok(iter)
|
Ok(iter)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return an iterator over each string value in the given field of the given document.
|
||||||
fn facet_string_values<'a>(
|
fn facet_string_values<'a>(
|
||||||
docid: u32,
|
docid: u32,
|
||||||
distinct: u16,
|
field_id: u16,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
txn: &'a RoTxn,
|
txn: &'a RoTxn,
|
||||||
) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<ByteSliceRefCodec>, Str>> {
|
) -> Result<RoPrefix<'a, FieldDocIdFacetCodec<ByteSliceRefCodec>, Str>> {
|
||||||
let key = facet_values_prefix_key(distinct, docid);
|
let key = facet_values_prefix_key(field_id, docid);
|
||||||
|
|
||||||
let iter = index
|
let iter = index
|
||||||
.field_id_docid_facet_strings
|
.field_id_docid_facet_strings
|
||||||
|
@ -45,7 +45,7 @@ use super::interner::MappedInterner;
|
|||||||
use super::logger::SearchLogger;
|
use super::logger::SearchLogger;
|
||||||
use super::query_graph::QueryNode;
|
use super::query_graph::QueryNode;
|
||||||
use super::ranking_rule_graph::{
|
use super::ranking_rule_graph::{
|
||||||
DeadEndPathCache, EdgeCondition, EdgeConditionDocIdsCache, ProximityGraph, RankingRuleGraph,
|
DeadEndPathCache, EdgeConditionDocIdsCache, ProximityGraph, RankingRuleGraph,
|
||||||
RankingRuleGraphTrait, TypoGraph,
|
RankingRuleGraphTrait, TypoGraph,
|
||||||
};
|
};
|
||||||
use super::small_bitmap::SmallBitmap;
|
use super::small_bitmap::SmallBitmap;
|
||||||
@ -87,7 +87,7 @@ pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
|
|||||||
/// Cache to retrieve the docids associated with each edge
|
/// Cache to retrieve the docids associated with each edge
|
||||||
edge_conditions_cache: EdgeConditionDocIdsCache<G>,
|
edge_conditions_cache: EdgeConditionDocIdsCache<G>,
|
||||||
/// Cache used to optimistically discard paths that resolve to no documents.
|
/// Cache used to optimistically discard paths that resolve to no documents.
|
||||||
empty_paths_cache: DeadEndPathCache<G>,
|
dead_end_path_cache: DeadEndPathCache<G>,
|
||||||
/// A structure giving the list of possible costs from each node to the end node,
|
/// A structure giving the list of possible costs from each node to the end node,
|
||||||
/// along with a set of unavoidable edges that must be traversed to achieve that distance.
|
/// along with a set of unavoidable edges that must be traversed to achieve that distance.
|
||||||
all_distances: MappedInterner<Vec<(u16, SmallBitmap<G::EdgeCondition>)>, QueryNode>,
|
all_distances: MappedInterner<Vec<(u16, SmallBitmap<G::EdgeCondition>)>, QueryNode>,
|
||||||
@ -101,27 +101,23 @@ pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
|
|||||||
fn remove_empty_edges<'ctx, G: RankingRuleGraphTrait>(
|
fn remove_empty_edges<'ctx, G: RankingRuleGraphTrait>(
|
||||||
ctx: &mut SearchContext<'ctx>,
|
ctx: &mut SearchContext<'ctx>,
|
||||||
graph: &mut RankingRuleGraph<G>,
|
graph: &mut RankingRuleGraph<G>,
|
||||||
edge_docids_cache: &mut EdgeConditionDocIdsCache<G>,
|
condition_docids_cache: &mut EdgeConditionDocIdsCache<G>,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
empty_paths_cache: &mut DeadEndPathCache<G>,
|
dead_end_path_cache: &mut DeadEndPathCache<G>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
for edge_id in graph.edges_store.indexes() {
|
for edge_id in graph.edges_store.indexes() {
|
||||||
let Some(edge) = graph.edges_store.get(edge_id).as_ref() else {
|
let Some(edge) = graph.edges_store.get(edge_id).as_ref() else {
|
||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
let condition = edge.condition;
|
let Some(condition) = edge.condition else { continue };
|
||||||
|
|
||||||
match condition {
|
let docids =
|
||||||
EdgeCondition::Unconditional => continue,
|
condition_docids_cache.get_condition_docids(ctx, condition, graph, universe)?;
|
||||||
EdgeCondition::Conditional(condition) => {
|
if docids.is_disjoint(universe) {
|
||||||
let docids = edge_docids_cache.get_edge_docids(ctx, condition, graph, universe)?;
|
graph.remove_edges_with_condition(condition);
|
||||||
if docids.is_disjoint(universe) {
|
dead_end_path_cache.add_condition(condition);
|
||||||
graph.remove_edges_with_condition(condition);
|
condition_docids_cache.cache.remove(&condition);
|
||||||
empty_paths_cache.add_condition(condition);
|
continue;
|
||||||
edge_docids_cache.cache.remove(&condition);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(())
|
Ok(())
|
||||||
@ -139,17 +135,17 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
|||||||
query_graph: &QueryGraph,
|
query_graph: &QueryGraph,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let mut graph = RankingRuleGraph::build(ctx, query_graph.clone())?;
|
let mut graph = RankingRuleGraph::build(ctx, query_graph.clone())?;
|
||||||
let mut edge_docids_cache = EdgeConditionDocIdsCache::default();
|
let mut condition_docids_cache = EdgeConditionDocIdsCache::default();
|
||||||
let mut empty_paths_cache = DeadEndPathCache::new(&graph.conditions_interner);
|
let mut dead_end_path_cache = DeadEndPathCache::new(&graph.conditions_interner);
|
||||||
|
|
||||||
// First simplify the graph as much as possible, by computing the docids of the edges
|
// First simplify the graph as much as possible, by computing the docids of all the conditions
|
||||||
// within the rule's universe and removing the edges that have no associated docids.
|
// within the rule's universe and removing the edges that have no associated docids.
|
||||||
remove_empty_edges(
|
remove_empty_edges(
|
||||||
ctx,
|
ctx,
|
||||||
&mut graph,
|
&mut graph,
|
||||||
&mut edge_docids_cache,
|
&mut condition_docids_cache,
|
||||||
universe,
|
universe,
|
||||||
&mut empty_paths_cache,
|
&mut dead_end_path_cache,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
// Then pre-compute the cost of all paths from each node to the end node
|
// Then pre-compute the cost of all paths from each node to the end node
|
||||||
@ -157,8 +153,8 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
|||||||
|
|
||||||
let state = GraphBasedRankingRuleState {
|
let state = GraphBasedRankingRuleState {
|
||||||
graph,
|
graph,
|
||||||
edge_conditions_cache: edge_docids_cache,
|
edge_conditions_cache: condition_docids_cache,
|
||||||
empty_paths_cache,
|
dead_end_path_cache,
|
||||||
all_distances,
|
all_distances,
|
||||||
cur_distance_idx: 0,
|
cur_distance_idx: 0,
|
||||||
};
|
};
|
||||||
@ -187,7 +183,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
|||||||
&mut state.graph,
|
&mut state.graph,
|
||||||
&mut state.edge_conditions_cache,
|
&mut state.edge_conditions_cache,
|
||||||
universe,
|
universe,
|
||||||
&mut state.empty_paths_cache,
|
&mut state.dead_end_path_cache,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
// If the cur_distance_idx does not point to a valid cost in the `all_distances`
|
// If the cur_distance_idx does not point to a valid cost in the `all_distances`
|
||||||
@ -208,8 +204,8 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
|||||||
|
|
||||||
let GraphBasedRankingRuleState {
|
let GraphBasedRankingRuleState {
|
||||||
graph,
|
graph,
|
||||||
edge_conditions_cache: edge_docids_cache,
|
edge_conditions_cache: condition_docids_cache,
|
||||||
empty_paths_cache,
|
dead_end_path_cache,
|
||||||
all_distances,
|
all_distances,
|
||||||
cur_distance_idx: _,
|
cur_distance_idx: _,
|
||||||
} = &mut state;
|
} = &mut state;
|
||||||
@ -224,18 +220,18 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
|||||||
// For each path of the given cost, we will compute its associated
|
// For each path of the given cost, we will compute its associated
|
||||||
// document ids.
|
// document ids.
|
||||||
// In case the path does not resolve to any document id, we try to figure out why
|
// In case the path does not resolve to any document id, we try to figure out why
|
||||||
// and update the `empty_paths_cache` accordingly.
|
// and update the `dead_end_path_cache` accordingly.
|
||||||
// For example, it may be that the path is empty because one of its edges is disjoint
|
// For example, it may be that the path is empty because one of its edges is disjoint
|
||||||
// with the universe, or because a prefix of the path is disjoint with the universe, or because
|
// with the universe, or because a prefix of the path is disjoint with the universe, or because
|
||||||
// the path contains two edges that are disjoint from each other within the universe.
|
// the path contains two edges that are disjoint from each other within the universe.
|
||||||
// Updating the empty_paths_cache helps speed up the execution of `visit_paths_of_cost` and reduces
|
// Updating the dead_end_path_cache helps speed up the execution of `visit_paths_of_cost` and reduces
|
||||||
// the number of future candidate paths given by that same function.
|
// the number of future candidate paths given by that same function.
|
||||||
graph.visit_paths_of_cost(
|
graph.visit_paths_of_cost(
|
||||||
graph.query_graph.root_node,
|
graph.query_graph.root_node,
|
||||||
cost,
|
cost,
|
||||||
all_distances,
|
all_distances,
|
||||||
empty_paths_cache,
|
dead_end_path_cache,
|
||||||
|path, graph, empty_paths_cache| {
|
|path, graph, dead_end_path_cache| {
|
||||||
// Accumulate the path for logging purposes only
|
// Accumulate the path for logging purposes only
|
||||||
paths.push(path.to_vec());
|
paths.push(path.to_vec());
|
||||||
let mut path_docids = universe.clone();
|
let mut path_docids = universe.clone();
|
||||||
@ -243,47 +239,48 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
|||||||
// We store the edges and their docids in vectors in case the path turns out to be
|
// We store the edges and their docids in vectors in case the path turns out to be
|
||||||
// empty and we need to figure out why it was empty.
|
// empty and we need to figure out why it was empty.
|
||||||
let mut visited_conditions = vec![];
|
let mut visited_conditions = vec![];
|
||||||
let mut cached_edge_docids = vec![];
|
let mut cached_condition_docids = vec![];
|
||||||
// graph.conditions_interner.map(|_| RoaringBitmap::new());
|
// graph.conditions_interner.map(|_| RoaringBitmap::new());
|
||||||
|
|
||||||
for &condition in path {
|
for &condition in path {
|
||||||
visited_conditions.push(condition);
|
visited_conditions.push(condition);
|
||||||
|
|
||||||
let edge_docids =
|
let condition_docids = condition_docids_cache
|
||||||
edge_docids_cache.get_edge_docids(ctx, condition, graph, &universe)?;
|
.get_condition_docids(ctx, condition, graph, &universe)?;
|
||||||
|
|
||||||
cached_edge_docids.push((condition, edge_docids.clone())); // .get_mut(condition) = edge_docids.clone();
|
cached_condition_docids.push((condition, condition_docids.clone())); // .get_mut(condition) = condition_docids.clone();
|
||||||
|
|
||||||
// If the edge is empty, then the path will be empty as well, we update the graph
|
// If the edge is empty, then the path will be empty as well, we update the graph
|
||||||
// and caches accordingly and skip to the next candidate path.
|
// and caches accordingly and skip to the next candidate path.
|
||||||
if edge_docids.is_disjoint(&universe) {
|
if condition_docids.is_disjoint(&universe) {
|
||||||
// 1. Store in the cache that this edge is empty for this universe
|
// 1. Store in the cache that this edge is empty for this universe
|
||||||
empty_paths_cache.add_condition(condition);
|
dead_end_path_cache.add_condition(condition);
|
||||||
// 2. remove this edge from the ranking rule graph
|
// 2. remove this edge from the ranking rule graph
|
||||||
// ouch, no! :( need to link a condition to one or more ranking rule edges
|
// ouch, no! :( need to link a condition to one or more ranking rule edges
|
||||||
graph.remove_edges_with_condition(condition);
|
graph.remove_edges_with_condition(condition);
|
||||||
// 3. Also remove the entry from the edge_docids_cache, since we don't need it anymore
|
// 3. Also remove the entry from the condition_docids_cache, since we don't need it anymore
|
||||||
edge_docids_cache.cache.remove(&condition);
|
condition_docids_cache.cache.remove(&condition);
|
||||||
return Ok(ControlFlow::Continue(()));
|
return Ok(ControlFlow::Continue(()));
|
||||||
}
|
}
|
||||||
path_docids &= edge_docids;
|
path_docids &= condition_docids;
|
||||||
|
|
||||||
// If the (sub)path is empty, we try to figure out why and update the caches accordingly.
|
// If the (sub)path is empty, we try to figure out why and update the caches accordingly.
|
||||||
if path_docids.is_disjoint(&universe) {
|
if path_docids.is_disjoint(&universe) {
|
||||||
// First, we know that this path is empty, and thus any path
|
// First, we know that this path is empty, and thus any path
|
||||||
// that is a superset of it will also be empty.
|
// that is a superset of it will also be empty.
|
||||||
empty_paths_cache.add_prefix(&visited_conditions);
|
dead_end_path_cache.add_prefix(&visited_conditions);
|
||||||
// Second, if the intersection between this edge and any
|
// Second, if the intersection between this edge and any
|
||||||
// previous one is disjoint with the universe,
|
// previous one is disjoint with the universe,
|
||||||
// then we also know that any path containing the same couple of
|
// then we also know that any path containing the same couple of
|
||||||
// edges will also be empty.
|
// edges will also be empty.
|
||||||
for (past_condition, edge_docids2) in cached_edge_docids.iter() {
|
for (past_condition, condition_docids2) in cached_condition_docids.iter() {
|
||||||
if *past_condition == condition {
|
if *past_condition == condition {
|
||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
let intersection = edge_docids & edge_docids2;
|
let intersection = condition_docids & condition_docids2;
|
||||||
if intersection.is_disjoint(&universe) {
|
if intersection.is_disjoint(&universe) {
|
||||||
empty_paths_cache.add_condition_couple(*past_condition, condition);
|
dead_end_path_cache
|
||||||
|
.add_condition_couple(*past_condition, condition);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// We should maybe instead try to compute:
|
// We should maybe instead try to compute:
|
||||||
@ -310,7 +307,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
|||||||
G::log_state(
|
G::log_state(
|
||||||
&original_graph,
|
&original_graph,
|
||||||
&paths,
|
&paths,
|
||||||
empty_paths_cache,
|
dead_end_path_cache,
|
||||||
original_universe,
|
original_universe,
|
||||||
all_distances,
|
all_distances,
|
||||||
cost,
|
cost,
|
||||||
@ -322,8 +319,8 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
|||||||
// But we only do it in case the bucket length is >1, because otherwise
|
// But we only do it in case the bucket length is >1, because otherwise
|
||||||
// we know the child ranking rule won't be called anyway
|
// we know the child ranking rule won't be called anyway
|
||||||
let mut next_query_graph = original_graph.query_graph;
|
let mut next_query_graph = original_graph.query_graph;
|
||||||
next_query_graph.simplify();
|
|
||||||
if bucket.len() > 1 {
|
if bucket.len() > 1 {
|
||||||
|
next_query_graph.simplify();
|
||||||
// 1. Gather all the words and phrases used in the computation of this bucket
|
// 1. Gather all the words and phrases used in the computation of this bucket
|
||||||
let mut used_words = HashSet::new();
|
let mut used_words = HashSet::new();
|
||||||
let mut used_phrases = HashSet::new();
|
let mut used_phrases = HashSet::new();
|
||||||
|
@ -25,6 +25,8 @@ impl<T> Interned<T> {
|
|||||||
/// be copied, compared, and hashed efficiently. An immutable reference to the original value
|
/// be copied, compared, and hashed efficiently. An immutable reference to the original value
|
||||||
/// can be retrieved using `self.get(interned)`. A set of values within the interner can be
|
/// can be retrieved using `self.get(interned)`. A set of values within the interner can be
|
||||||
/// efficiently managed using [`SmallBitmap<T>`](super::small_bitmap::SmallBitmap).
|
/// efficiently managed using [`SmallBitmap<T>`](super::small_bitmap::SmallBitmap).
|
||||||
|
///
|
||||||
|
/// A dedup-interner can contain a maximum of `u16::MAX` values.
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct DedupInterner<T> {
|
pub struct DedupInterner<T> {
|
||||||
stable_store: Vec<T>,
|
stable_store: Vec<T>,
|
||||||
@ -36,7 +38,8 @@ impl<T> Default for DedupInterner<T> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl<T> DedupInterner<T> {
|
impl<T> DedupInterner<T> {
|
||||||
///
|
/// Convert the dedup-interner into a fixed-size interner, such that new
|
||||||
|
/// elements cannot be added to it anymore.
|
||||||
pub fn freeze(self) -> FixedSizeInterner<T> {
|
pub fn freeze(self) -> FixedSizeInterner<T> {
|
||||||
FixedSizeInterner { stable_store: self.stable_store }
|
FixedSizeInterner { stable_store: self.stable_store }
|
||||||
}
|
}
|
||||||
@ -46,6 +49,8 @@ impl<T> DedupInterner<T>
|
|||||||
where
|
where
|
||||||
T: Clone + Eq + Hash,
|
T: Clone + Eq + Hash,
|
||||||
{
|
{
|
||||||
|
/// Insert the given value into the dedup-interner, and return
|
||||||
|
/// its index.
|
||||||
pub fn insert(&mut self, s: T) -> Interned<T> {
|
pub fn insert(&mut self, s: T) -> Interned<T> {
|
||||||
if let Some(interned) = self.lookup.get(&s) {
|
if let Some(interned) = self.lookup.get(&s) {
|
||||||
*interned
|
*interned
|
||||||
@ -57,35 +62,21 @@ where
|
|||||||
interned
|
interned
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
/// Get a reference to the interned value.
|
||||||
pub fn get(&self, interned: Interned<T>) -> &T {
|
pub fn get(&self, interned: Interned<T>) -> &T {
|
||||||
&self.stable_store[interned.idx as usize]
|
&self.stable_store[interned.idx as usize]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[derive(Clone)]
|
|
||||||
pub struct Interner<T> {
|
|
||||||
stable_store: Vec<T>,
|
|
||||||
}
|
|
||||||
impl<T> Default for Interner<T> {
|
|
||||||
fn default() -> Self {
|
|
||||||
Self { stable_store: Default::default() }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
impl<T> Interner<T> {
|
|
||||||
pub fn freeze(self) -> FixedSizeInterner<T> {
|
|
||||||
FixedSizeInterner { stable_store: self.stable_store }
|
|
||||||
}
|
|
||||||
pub fn push(&mut self, s: T) -> Interned<T> {
|
|
||||||
assert!(self.stable_store.len() < u16::MAX as usize);
|
|
||||||
self.stable_store.push(s);
|
|
||||||
Interned::from_raw(self.stable_store.len() as u16 - 1)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
/// A fixed-length store for values of type `T`, where each value is identified
|
||||||
|
/// by an index of type [`Interned<T>`].
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct FixedSizeInterner<T> {
|
pub struct FixedSizeInterner<T> {
|
||||||
stable_store: Vec<T>,
|
stable_store: Vec<T>,
|
||||||
}
|
}
|
||||||
impl<T: Clone> FixedSizeInterner<T> {
|
impl<T: Clone> FixedSizeInterner<T> {
|
||||||
|
/// Create a fixed-size interner of the given length containing
|
||||||
|
/// clones of the given value.
|
||||||
pub fn new(length: u16, value: T) -> Self {
|
pub fn new(length: u16, value: T) -> Self {
|
||||||
Self { stable_store: vec![value; length as usize] }
|
Self { stable_store: vec![value; length as usize] }
|
||||||
}
|
}
|
||||||
@ -105,7 +96,6 @@ impl<T> FixedSizeInterner<T> {
|
|||||||
pub fn len(&self) -> u16 {
|
pub fn len(&self) -> u16 {
|
||||||
self.stable_store.len() as u16
|
self.stable_store.len() as u16
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn map<U>(&self, map_f: impl Fn(&T) -> U) -> MappedInterner<U, T> {
|
pub fn map<U>(&self, map_f: impl Fn(&T) -> U) -> MappedInterner<U, T> {
|
||||||
MappedInterner {
|
MappedInterner {
|
||||||
stable_store: self.stable_store.iter().map(map_f).collect(),
|
stable_store: self.stable_store.iter().map(map_f).collect(),
|
||||||
@ -122,6 +112,12 @@ impl<T> FixedSizeInterner<T> {
|
|||||||
self.stable_store.iter_mut().enumerate().map(|(i, x)| (Interned::from_raw(i as u16), x))
|
self.stable_store.iter_mut().enumerate().map(|(i, x)| (Interned::from_raw(i as u16), x))
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// A store of values of type `T`, each linked to a value of type `From`
|
||||||
|
/// stored in another interner. To create a mapped interner, use the
|
||||||
|
/// `map` method on [`FixedSizeInterner`] or [`MappedInterner`].
|
||||||
|
///
|
||||||
|
/// Values in this interner are indexed with [`Interned<From>`].
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct MappedInterner<T, From> {
|
pub struct MappedInterner<T, From> {
|
||||||
stable_store: Vec<T>,
|
stable_store: Vec<T>,
|
||||||
|
@ -10,7 +10,7 @@ use crate::search::new::interner::{Interned, MappedInterner};
|
|||||||
use crate::search::new::query_graph::QueryNodeData;
|
use crate::search::new::query_graph::QueryNodeData;
|
||||||
use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm};
|
use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm};
|
||||||
use crate::search::new::ranking_rule_graph::{
|
use crate::search::new::ranking_rule_graph::{
|
||||||
DeadEndPathCache, Edge, EdgeCondition, ProximityCondition, ProximityGraph, RankingRuleGraph,
|
DeadEndPathCache, Edge, ProximityCondition, ProximityGraph, RankingRuleGraph,
|
||||||
RankingRuleGraphTrait, TypoEdge, TypoGraph,
|
RankingRuleGraphTrait, TypoEdge, TypoGraph,
|
||||||
};
|
};
|
||||||
use crate::search::new::small_bitmap::SmallBitmap;
|
use crate::search::new::small_bitmap::SmallBitmap;
|
||||||
@ -44,7 +44,7 @@ pub enum SearchEvents {
|
|||||||
ProximityState {
|
ProximityState {
|
||||||
graph: RankingRuleGraph<ProximityGraph>,
|
graph: RankingRuleGraph<ProximityGraph>,
|
||||||
paths: Vec<Vec<Interned<ProximityCondition>>>,
|
paths: Vec<Vec<Interned<ProximityCondition>>>,
|
||||||
empty_paths_cache: DeadEndPathCache<ProximityGraph>,
|
dead_end_path_cache: DeadEndPathCache<ProximityGraph>,
|
||||||
universe: RoaringBitmap,
|
universe: RoaringBitmap,
|
||||||
distances: MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>,
|
distances: MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
@ -52,7 +52,7 @@ pub enum SearchEvents {
|
|||||||
TypoState {
|
TypoState {
|
||||||
graph: RankingRuleGraph<TypoGraph>,
|
graph: RankingRuleGraph<TypoGraph>,
|
||||||
paths: Vec<Vec<Interned<TypoEdge>>>,
|
paths: Vec<Vec<Interned<TypoEdge>>>,
|
||||||
empty_paths_cache: DeadEndPathCache<TypoGraph>,
|
dead_end_path_cache: DeadEndPathCache<TypoGraph>,
|
||||||
universe: RoaringBitmap,
|
universe: RoaringBitmap,
|
||||||
distances: MappedInterner<Vec<(u16, SmallBitmap<TypoEdge>)>, QueryNode>,
|
distances: MappedInterner<Vec<(u16, SmallBitmap<TypoEdge>)>, QueryNode>,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
@ -170,7 +170,7 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
|
|||||||
&mut self,
|
&mut self,
|
||||||
query_graph: &RankingRuleGraph<ProximityGraph>,
|
query_graph: &RankingRuleGraph<ProximityGraph>,
|
||||||
paths_map: &[Vec<Interned<ProximityCondition>>],
|
paths_map: &[Vec<Interned<ProximityCondition>>],
|
||||||
empty_paths_cache: &DeadEndPathCache<ProximityGraph>,
|
dead_end_path_cache: &DeadEndPathCache<ProximityGraph>,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>,
|
distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
@ -178,7 +178,7 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
|
|||||||
self.events.push(SearchEvents::ProximityState {
|
self.events.push(SearchEvents::ProximityState {
|
||||||
graph: query_graph.clone(),
|
graph: query_graph.clone(),
|
||||||
paths: paths_map.to_vec(),
|
paths: paths_map.to_vec(),
|
||||||
empty_paths_cache: empty_paths_cache.clone(),
|
dead_end_path_cache: dead_end_path_cache.clone(),
|
||||||
universe: universe.clone(),
|
universe: universe.clone(),
|
||||||
distances: distances.clone(),
|
distances: distances.clone(),
|
||||||
cost,
|
cost,
|
||||||
@ -189,7 +189,7 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
|
|||||||
&mut self,
|
&mut self,
|
||||||
query_graph: &RankingRuleGraph<TypoGraph>,
|
query_graph: &RankingRuleGraph<TypoGraph>,
|
||||||
paths_map: &[Vec<Interned<TypoEdge>>],
|
paths_map: &[Vec<Interned<TypoEdge>>],
|
||||||
empty_paths_cache: &DeadEndPathCache<TypoGraph>,
|
dead_end_path_cache: &DeadEndPathCache<TypoGraph>,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoEdge>)>, QueryNode>,
|
distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoEdge>)>, QueryNode>,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
@ -197,7 +197,7 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
|
|||||||
self.events.push(SearchEvents::TypoState {
|
self.events.push(SearchEvents::TypoState {
|
||||||
graph: query_graph.clone(),
|
graph: query_graph.clone(),
|
||||||
paths: paths_map.to_vec(),
|
paths: paths_map.to_vec(),
|
||||||
empty_paths_cache: empty_paths_cache.clone(),
|
dead_end_path_cache: dead_end_path_cache.clone(),
|
||||||
universe: universe.clone(),
|
universe: universe.clone(),
|
||||||
distances: distances.clone(),
|
distances: distances.clone(),
|
||||||
cost,
|
cost,
|
||||||
@ -358,7 +358,7 @@ results.{random} {{
|
|||||||
SearchEvents::ProximityState {
|
SearchEvents::ProximityState {
|
||||||
graph,
|
graph,
|
||||||
paths,
|
paths,
|
||||||
empty_paths_cache,
|
dead_end_path_cache,
|
||||||
universe,
|
universe,
|
||||||
distances,
|
distances,
|
||||||
cost,
|
cost,
|
||||||
@ -374,7 +374,7 @@ results.{random} {{
|
|||||||
ctx,
|
ctx,
|
||||||
graph,
|
graph,
|
||||||
paths,
|
paths,
|
||||||
empty_paths_cache,
|
dead_end_path_cache,
|
||||||
distances.clone(),
|
distances.clone(),
|
||||||
&mut new_file,
|
&mut new_file,
|
||||||
);
|
);
|
||||||
@ -391,7 +391,7 @@ results.{random} {{
|
|||||||
SearchEvents::TypoState {
|
SearchEvents::TypoState {
|
||||||
graph,
|
graph,
|
||||||
paths,
|
paths,
|
||||||
empty_paths_cache,
|
dead_end_path_cache,
|
||||||
universe,
|
universe,
|
||||||
distances,
|
distances,
|
||||||
cost,
|
cost,
|
||||||
@ -407,7 +407,7 @@ results.{random} {{
|
|||||||
ctx,
|
ctx,
|
||||||
graph,
|
graph,
|
||||||
paths,
|
paths,
|
||||||
empty_paths_cache,
|
dead_end_path_cache,
|
||||||
distances.clone(),
|
distances.clone(),
|
||||||
&mut new_file,
|
&mut new_file,
|
||||||
);
|
);
|
||||||
@ -547,11 +547,11 @@ shape: class"
|
|||||||
let Edge { source_node, dest_node, condition: details, cost } = edge;
|
let Edge { source_node, dest_node, condition: details, cost } = edge;
|
||||||
|
|
||||||
match &details {
|
match &details {
|
||||||
EdgeCondition::Unconditional => {
|
None => {
|
||||||
writeln!(file, "{source_node} -> {dest_node} : \"always cost {cost}\"",)
|
writeln!(file, "{source_node} -> {dest_node} : \"always cost {cost}\"",)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
}
|
}
|
||||||
EdgeCondition::Conditional(condition) => {
|
Some(condition) => {
|
||||||
// let condition = graph.conditions_interner.get(*condition);
|
// let condition = graph.conditions_interner.get(*condition);
|
||||||
writeln!(
|
writeln!(
|
||||||
file,
|
file,
|
||||||
|
@ -66,7 +66,7 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
|
|||||||
&mut self,
|
&mut self,
|
||||||
query_graph: &RankingRuleGraph<ProximityGraph>,
|
query_graph: &RankingRuleGraph<ProximityGraph>,
|
||||||
paths: &[Vec<Interned<ProximityCondition>>],
|
paths: &[Vec<Interned<ProximityCondition>>],
|
||||||
empty_paths_cache: &DeadEndPathCache<ProximityGraph>,
|
dead_end_path_cache: &DeadEndPathCache<ProximityGraph>,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>,
|
distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
@ -77,7 +77,7 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
|
|||||||
&mut self,
|
&mut self,
|
||||||
query_graph: &RankingRuleGraph<TypoGraph>,
|
query_graph: &RankingRuleGraph<TypoGraph>,
|
||||||
paths: &[Vec<Interned<TypoEdge>>],
|
paths: &[Vec<Interned<TypoEdge>>],
|
||||||
empty_paths_cache: &DeadEndPathCache<TypoGraph>,
|
dead_end_path_cache: &DeadEndPathCache<TypoGraph>,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoEdge>)>, QueryNode>,
|
distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoEdge>)>, QueryNode>,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
@ -137,7 +137,7 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
|
|||||||
&mut self,
|
&mut self,
|
||||||
_query_graph: &RankingRuleGraph<ProximityGraph>,
|
_query_graph: &RankingRuleGraph<ProximityGraph>,
|
||||||
_paths_map: &[Vec<Interned<ProximityCondition>>],
|
_paths_map: &[Vec<Interned<ProximityCondition>>],
|
||||||
_empty_paths_cache: &DeadEndPathCache<ProximityGraph>,
|
_dead_end_path_cache: &DeadEndPathCache<ProximityGraph>,
|
||||||
_universe: &RoaringBitmap,
|
_universe: &RoaringBitmap,
|
||||||
_distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>,
|
_distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>,
|
||||||
_cost: u16,
|
_cost: u16,
|
||||||
@ -148,7 +148,7 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
|
|||||||
&mut self,
|
&mut self,
|
||||||
_query_graph: &RankingRuleGraph<TypoGraph>,
|
_query_graph: &RankingRuleGraph<TypoGraph>,
|
||||||
_paths: &[Vec<Interned<TypoEdge>>],
|
_paths: &[Vec<Interned<TypoEdge>>],
|
||||||
_empty_paths_cache: &DeadEndPathCache<TypoGraph>,
|
_dead_end_path_cache: &DeadEndPathCache<TypoGraph>,
|
||||||
_universe: &RoaringBitmap,
|
_universe: &RoaringBitmap,
|
||||||
_distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoEdge>)>, QueryNode>,
|
_distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoEdge>)>, QueryNode>,
|
||||||
_cost: u16,
|
_cost: u16,
|
||||||
|
@ -36,6 +36,7 @@ use crate::search::new::query_term::located_query_terms_from_string;
|
|||||||
use crate::search::new::words::Words;
|
use crate::search::new::words::Words;
|
||||||
use crate::{Filter, Index, Result, TermsMatchingStrategy};
|
use crate::{Filter, Index, Result, TermsMatchingStrategy};
|
||||||
|
|
||||||
|
/// A structure used throughout the execution of a search query.
|
||||||
pub struct SearchContext<'ctx> {
|
pub struct SearchContext<'ctx> {
|
||||||
pub index: &'ctx Index,
|
pub index: &'ctx Index,
|
||||||
pub txn: &'ctx RoTxn<'ctx>,
|
pub txn: &'ctx RoTxn<'ctx>,
|
||||||
@ -59,6 +60,7 @@ impl<'ctx> SearchContext<'ctx> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Apply the [`TermsMatchingStrategy`] to the query graph and resolve it.
|
||||||
#[allow(clippy::too_many_arguments)]
|
#[allow(clippy::too_many_arguments)]
|
||||||
fn resolve_maximally_reduced_query_graph<'ctx>(
|
fn resolve_maximally_reduced_query_graph<'ctx>(
|
||||||
ctx: &mut SearchContext<'ctx>,
|
ctx: &mut SearchContext<'ctx>,
|
||||||
@ -100,6 +102,8 @@ fn resolve_maximally_reduced_query_graph<'ctx>(
|
|||||||
|
|
||||||
Ok(docids)
|
Ok(docids)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return the list of initialised ranking rules to be used for a placeholder search.
|
||||||
fn get_ranking_rules_for_placeholder_search<'ctx>(
|
fn get_ranking_rules_for_placeholder_search<'ctx>(
|
||||||
ctx: &SearchContext<'ctx>,
|
ctx: &SearchContext<'ctx>,
|
||||||
) -> Result<Vec<Box<dyn RankingRule<'ctx, PlaceholderQuery>>>> {
|
) -> Result<Vec<Box<dyn RankingRule<'ctx, PlaceholderQuery>>>> {
|
||||||
@ -123,6 +127,8 @@ fn get_ranking_rules_for_placeholder_search<'ctx>(
|
|||||||
}
|
}
|
||||||
Ok(ranking_rules)
|
Ok(ranking_rules)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Return the list of initialised ranking rules to be used for a query graph search.
|
||||||
fn get_ranking_rules_for_query_graph_search<'ctx>(
|
fn get_ranking_rules_for_query_graph_search<'ctx>(
|
||||||
ctx: &SearchContext<'ctx>,
|
ctx: &SearchContext<'ctx>,
|
||||||
terms_matching_strategy: TermsMatchingStrategy,
|
terms_matching_strategy: TermsMatchingStrategy,
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
|
|
||||||
use super::{Edge, RankingRuleGraph, RankingRuleGraphTrait};
|
use super::{Edge, RankingRuleGraph, RankingRuleGraphTrait};
|
||||||
use crate::search::new::interner::{DedupInterner, Interner};
|
use crate::search::new::interner::DedupInterner;
|
||||||
use crate::search::new::small_bitmap::SmallBitmap;
|
use crate::search::new::small_bitmap::SmallBitmap;
|
||||||
use crate::search::new::{QueryGraph, SearchContext};
|
use crate::search::new::{QueryGraph, SearchContext};
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
@ -19,7 +19,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
|
|
||||||
let mut conditions_interner = DedupInterner::default();
|
let mut conditions_interner = DedupInterner::default();
|
||||||
|
|
||||||
let mut edges_store = Interner::default();
|
let mut edges_store = DedupInterner::default();
|
||||||
let mut edges_of_node = query_graph.nodes.map(|_| HashSet::new());
|
let mut edges_of_node = query_graph.nodes.map(|_| HashSet::new());
|
||||||
|
|
||||||
for (source_id, source_node) in graph_nodes.iter() {
|
for (source_id, source_node) in graph_nodes.iter() {
|
||||||
@ -33,7 +33,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
for (cost, condition) in edges {
|
for (cost, condition) in edges {
|
||||||
let new_edge_id = edges_store.push(Some(Edge {
|
let new_edge_id = edges_store.insert(Some(Edge {
|
||||||
source_node: source_id,
|
source_node: source_id,
|
||||||
dest_node: dest_idx,
|
dest_node: dest_idx,
|
||||||
cost,
|
cost,
|
||||||
|
@ -4,8 +4,8 @@ use std::collections::btree_map::Entry;
|
|||||||
use std::collections::{BTreeMap, VecDeque};
|
use std::collections::{BTreeMap, VecDeque};
|
||||||
use std::ops::ControlFlow;
|
use std::ops::ControlFlow;
|
||||||
|
|
||||||
use super::empty_paths_cache::DeadEndPathCache;
|
use super::dead_end_path_cache::DeadEndPathCache;
|
||||||
use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait};
|
use super::{RankingRuleGraph, RankingRuleGraphTrait};
|
||||||
use crate::search::new::interner::{Interned, MappedInterner};
|
use crate::search::new::interner::{Interned, MappedInterner};
|
||||||
use crate::search::new::query_graph::QueryNode;
|
use crate::search::new::query_graph::QueryNode;
|
||||||
use crate::search::new::small_bitmap::SmallBitmap;
|
use crate::search::new::small_bitmap::SmallBitmap;
|
||||||
@ -23,7 +23,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
from: Interned<QueryNode>,
|
from: Interned<QueryNode>,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
all_distances: &MappedInterner<Vec<(u16, SmallBitmap<G::EdgeCondition>)>, QueryNode>,
|
all_distances: &MappedInterner<Vec<(u16, SmallBitmap<G::EdgeCondition>)>, QueryNode>,
|
||||||
empty_paths_cache: &mut DeadEndPathCache<G>,
|
dead_end_path_cache: &mut DeadEndPathCache<G>,
|
||||||
mut visit: impl FnMut(
|
mut visit: impl FnMut(
|
||||||
&[Interned<G::EdgeCondition>],
|
&[Interned<G::EdgeCondition>],
|
||||||
&mut Self,
|
&mut Self,
|
||||||
@ -34,11 +34,11 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
from,
|
from,
|
||||||
cost,
|
cost,
|
||||||
all_distances,
|
all_distances,
|
||||||
empty_paths_cache,
|
dead_end_path_cache,
|
||||||
&mut visit,
|
&mut visit,
|
||||||
&mut vec![],
|
&mut vec![],
|
||||||
&mut SmallBitmap::for_interned_values_in(&self.conditions_interner),
|
&mut SmallBitmap::for_interned_values_in(&self.conditions_interner),
|
||||||
&mut empty_paths_cache.conditions.clone(),
|
&mut dead_end_path_cache.conditions.clone(),
|
||||||
)?;
|
)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@ -47,7 +47,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
from: Interned<QueryNode>,
|
from: Interned<QueryNode>,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
all_distances: &MappedInterner<Vec<(u16, SmallBitmap<G::EdgeCondition>)>, QueryNode>,
|
all_distances: &MappedInterner<Vec<(u16, SmallBitmap<G::EdgeCondition>)>, QueryNode>,
|
||||||
empty_paths_cache: &mut DeadEndPathCache<G>,
|
dead_end_path_cache: &mut DeadEndPathCache<G>,
|
||||||
visit: &mut impl FnMut(
|
visit: &mut impl FnMut(
|
||||||
&[Interned<G::EdgeCondition>],
|
&[Interned<G::EdgeCondition>],
|
||||||
&mut Self,
|
&mut Self,
|
||||||
@ -66,10 +66,10 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
let next_any_valid = match edge.condition {
|
let next_any_valid = match edge.condition {
|
||||||
EdgeCondition::Unconditional => {
|
None => {
|
||||||
if edge.dest_node == self.query_graph.end_node {
|
if edge.dest_node == self.query_graph.end_node {
|
||||||
any_valid = true;
|
any_valid = true;
|
||||||
let control_flow = visit(prev_conditions, self, empty_paths_cache)?;
|
let control_flow = visit(prev_conditions, self, dead_end_path_cache)?;
|
||||||
match control_flow {
|
match control_flow {
|
||||||
ControlFlow::Continue(_) => {}
|
ControlFlow::Continue(_) => {}
|
||||||
ControlFlow::Break(_) => return Ok(true),
|
ControlFlow::Break(_) => return Ok(true),
|
||||||
@ -80,7 +80,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
edge.dest_node,
|
edge.dest_node,
|
||||||
cost - edge.cost as u16,
|
cost - edge.cost as u16,
|
||||||
all_distances,
|
all_distances,
|
||||||
empty_paths_cache,
|
dead_end_path_cache,
|
||||||
visit,
|
visit,
|
||||||
prev_conditions,
|
prev_conditions,
|
||||||
cur_path,
|
cur_path,
|
||||||
@ -88,7 +88,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
)?
|
)?
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
EdgeCondition::Conditional(condition) => {
|
Some(condition) => {
|
||||||
if forbidden_conditions.contains(condition)
|
if forbidden_conditions.contains(condition)
|
||||||
|| !all_distances.get(edge.dest_node).iter().any(
|
|| !all_distances.get(edge.dest_node).iter().any(
|
||||||
|(next_cost, necessary_conditions)| {
|
|(next_cost, necessary_conditions)| {
|
||||||
@ -104,8 +104,8 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
|
|
||||||
let mut new_forbidden_conditions = forbidden_conditions.clone();
|
let mut new_forbidden_conditions = forbidden_conditions.clone();
|
||||||
new_forbidden_conditions
|
new_forbidden_conditions
|
||||||
.union(empty_paths_cache.condition_couples.get(condition));
|
.union(dead_end_path_cache.condition_couples.get(condition));
|
||||||
empty_paths_cache.prefixes.final_edges_after_prefix(
|
dead_end_path_cache.prefixes.final_edges_after_prefix(
|
||||||
prev_conditions,
|
prev_conditions,
|
||||||
&mut |x| {
|
&mut |x| {
|
||||||
new_forbidden_conditions.insert(x);
|
new_forbidden_conditions.insert(x);
|
||||||
@ -113,7 +113,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
);
|
);
|
||||||
let next_any_valid = if edge.dest_node == self.query_graph.end_node {
|
let next_any_valid = if edge.dest_node == self.query_graph.end_node {
|
||||||
any_valid = true;
|
any_valid = true;
|
||||||
let control_flow = visit(prev_conditions, self, empty_paths_cache)?;
|
let control_flow = visit(prev_conditions, self, dead_end_path_cache)?;
|
||||||
match control_flow {
|
match control_flow {
|
||||||
ControlFlow::Continue(_) => {}
|
ControlFlow::Continue(_) => {}
|
||||||
ControlFlow::Break(_) => return Ok(true),
|
ControlFlow::Break(_) => return Ok(true),
|
||||||
@ -124,7 +124,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
edge.dest_node,
|
edge.dest_node,
|
||||||
cost - edge.cost as u16,
|
cost - edge.cost as u16,
|
||||||
all_distances,
|
all_distances,
|
||||||
empty_paths_cache,
|
dead_end_path_cache,
|
||||||
visit,
|
visit,
|
||||||
prev_conditions,
|
prev_conditions,
|
||||||
cur_path,
|
cur_path,
|
||||||
@ -139,15 +139,15 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
any_valid |= next_any_valid;
|
any_valid |= next_any_valid;
|
||||||
|
|
||||||
if next_any_valid {
|
if next_any_valid {
|
||||||
if empty_paths_cache.path_is_dead_end(prev_conditions, cur_path) {
|
if dead_end_path_cache.path_is_dead_end(prev_conditions, cur_path) {
|
||||||
return Ok(any_valid);
|
return Ok(any_valid);
|
||||||
}
|
}
|
||||||
forbidden_conditions.union(&empty_paths_cache.conditions);
|
forbidden_conditions.union(&dead_end_path_cache.conditions);
|
||||||
for prev_condition in prev_conditions.iter() {
|
for prev_condition in prev_conditions.iter() {
|
||||||
forbidden_conditions
|
forbidden_conditions
|
||||||
.union(empty_paths_cache.condition_couples.get(*prev_condition));
|
.union(dead_end_path_cache.condition_couples.get(*prev_condition));
|
||||||
}
|
}
|
||||||
empty_paths_cache.prefixes.final_edges_after_prefix(prev_conditions, &mut |x| {
|
dead_end_path_cache.prefixes.final_edges_after_prefix(prev_conditions, &mut |x| {
|
||||||
forbidden_conditions.insert(x);
|
forbidden_conditions.insert(x);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
@ -178,16 +178,14 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
let cur_node_edges = &self.edges_of_node.get(cur_node);
|
let cur_node_edges = &self.edges_of_node.get(cur_node);
|
||||||
for edge_idx in cur_node_edges.iter() {
|
for edge_idx in cur_node_edges.iter() {
|
||||||
let edge = self.edges_store.get(edge_idx).as_ref().unwrap();
|
let edge = self.edges_store.get(edge_idx).as_ref().unwrap();
|
||||||
let condition = match edge.condition {
|
|
||||||
EdgeCondition::Unconditional => None,
|
|
||||||
EdgeCondition::Conditional(condition) => Some(condition),
|
|
||||||
};
|
|
||||||
let succ_node = edge.dest_node;
|
let succ_node = edge.dest_node;
|
||||||
let succ_distances = distances_to_end.get(succ_node);
|
let succ_distances = distances_to_end.get(succ_node);
|
||||||
for (succ_distance, succ_necessary_conditions) in succ_distances {
|
for (succ_distance, succ_necessary_conditions) in succ_distances {
|
||||||
let mut potential_necessary_edges =
|
let mut potential_necessary_edges =
|
||||||
SmallBitmap::for_interned_values_in(&self.conditions_interner);
|
SmallBitmap::for_interned_values_in(&self.conditions_interner);
|
||||||
for condition in condition.into_iter().chain(succ_necessary_conditions.iter()) {
|
for condition in
|
||||||
|
edge.condition.into_iter().chain(succ_necessary_conditions.iter())
|
||||||
|
{
|
||||||
potential_necessary_edges.insert(condition);
|
potential_necessary_edges.insert(condition);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -9,44 +9,43 @@ use crate::search::new::SearchContext;
|
|||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
|
||||||
/// A cache storing the document ids associated with each ranking rule edge
|
/// A cache storing the document ids associated with each ranking rule edge
|
||||||
pub struct EdgeConditionDocIdsCache<G: RankingRuleGraphTrait> {
|
pub struct ConditionDocIdsCache<G: RankingRuleGraphTrait> {
|
||||||
// TODO: should be FxHashMap<Interned<EdgeCondition>, RoaringBitmap>
|
// TODO: should be FxHashMap<Interned<EdgeCondition>, RoaringBitmap>
|
||||||
pub cache: FxHashMap<Interned<G::EdgeCondition>, RoaringBitmap>,
|
pub cache: FxHashMap<Interned<G::Condition>, RoaringBitmap>,
|
||||||
_phantom: PhantomData<G>,
|
_phantom: PhantomData<G>,
|
||||||
}
|
}
|
||||||
impl<G: RankingRuleGraphTrait> Default for EdgeConditionDocIdsCache<G> {
|
impl<G: RankingRuleGraphTrait> Default for ConditionDocIdsCache<G> {
|
||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
Self { cache: Default::default(), _phantom: Default::default() }
|
Self { cache: Default::default(), _phantom: Default::default() }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl<G: RankingRuleGraphTrait> EdgeConditionDocIdsCache<G> {
|
impl<G: RankingRuleGraphTrait> ConditionDocIdsCache<G> {
|
||||||
/// Retrieve the document ids for the given edge condition.
|
/// Retrieve the document ids for the given edge condition.
|
||||||
///
|
///
|
||||||
/// If the cache does not yet contain these docids, they are computed
|
/// If the cache does not yet contain these docids, they are computed
|
||||||
/// and inserted in the cache.
|
/// and inserted in the cache.
|
||||||
pub fn get_edge_docids<'s, 'ctx>(
|
pub fn get_condition_docids<'s, 'ctx>(
|
||||||
&'s mut self,
|
&'s mut self,
|
||||||
ctx: &mut SearchContext<'ctx>,
|
ctx: &mut SearchContext<'ctx>,
|
||||||
// TODO: should be Interned<EdgeCondition>
|
interned_condition: Interned<G::Condition>,
|
||||||
interned_edge_condition: Interned<G::EdgeCondition>,
|
|
||||||
graph: &RankingRuleGraph<G>,
|
graph: &RankingRuleGraph<G>,
|
||||||
// TODO: maybe universe doesn't belong here
|
// TODO: maybe universe doesn't belong here
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
) -> Result<&'s RoaringBitmap> {
|
) -> Result<&'s RoaringBitmap> {
|
||||||
if self.cache.contains_key(&interned_edge_condition) {
|
if self.cache.contains_key(&interned_condition) {
|
||||||
// TODO: should we update the bitmap in the cache if the new universe
|
// TODO: should we update the bitmap in the cache if the new universe
|
||||||
// reduces it?
|
// reduces it?
|
||||||
// TODO: maybe have a generation: u32 to track every time the universe was
|
// TODO: maybe have a generation: u32 to track every time the universe was
|
||||||
// reduced. Then only attempt to recompute the intersection when there is a chance
|
// reduced. Then only attempt to recompute the intersection when there is a chance
|
||||||
// that edge_docids & universe changed
|
// that condition_docids & universe changed
|
||||||
return Ok(&self.cache[&interned_edge_condition]);
|
return Ok(&self.cache[&interned_condition]);
|
||||||
}
|
}
|
||||||
// TODO: maybe universe doesn't belong here
|
// TODO: maybe universe doesn't belong here
|
||||||
let edge_condition = graph.conditions_interner.get(interned_edge_condition);
|
let condition = graph.conditions_interner.get(interned_condition);
|
||||||
// TODO: faster way to do this?
|
// TODO: faster way to do this?
|
||||||
let docids = universe & G::resolve_edge_condition(ctx, edge_condition, universe)?;
|
let docids = universe & G::resolve_condition(ctx, condition, universe)?;
|
||||||
let _ = self.cache.insert(interned_edge_condition, docids);
|
let _ = self.cache.insert(interned_condition, docids);
|
||||||
let docids = &self.cache[&interned_edge_condition];
|
let docids = &self.cache[&interned_condition];
|
||||||
Ok(docids)
|
Ok(docids)
|
||||||
}
|
}
|
||||||
}
|
}
|
@ -9,11 +9,11 @@ use crate::search::new::{
|
|||||||
/// universe.
|
/// universe.
|
||||||
pub struct DeadEndPathCache<G: RankingRuleGraphTrait> {
|
pub struct DeadEndPathCache<G: RankingRuleGraphTrait> {
|
||||||
/// The set of edge conditions that resolve to no documents.
|
/// The set of edge conditions that resolve to no documents.
|
||||||
pub conditions: SmallBitmap<G::EdgeCondition>,
|
pub conditions: SmallBitmap<G::Condition>,
|
||||||
/// A set of path prefixes that resolve to no documents.
|
/// A set of path prefixes that resolve to no documents.
|
||||||
pub prefixes: PathSet<G::EdgeCondition>,
|
pub prefixes: PathSet<G::Condition>,
|
||||||
/// A set of empty couples of edge conditions that resolve to no documents.
|
/// A set of empty couples of edge conditions that resolve to no documents.
|
||||||
pub condition_couples: MappedInterner<SmallBitmap<G::EdgeCondition>, G::EdgeCondition>,
|
pub condition_couples: MappedInterner<SmallBitmap<G::Condition>, G::Condition>,
|
||||||
}
|
}
|
||||||
impl<G: RankingRuleGraphTrait> Clone for DeadEndPathCache<G> {
|
impl<G: RankingRuleGraphTrait> Clone for DeadEndPathCache<G> {
|
||||||
fn clone(&self) -> Self {
|
fn clone(&self) -> Self {
|
||||||
@ -27,17 +27,17 @@ impl<G: RankingRuleGraphTrait> Clone for DeadEndPathCache<G> {
|
|||||||
|
|
||||||
impl<G: RankingRuleGraphTrait> DeadEndPathCache<G> {
|
impl<G: RankingRuleGraphTrait> DeadEndPathCache<G> {
|
||||||
/// Create a new cache for a ranking rule graph containing at most `all_edges_len` edges.
|
/// Create a new cache for a ranking rule graph containing at most `all_edges_len` edges.
|
||||||
pub fn new(all_edge_conditions: &FixedSizeInterner<G::EdgeCondition>) -> Self {
|
pub fn new(all_conditions: &FixedSizeInterner<G::Condition>) -> Self {
|
||||||
Self {
|
Self {
|
||||||
conditions: SmallBitmap::for_interned_values_in(all_edge_conditions),
|
conditions: SmallBitmap::for_interned_values_in(all_conditions),
|
||||||
prefixes: PathSet::default(),
|
prefixes: PathSet::default(),
|
||||||
condition_couples: all_edge_conditions
|
condition_couples: all_conditions
|
||||||
.map(|_| SmallBitmap::for_interned_values_in(all_edge_conditions)),
|
.map(|_| SmallBitmap::for_interned_values_in(all_conditions)),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Store in the cache that every path containing the given edge resolves to no documents.
|
/// Store in the cache that every path containing the given edge resolves to no documents.
|
||||||
pub fn add_condition(&mut self, condition: Interned<G::EdgeCondition>) {
|
pub fn add_condition(&mut self, condition: Interned<G::Condition>) {
|
||||||
self.conditions.insert(condition);
|
self.conditions.insert(condition);
|
||||||
self.condition_couples.get_mut(condition).clear();
|
self.condition_couples.get_mut(condition).clear();
|
||||||
self.prefixes.remove_edge(condition);
|
self.prefixes.remove_edge(condition);
|
||||||
@ -46,7 +46,7 @@ impl<G: RankingRuleGraphTrait> DeadEndPathCache<G> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
/// Store in the cache that every path containing the given prefix resolves to no documents.
|
/// Store in the cache that every path containing the given prefix resolves to no documents.
|
||||||
pub fn add_prefix(&mut self, prefix: &[Interned<G::EdgeCondition>]) {
|
pub fn add_prefix(&mut self, prefix: &[Interned<G::Condition>]) {
|
||||||
// TODO: typed PathSet
|
// TODO: typed PathSet
|
||||||
self.prefixes.insert(prefix.iter().copied());
|
self.prefixes.insert(prefix.iter().copied());
|
||||||
}
|
}
|
||||||
@ -54,8 +54,8 @@ impl<G: RankingRuleGraphTrait> DeadEndPathCache<G> {
|
|||||||
/// Store in the cache that every path containing the two given edges resolves to no documents.
|
/// Store in the cache that every path containing the two given edges resolves to no documents.
|
||||||
pub fn add_condition_couple(
|
pub fn add_condition_couple(
|
||||||
&mut self,
|
&mut self,
|
||||||
edge1: Interned<G::EdgeCondition>,
|
edge1: Interned<G::Condition>,
|
||||||
edge2: Interned<G::EdgeCondition>,
|
edge2: Interned<G::Condition>,
|
||||||
) {
|
) {
|
||||||
self.condition_couples.get_mut(edge1).insert(edge2);
|
self.condition_couples.get_mut(edge1).insert(edge2);
|
||||||
}
|
}
|
||||||
@ -63,8 +63,8 @@ impl<G: RankingRuleGraphTrait> DeadEndPathCache<G> {
|
|||||||
/// Returns true if the cache can determine that the given path resolves to no documents.
|
/// Returns true if the cache can determine that the given path resolves to no documents.
|
||||||
pub fn path_is_dead_end(
|
pub fn path_is_dead_end(
|
||||||
&self,
|
&self,
|
||||||
path: &[Interned<G::EdgeCondition>],
|
path: &[Interned<G::Condition>],
|
||||||
path_bitmap: &SmallBitmap<G::EdgeCondition>,
|
path_bitmap: &SmallBitmap<G::Condition>,
|
||||||
) -> bool {
|
) -> bool {
|
||||||
if path_bitmap.intersects(&self.conditions) {
|
if path_bitmap.intersects(&self.conditions) {
|
||||||
return true;
|
return true;
|
@ -7,8 +7,8 @@ the same but the edges are replaced.
|
|||||||
|
|
||||||
mod build;
|
mod build;
|
||||||
mod cheapest_paths;
|
mod cheapest_paths;
|
||||||
mod edge_docids_cache;
|
mod condition_docids_cache;
|
||||||
mod empty_paths_cache;
|
mod dead_end_path_cache;
|
||||||
mod path_set;
|
mod path_set;
|
||||||
|
|
||||||
/// Implementation of the `proximity` ranking rule
|
/// Implementation of the `proximity` ranking rule
|
||||||
@ -19,8 +19,8 @@ mod typo;
|
|||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
use std::hash::Hash;
|
use std::hash::Hash;
|
||||||
|
|
||||||
pub use edge_docids_cache::EdgeConditionDocIdsCache;
|
pub use condition_docids_cache::EdgeConditionDocIdsCache;
|
||||||
pub use empty_paths_cache::DeadEndPathCache;
|
pub use dead_end_path_cache::DeadEndPathCache;
|
||||||
pub use proximity::{ProximityCondition, ProximityGraph};
|
pub use proximity::{ProximityCondition, ProximityGraph};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
pub use typo::{TypoEdge, TypoGraph};
|
pub use typo::{TypoEdge, TypoGraph};
|
||||||
@ -32,31 +32,6 @@ use super::small_bitmap::SmallBitmap;
|
|||||||
use super::{QueryGraph, QueryNode, SearchContext};
|
use super::{QueryGraph, QueryNode, SearchContext};
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
|
||||||
/// The condition that is associated with an edge in the ranking rule graph.
|
|
||||||
///
|
|
||||||
/// Some edges are unconditional, which means that traversing them does not reduce
|
|
||||||
/// the set of candidates.
|
|
||||||
///
|
|
||||||
/// Most edges, however, have a condition attached to them. For example, for the
|
|
||||||
/// proximity ranking rule, the condition could be that a word is N-close to another one.
|
|
||||||
/// When the edge is traversed, some database operations are executed to retrieve the set
|
|
||||||
/// of documents that satisfy the condition, which reduces the list of candidate document ids.
|
|
||||||
pub enum EdgeCondition<E> {
|
|
||||||
Unconditional,
|
|
||||||
Conditional(Interned<E>),
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<E> Copy for EdgeCondition<E> {}
|
|
||||||
|
|
||||||
impl<E> Clone for EdgeCondition<E> {
|
|
||||||
fn clone(&self) -> Self {
|
|
||||||
match self {
|
|
||||||
Self::Unconditional => Self::Unconditional,
|
|
||||||
Self::Conditional(arg0) => Self::Conditional(*arg0),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// An edge in the ranking rule graph.
|
/// An edge in the ranking rule graph.
|
||||||
///
|
///
|
||||||
/// It contains:
|
/// It contains:
|
||||||
@ -68,7 +43,27 @@ pub struct Edge<E> {
|
|||||||
pub source_node: Interned<QueryNode>,
|
pub source_node: Interned<QueryNode>,
|
||||||
pub dest_node: Interned<QueryNode>,
|
pub dest_node: Interned<QueryNode>,
|
||||||
pub cost: u8,
|
pub cost: u8,
|
||||||
pub condition: EdgeCondition<E>,
|
pub condition: Option<Interned<E>>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<E> Hash for Edge<E> {
|
||||||
|
fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
|
||||||
|
self.source_node.hash(state);
|
||||||
|
self.dest_node.hash(state);
|
||||||
|
self.cost.hash(state);
|
||||||
|
self.condition.hash(state);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<E> Eq for Edge<E> {}
|
||||||
|
|
||||||
|
impl<E> PartialEq for Edge<E> {
|
||||||
|
fn eq(&self, other: &Self) -> bool {
|
||||||
|
self.source_node == other.source_node
|
||||||
|
&& self.dest_node == other.dest_node
|
||||||
|
&& self.cost == other.cost
|
||||||
|
&& self.condition == other.condition
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/// A trait to be implemented by a marker type to build a graph-based ranking rule.
|
/// A trait to be implemented by a marker type to build a graph-based ranking rule.
|
||||||
@ -113,12 +108,12 @@ pub trait RankingRuleGraphTrait: Sized {
|
|||||||
conditions_interner: &mut DedupInterner<Self::EdgeCondition>,
|
conditions_interner: &mut DedupInterner<Self::EdgeCondition>,
|
||||||
source_node: &QueryNode,
|
source_node: &QueryNode,
|
||||||
dest_node: &QueryNode,
|
dest_node: &QueryNode,
|
||||||
) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>>;
|
) -> Result<Vec<(u8, Option<Interned<Self::EdgeCondition>>)>>;
|
||||||
|
|
||||||
fn log_state(
|
fn log_state(
|
||||||
graph: &RankingRuleGraph<Self>,
|
graph: &RankingRuleGraph<Self>,
|
||||||
paths: &[Vec<Interned<Self::EdgeCondition>>],
|
paths: &[Vec<Interned<Self::EdgeCondition>>],
|
||||||
empty_paths_cache: &DeadEndPathCache<Self>,
|
dead_end_path_cache: &DeadEndPathCache<Self>,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
distances: &MappedInterner<Vec<(u16, SmallBitmap<Self::EdgeCondition>)>, QueryNode>,
|
distances: &MappedInterner<Vec<(u16, SmallBitmap<Self::EdgeCondition>)>, QueryNode>,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
@ -151,15 +146,12 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
pub fn remove_edges_with_condition(&mut self, condition_to_remove: Interned<G::EdgeCondition>) {
|
pub fn remove_edges_with_condition(&mut self, condition_to_remove: Interned<G::EdgeCondition>) {
|
||||||
for (edge_id, edge_opt) in self.edges_store.iter_mut() {
|
for (edge_id, edge_opt) in self.edges_store.iter_mut() {
|
||||||
let Some(edge) = edge_opt.as_mut() else { continue };
|
let Some(edge) = edge_opt.as_mut() else { continue };
|
||||||
match edge.condition {
|
let Some(condition) = edge.condition else { continue };
|
||||||
EdgeCondition::Unconditional => continue,
|
|
||||||
EdgeCondition::Conditional(condition) => {
|
if condition == condition_to_remove {
|
||||||
if condition == condition_to_remove {
|
let (source_node, _dest_node) = (edge.source_node, edge.dest_node);
|
||||||
let (source_node, _dest_node) = (edge.source_node, edge.dest_node);
|
*edge_opt = None;
|
||||||
*edge_opt = None;
|
self.edges_of_node.get_mut(source_node).remove(edge_id);
|
||||||
self.edges_of_node.get_mut(source_node).remove(edge_id);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -4,7 +4,7 @@
|
|||||||
|
|
||||||
use crate::search::new::interner::Interned;
|
use crate::search::new::interner::Interned;
|
||||||
|
|
||||||
/// A set of [`Path`]
|
/// A set of `Vec<Interned<T>>`.
|
||||||
pub struct PathSet<T> {
|
pub struct PathSet<T> {
|
||||||
nodes: Vec<(Interned<T>, Self)>,
|
nodes: Vec<(Interned<T>, Self)>,
|
||||||
is_end: bool,
|
is_end: bool,
|
||||||
|
@ -7,7 +7,6 @@ use crate::search::new::interner::{DedupInterner, Interned};
|
|||||||
use crate::search::new::query_graph::QueryNodeData;
|
use crate::search::new::query_graph::QueryNodeData;
|
||||||
use crate::search::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm};
|
use crate::search::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm};
|
||||||
use crate::search::new::ranking_rule_graph::proximity::WordPair;
|
use crate::search::new::ranking_rule_graph::proximity::WordPair;
|
||||||
use crate::search::new::ranking_rule_graph::EdgeCondition;
|
|
||||||
use crate::search::new::{QueryNode, SearchContext};
|
use crate::search::new::{QueryNode, SearchContext};
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
@ -40,7 +39,7 @@ pub fn build_edges<'ctx>(
|
|||||||
conditions_interner: &mut DedupInterner<ProximityCondition>,
|
conditions_interner: &mut DedupInterner<ProximityCondition>,
|
||||||
from_node: &QueryNode,
|
from_node: &QueryNode,
|
||||||
to_node: &QueryNode,
|
to_node: &QueryNode,
|
||||||
) -> Result<Vec<(u8, EdgeCondition<ProximityCondition>)>> {
|
) -> Result<Vec<(u8, Option<Interned<ProximityCondition>>)>> {
|
||||||
let SearchContext {
|
let SearchContext {
|
||||||
index,
|
index,
|
||||||
txn,
|
txn,
|
||||||
@ -52,7 +51,7 @@ pub fn build_edges<'ctx>(
|
|||||||
} = ctx;
|
} = ctx;
|
||||||
|
|
||||||
let right_term = match &to_node.data {
|
let right_term = match &to_node.data {
|
||||||
QueryNodeData::End => return Ok(vec![(0, EdgeCondition::Unconditional)]),
|
QueryNodeData::End => return Ok(vec![(0, None)]),
|
||||||
QueryNodeData::Deleted | QueryNodeData::Start => return Ok(vec![]),
|
QueryNodeData::Deleted | QueryNodeData::Start => return Ok(vec![]),
|
||||||
QueryNodeData::Term(term) => term,
|
QueryNodeData::Term(term) => term,
|
||||||
};
|
};
|
||||||
@ -70,7 +69,7 @@ pub fn build_edges<'ctx>(
|
|||||||
QueryNodeData::Start => {
|
QueryNodeData::Start => {
|
||||||
return Ok(vec![(
|
return Ok(vec![(
|
||||||
(right_ngram_length - 1) as u8,
|
(right_ngram_length - 1) as u8,
|
||||||
EdgeCondition::Conditional(
|
Some(
|
||||||
conditions_interner
|
conditions_interner
|
||||||
.insert(ProximityCondition::Term { term: *right_term_interned }),
|
.insert(ProximityCondition::Term { term: *right_term_interned }),
|
||||||
),
|
),
|
||||||
@ -88,7 +87,7 @@ pub fn build_edges<'ctx>(
|
|||||||
// but `sun` and `are` have no proximity condition between them
|
// but `sun` and `are` have no proximity condition between them
|
||||||
return Ok(vec![(
|
return Ok(vec![(
|
||||||
(right_ngram_length - 1) as u8,
|
(right_ngram_length - 1) as u8,
|
||||||
EdgeCondition::Conditional(
|
Some(
|
||||||
conditions_interner.insert(ProximityCondition::Term { term: *right_term_interned }),
|
conditions_interner.insert(ProximityCondition::Term { term: *right_term_interned }),
|
||||||
),
|
),
|
||||||
)]);
|
)]);
|
||||||
@ -140,7 +139,7 @@ pub fn build_edges<'ctx>(
|
|||||||
.map(|(cost, word_pairs)| {
|
.map(|(cost, word_pairs)| {
|
||||||
(
|
(
|
||||||
cost,
|
cost,
|
||||||
EdgeCondition::Conditional(
|
Some(
|
||||||
conditions_interner
|
conditions_interner
|
||||||
.insert(ProximityCondition::Pairs { pairs: word_pairs.into_boxed_slice() }),
|
.insert(ProximityCondition::Pairs { pairs: word_pairs.into_boxed_slice() }),
|
||||||
),
|
),
|
||||||
@ -149,9 +148,7 @@ pub fn build_edges<'ctx>(
|
|||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
new_edges.push((
|
new_edges.push((
|
||||||
8 + (right_ngram_length - 1) as u8,
|
8 + (right_ngram_length - 1) as u8,
|
||||||
EdgeCondition::Conditional(
|
Some(conditions_interner.insert(ProximityCondition::Term { term: *right_term_interned })),
|
||||||
conditions_interner.insert(ProximityCondition::Term { term: *right_term_interned }),
|
|
||||||
),
|
|
||||||
));
|
));
|
||||||
Ok(new_edges)
|
Ok(new_edges)
|
||||||
}
|
}
|
||||||
|
@ -6,8 +6,8 @@ use std::iter::FromIterator;
|
|||||||
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::empty_paths_cache::DeadEndPathCache;
|
use super::dead_end_path_cache::DeadEndPathCache;
|
||||||
use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait};
|
use super::{RankingRuleGraph, RankingRuleGraphTrait};
|
||||||
use crate::search::new::interner::{DedupInterner, Interned, MappedInterner};
|
use crate::search::new::interner::{DedupInterner, Interned, MappedInterner};
|
||||||
use crate::search::new::logger::SearchLogger;
|
use crate::search::new::logger::SearchLogger;
|
||||||
use crate::search::new::query_term::{Phrase, QueryTerm};
|
use crate::search::new::query_term::{Phrase, QueryTerm};
|
||||||
@ -60,20 +60,20 @@ impl RankingRuleGraphTrait for ProximityGraph {
|
|||||||
conditions_interner: &mut DedupInterner<Self::EdgeCondition>,
|
conditions_interner: &mut DedupInterner<Self::EdgeCondition>,
|
||||||
source_node: &QueryNode,
|
source_node: &QueryNode,
|
||||||
dest_node: &QueryNode,
|
dest_node: &QueryNode,
|
||||||
) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>> {
|
) -> Result<Vec<(u8, Option<Interned<Self::EdgeCondition>>)>> {
|
||||||
build::build_edges(ctx, conditions_interner, source_node, dest_node)
|
build::build_edges(ctx, conditions_interner, source_node, dest_node)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn log_state(
|
fn log_state(
|
||||||
graph: &RankingRuleGraph<Self>,
|
graph: &RankingRuleGraph<Self>,
|
||||||
paths: &[Vec<Interned<ProximityCondition>>],
|
paths: &[Vec<Interned<ProximityCondition>>],
|
||||||
empty_paths_cache: &DeadEndPathCache<Self>,
|
dead_end_path_cache: &DeadEndPathCache<Self>,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>,
|
distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
logger: &mut dyn SearchLogger<QueryGraph>,
|
logger: &mut dyn SearchLogger<QueryGraph>,
|
||||||
) {
|
) {
|
||||||
logger.log_proximity_state(graph, paths, empty_paths_cache, universe, distances, cost);
|
logger.log_proximity_state(graph, paths, dead_end_path_cache, universe, distances, cost);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn label_for_edge_condition<'ctx>(
|
fn label_for_edge_condition<'ctx>(
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::empty_paths_cache::DeadEndPathCache;
|
use super::dead_end_path_cache::DeadEndPathCache;
|
||||||
use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait};
|
use super::{RankingRuleGraph, RankingRuleGraphTrait};
|
||||||
use crate::search::new::interner::{DedupInterner, Interned, MappedInterner};
|
use crate::search::new::interner::{DedupInterner, Interned, MappedInterner};
|
||||||
use crate::search::new::logger::SearchLogger;
|
use crate::search::new::logger::SearchLogger;
|
||||||
use crate::search::new::query_graph::QueryNodeData;
|
use crate::search::new::query_graph::QueryNodeData;
|
||||||
@ -58,7 +58,7 @@ impl RankingRuleGraphTrait for TypoGraph {
|
|||||||
conditions_interner: &mut DedupInterner<Self::EdgeCondition>,
|
conditions_interner: &mut DedupInterner<Self::EdgeCondition>,
|
||||||
_from_node: &QueryNode,
|
_from_node: &QueryNode,
|
||||||
to_node: &QueryNode,
|
to_node: &QueryNode,
|
||||||
) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>> {
|
) -> Result<Vec<(u8, Option<Interned<Self::EdgeCondition>>)>> {
|
||||||
let SearchContext { term_interner, .. } = ctx;
|
let SearchContext { term_interner, .. } = ctx;
|
||||||
match &to_node.data {
|
match &to_node.data {
|
||||||
QueryNodeData::Term(LocatedQueryTerm { value, positions }) => {
|
QueryNodeData::Term(LocatedQueryTerm { value, positions }) => {
|
||||||
@ -121,7 +121,7 @@ impl RankingRuleGraphTrait for TypoGraph {
|
|||||||
if !new_term.is_empty() {
|
if !new_term.is_empty() {
|
||||||
edges.push((
|
edges.push((
|
||||||
nbr_typos as u8 + base_cost,
|
nbr_typos as u8 + base_cost,
|
||||||
EdgeCondition::Conditional(conditions_interner.insert(TypoEdge {
|
Some(conditions_interner.insert(TypoEdge {
|
||||||
term: term_interner.insert(new_term),
|
term: term_interner.insert(new_term),
|
||||||
nbr_typos: nbr_typos as u8,
|
nbr_typos: nbr_typos as u8,
|
||||||
})),
|
})),
|
||||||
@ -130,7 +130,7 @@ impl RankingRuleGraphTrait for TypoGraph {
|
|||||||
}
|
}
|
||||||
Ok(edges)
|
Ok(edges)
|
||||||
}
|
}
|
||||||
QueryNodeData::End => Ok(vec![(0, EdgeCondition::Unconditional)]),
|
QueryNodeData::End => Ok(vec![(0, None)]),
|
||||||
QueryNodeData::Deleted | QueryNodeData::Start => panic!(),
|
QueryNodeData::Deleted | QueryNodeData::Start => panic!(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -138,13 +138,13 @@ impl RankingRuleGraphTrait for TypoGraph {
|
|||||||
fn log_state(
|
fn log_state(
|
||||||
graph: &RankingRuleGraph<Self>,
|
graph: &RankingRuleGraph<Self>,
|
||||||
paths: &[Vec<Interned<TypoEdge>>],
|
paths: &[Vec<Interned<TypoEdge>>],
|
||||||
empty_paths_cache: &DeadEndPathCache<Self>,
|
dead_end_path_cache: &DeadEndPathCache<Self>,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoEdge>)>, QueryNode>,
|
distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoEdge>)>, QueryNode>,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
logger: &mut dyn SearchLogger<QueryGraph>,
|
logger: &mut dyn SearchLogger<QueryGraph>,
|
||||||
) {
|
) {
|
||||||
logger.log_typo_state(graph, paths, empty_paths_cache, universe, distances, cost);
|
logger.log_typo_state(graph, paths, dead_end_path_cache, universe, distances, cost);
|
||||||
}
|
}
|
||||||
|
|
||||||
fn label_for_edge_condition<'ctx>(
|
fn label_for_edge_condition<'ctx>(
|
||||||
|
@ -2,6 +2,7 @@ use std::marker::PhantomData;
|
|||||||
|
|
||||||
use super::interner::{FixedSizeInterner, Interned};
|
use super::interner::{FixedSizeInterner, Interned};
|
||||||
|
|
||||||
|
/// A compact set of [`Interned<T>`]
|
||||||
pub struct SmallBitmap<T> {
|
pub struct SmallBitmap<T> {
|
||||||
internal: SmallBitmapInternal,
|
internal: SmallBitmapInternal,
|
||||||
_phantom: PhantomData<T>,
|
_phantom: PhantomData<T>,
|
||||||
|
Loading…
x
Reference in New Issue
Block a user