Intern ranking rule graph edge conditions as well

This commit is contained in:
Loïc Lecrenier 2023-03-13 12:46:32 +01:00
parent 5155fd2bf1
commit 1c58cf8426
9 changed files with 160 additions and 115 deletions

View File

@ -40,10 +40,11 @@ use roaring::RoaringBitmap;
use super::logger::SearchLogger; use super::logger::SearchLogger;
use super::ranking_rule_graph::{ use super::ranking_rule_graph::{
EdgeDocidsCache, EmptyPathsCache, RankingRuleGraph, RankingRuleGraphTrait, TypoGraph, ProximityGraph, EdgeCondition, EdgeConditionsCache, EmptyPathsCache, ProximityGraph, RankingRuleGraph,
RankingRuleGraphTrait, TypoGraph,
}; };
use super::small_bitmap::SmallBitmap; use super::small_bitmap::SmallBitmap;
use super::{BitmapOrAllRef, QueryGraph, RankingRule, RankingRuleOutput, SearchContext}; use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
use crate::Result; use crate::Result;
pub type Proximity = GraphBasedRankingRule<ProximityGraph>; pub type Proximity = GraphBasedRankingRule<ProximityGraph>;
@ -78,7 +79,7 @@ pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
/// The current graph /// The current graph
graph: RankingRuleGraph<G>, graph: RankingRuleGraph<G>,
/// Cache to retrieve the docids associated with each edge /// Cache to retrieve the docids associated with each edge
edge_docids_cache: EdgeDocidsCache<G>, edge_conditions_cache: EdgeConditionsCache<G>,
/// Cache used to optimistically discard paths that resolve to no documents. /// Cache used to optimistically discard paths that resolve to no documents.
empty_paths_cache: EmptyPathsCache, empty_paths_cache: EmptyPathsCache,
/// A structure giving the list of possible costs from each node to the end node, /// A structure giving the list of possible costs from each node to the end node,
@ -94,25 +95,27 @@ pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
fn remove_empty_edges<'search, G: RankingRuleGraphTrait>( fn remove_empty_edges<'search, G: RankingRuleGraphTrait>(
ctx: &mut SearchContext<'search>, ctx: &mut SearchContext<'search>,
graph: &mut RankingRuleGraph<G>, graph: &mut RankingRuleGraph<G>,
edge_docids_cache: &mut EdgeDocidsCache<G>, edge_docids_cache: &mut EdgeConditionsCache<G>,
universe: &RoaringBitmap, universe: &RoaringBitmap,
empty_paths_cache: &mut EmptyPathsCache, empty_paths_cache: &mut EmptyPathsCache,
) -> Result<()> { ) -> Result<()> {
for edge_index in 0..graph.edges_store.len() as u16 { for edge_index in 0..graph.edges_store.len() as u16 {
if graph.edges_store[edge_index as usize].is_none() { let Some(edge) = graph.edges_store[edge_index as usize].as_ref() else {
continue; continue;
} };
let docids = edge_docids_cache.get_edge_docids(ctx, edge_index, &*graph, universe)?; let condition = edge.condition;
match docids {
BitmapOrAllRef::Bitmap(docids) => { match condition {
EdgeCondition::Unconditional => continue,
EdgeCondition::Conditional(condition) => {
let docids = edge_docids_cache.get_edge_docids(ctx, condition, graph, universe)?;
if docids.is_disjoint(universe) { if docids.is_disjoint(universe) {
graph.remove_ranking_rule_edge(edge_index); graph.remove_ranking_rule_edge(edge_index);
empty_paths_cache.forbid_edge(edge_index); empty_paths_cache.forbid_edge(edge_index);
edge_docids_cache.cache.remove(&edge_index); edge_docids_cache.cache.remove(&condition);
continue; continue;
} }
} }
BitmapOrAllRef::All => continue,
} }
} }
Ok(()) Ok(())
@ -132,7 +135,7 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
query_graph: &QueryGraph, query_graph: &QueryGraph,
) -> Result<()> { ) -> Result<()> {
let mut graph = RankingRuleGraph::build(ctx, query_graph.clone())?; let mut graph = RankingRuleGraph::build(ctx, query_graph.clone())?;
let mut edge_docids_cache = EdgeDocidsCache::default(); let mut edge_docids_cache = EdgeConditionsCache::default();
let mut empty_paths_cache = EmptyPathsCache::new(graph.edges_store.len() as u16); let mut empty_paths_cache = EmptyPathsCache::new(graph.edges_store.len() as u16);
// First simplify the graph as much as possible, by computing the docids of the edges // First simplify the graph as much as possible, by computing the docids of the edges
@ -150,7 +153,7 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
let state = GraphBasedRankingRuleState { let state = GraphBasedRankingRuleState {
graph, graph,
edge_docids_cache, edge_conditions_cache: edge_docids_cache,
empty_paths_cache, empty_paths_cache,
all_distances, all_distances,
cur_distance_idx: 0, cur_distance_idx: 0,
@ -174,11 +177,11 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
// should never happen // should never happen
let mut state = self.state.take().unwrap(); let mut state = self.state.take().unwrap();
// TODO: does this have a real positive performance cost? // TODO: does this have a real positive performance impact?
remove_empty_edges( remove_empty_edges(
ctx, ctx,
&mut state.graph, &mut state.graph,
&mut state.edge_docids_cache, &mut state.edge_conditions_cache,
universe, universe,
&mut state.empty_paths_cache, &mut state.empty_paths_cache,
)?; )?;
@ -201,17 +204,17 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
let GraphBasedRankingRuleState { let GraphBasedRankingRuleState {
graph, graph,
edge_docids_cache, edge_conditions_cache: edge_docids_cache,
empty_paths_cache, empty_paths_cache,
all_distances, all_distances,
cur_distance_idx: _, cur_distance_idx: _,
} = &mut state; } = &mut state;
let original_universe = universe; // let original_universe = universe;
let mut universe = universe.clone(); let mut universe = universe.clone();
// TODO: remove this unnecessary clone // TODO: remove this unnecessary clone
let original_graph = graph.clone(); // let original_graph = graph.clone();
// and this vector as well // and this vector as well
let mut paths = vec![]; let mut paths = vec![];
@ -241,12 +244,15 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
for &edge_index in path { for &edge_index in path {
visited_edges.push(edge_index); visited_edges.push(edge_index);
let edge_docids = let edge = graph.edges_store[edge_index as usize].as_ref().unwrap();
edge_docids_cache.get_edge_docids(ctx, edge_index, graph, &universe)?; let condition = match edge.condition {
let edge_docids = match edge_docids { EdgeCondition::Unconditional => continue,
BitmapOrAllRef::Bitmap(b) => b, EdgeCondition::Conditional(condition) => condition,
BitmapOrAllRef::All => continue,
}; };
let edge_docids =
edge_docids_cache.get_edge_docids(ctx, condition, graph, &universe)?;
cached_edge_docids.push((edge_index, edge_docids.clone())); cached_edge_docids.push((edge_index, edge_docids.clone()));
// If the edge is empty, then the path will be empty as well, we update the graph // If the edge is empty, then the path will be empty as well, we update the graph
@ -257,7 +263,7 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
// 2. remove this edge from the ranking rule graph // 2. remove this edge from the ranking rule graph
graph.remove_ranking_rule_edge(edge_index); graph.remove_ranking_rule_edge(edge_index);
// 3. Also remove the entry from the edge_docids_cache, since we don't need it anymore // 3. Also remove the entry from the edge_docids_cache, since we don't need it anymore
edge_docids_cache.cache.remove(&edge_index); edge_docids_cache.cache.remove(&condition);
return Ok(()); return Ok(());
} }
path_docids &= edge_docids; path_docids &= edge_docids;
@ -279,6 +285,8 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
empty_paths_cache.forbid_couple_edges(*edge_index2, edge_index); empty_paths_cache.forbid_couple_edges(*edge_index2, edge_index);
} }
} }
// We should maybe instead try to compute:
// 0th & nth & 1st & n-1th & 2nd & etc...
return Ok(()); return Ok(());
} }
} }
@ -289,15 +297,15 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
}, },
)?; )?;
G::log_state( // G::log_state(
&original_graph, // &original_graph,
&paths, // &paths,
&state.empty_paths_cache, // &state.empty_paths_cache,
original_universe, // original_universe,
&state.all_distances, // &state.all_distances,
cost, // cost,
logger, // logger,
); // );
// TODO: Graph-based ranking rules do not (yet) modify the query graph. We could, however, // TODO: Graph-based ranking rules do not (yet) modify the query graph. We could, however,
// remove nodes and/or terms within nodes that weren't present in any of the paths. // remove nodes and/or terms within nodes that weren't present in any of the paths.

View File

@ -14,6 +14,21 @@ impl<T> Interned<T> {
Self { idx, _phantom: PhantomData } Self { idx, _phantom: PhantomData }
} }
} }
// TODO: the stable store should be replaced by a bump allocator
// and the interned value should be a pointer wrapper
// then we can get its value with `interned.get()` instead of `interner.get(interned)`
// and as a bonus, its validity is tracked with Rust's lifetime system
// one problem is that we need two lifetimes: one for the bump allocator, one for the
// hashmap
// but that's okay, we can use:
// ```
// struct Interner<'bump> {
// bump: &'bump Bump,
// lookup: FxHashMap
// }
// ```
/// An [`Interner`] is used to store a unique copy of a value of type `T`. This value /// An [`Interner`] is used to store a unique copy of a value of type `T`. This value
/// is then identified by a lightweight index of type [`Interned<T>`], which can /// is then identified by a lightweight index of type [`Interned<T>`], which can
/// be copied, compared, and hashed efficiently. An immutable reference to the original value /// be copied, compared, and hashed efficiently. An immutable reference to the original value

View File

@ -35,11 +35,6 @@ use crate::search::new::query_term::located_query_terms_from_string;
use crate::search::new::words::Words; use crate::search::new::words::Words;
use crate::{Filter, Index, Result, TermsMatchingStrategy}; use crate::{Filter, Index, Result, TermsMatchingStrategy};
pub enum BitmapOrAllRef<'s> {
Bitmap(&'s RoaringBitmap),
All,
}
pub struct SearchContext<'search> { pub struct SearchContext<'search> {
pub index: &'search Index, pub index: &'search Index,
pub txn: &'search RoTxn<'search>, pub txn: &'search RoTxn<'search>,

View File

@ -1,6 +1,7 @@
use std::collections::HashSet; use std::collections::HashSet;
use super::{Edge, RankingRuleGraph, RankingRuleGraphTrait}; use super::{Edge, RankingRuleGraph, RankingRuleGraphTrait};
use crate::search::new::interner::Interner;
use crate::search::new::small_bitmap::SmallBitmap; use crate::search::new::small_bitmap::SmallBitmap;
use crate::search::new::{QueryGraph, SearchContext}; use crate::search::new::{QueryGraph, SearchContext};
use crate::Result; use crate::Result;
@ -10,6 +11,8 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
pub fn build(ctx: &mut SearchContext, query_graph: QueryGraph) -> Result<Self> { pub fn build(ctx: &mut SearchContext, query_graph: QueryGraph) -> Result<Self> {
let QueryGraph { nodes: graph_nodes, edges: graph_edges, .. } = &query_graph; let QueryGraph { nodes: graph_nodes, edges: graph_edges, .. } = &query_graph;
let mut conditions_interner = Interner::default();
let mut edges_store = vec![]; let mut edges_store = vec![];
let mut edges_of_node = vec![]; let mut edges_of_node = vec![];
@ -21,18 +24,22 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
for successor_idx in graph_edges[node_idx].successors.iter() { for successor_idx in graph_edges[node_idx].successors.iter() {
let dest_node = &graph_nodes[successor_idx as usize]; let dest_node = &graph_nodes[successor_idx as usize];
let edges = let edges = G::build_step_visit_destination_node(
G::build_step_visit_destination_node(ctx, dest_node, &source_node_data)?; ctx,
&mut conditions_interner,
dest_node,
&source_node_data,
)?;
if edges.is_empty() { if edges.is_empty() {
continue; continue;
} }
for (cost, details) in edges { for (cost, condition) in edges {
edges_store.push(Some(Edge { edges_store.push(Some(Edge {
source_node: node_idx as u16, source_node: node_idx as u16,
dest_node: successor_idx, dest_node: successor_idx,
cost, cost,
condition: details, condition,
})); }));
new_edges.insert(edges_store.len() as u16 - 1); new_edges.insert(edges_store.len() as u16 - 1);
} }
@ -43,6 +50,6 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
.map(|edges| SmallBitmap::from_iter(edges.into_iter(), edges_store.len() as u16)) .map(|edges| SmallBitmap::from_iter(edges.into_iter(), edges_store.len() as u16))
.collect(); .collect();
Ok(RankingRuleGraph { query_graph, edges_store, edges_of_node }) Ok(RankingRuleGraph { query_graph, edges_store, edges_of_node, conditions_interner })
} }
} }

View File

@ -3,22 +3,23 @@ use std::marker::PhantomData;
use fxhash::FxHashMap; use fxhash::FxHashMap;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait}; use super::{RankingRuleGraph, RankingRuleGraphTrait};
use crate::search::new::{BitmapOrAllRef, SearchContext}; use crate::search::new::interner::Interned;
use crate::search::new::SearchContext;
use crate::Result; use crate::Result;
/// A cache storing the document ids associated with each ranking rule edge /// A cache storing the document ids associated with each ranking rule edge
pub struct EdgeDocidsCache<G: RankingRuleGraphTrait> { pub struct EdgeConditionsCache<G: RankingRuleGraphTrait> {
// TODO: should be FxHashMap<Interned<EdgeCondition>, RoaringBitmap> // TODO: should be FxHashMap<Interned<EdgeCondition>, RoaringBitmap>
pub cache: FxHashMap<u16, RoaringBitmap>, pub cache: FxHashMap<Interned<G::EdgeCondition>, RoaringBitmap>,
_phantom: PhantomData<G>, _phantom: PhantomData<G>,
} }
impl<G: RankingRuleGraphTrait> Default for EdgeDocidsCache<G> { impl<G: RankingRuleGraphTrait> Default for EdgeConditionsCache<G> {
fn default() -> Self { fn default() -> Self {
Self { cache: Default::default(), _phantom: Default::default() } Self { cache: Default::default(), _phantom: Default::default() }
} }
} }
impl<G: RankingRuleGraphTrait> EdgeDocidsCache<G> { impl<G: RankingRuleGraphTrait> EdgeConditionsCache<G> {
/// Retrieve the document ids for the given edge condition. /// Retrieve the document ids for the given edge condition.
/// ///
/// If the cache does not yet contain these docids, they are computed /// If the cache does not yet contain these docids, they are computed
@ -27,30 +28,25 @@ impl<G: RankingRuleGraphTrait> EdgeDocidsCache<G> {
&'s mut self, &'s mut self,
ctx: &mut SearchContext<'search>, ctx: &mut SearchContext<'search>,
// TODO: should be Interned<EdgeCondition> // TODO: should be Interned<EdgeCondition>
edge_index: u16, interned_edge_condition: Interned<G::EdgeCondition>,
graph: &RankingRuleGraph<G>, graph: &RankingRuleGraph<G>,
// TODO: maybe universe doesn't belong here // TODO: maybe universe doesn't belong here
universe: &RoaringBitmap, universe: &RoaringBitmap,
) -> Result<BitmapOrAllRef<'s>> { ) -> Result<&'s RoaringBitmap> {
let edge = graph.edges_store[edge_index as usize].as_ref().unwrap(); if self.cache.contains_key(&interned_edge_condition) {
// TODO: should we update the bitmap in the cache if the new universe
match &edge.condition { // reduces it?
EdgeCondition::Unconditional => Ok(BitmapOrAllRef::All), // TODO: maybe have a generation: u32 to track every time the universe was
EdgeCondition::Conditional(details) => { // reduced. Then only attempt to recompute the intersection when there is a chance
if self.cache.contains_key(&edge_index) { // that edge_docids & universe changed
// TODO: should we update the bitmap in the cache if the new universe return Ok(&self.cache[&interned_edge_condition]);
// reduces it?
// TODO: maybe have a generation: u32 to track every time the universe was
// reduced. Then only attempt to recompute the intersection when there is a chance
// that edge_docids & universe changed
return Ok(BitmapOrAllRef::Bitmap(&self.cache[&edge_index]));
}
// TODO: maybe universe doesn't belong here
let docids = universe & G::resolve_edge_condition(ctx, details, universe)?;
let _ = self.cache.insert(edge_index, docids);
let docids = &self.cache[&edge_index];
Ok(BitmapOrAllRef::Bitmap(docids))
}
} }
// TODO: maybe universe doesn't belong here
let edge_condition = graph.conditions_interner.get(interned_edge_condition);
// TODO: faster way to do this?
let docids = universe & G::resolve_edge_condition(ctx, edge_condition, universe)?;
let _ = self.cache.insert(interned_edge_condition, docids);
let docids = &self.cache[&interned_edge_condition];
Ok(docids)
} }
} }

View File

@ -16,12 +16,15 @@ mod proximity;
/// Implementation of the `typo` ranking rule /// Implementation of the `typo` ranking rule
mod typo; mod typo;
pub use edge_docids_cache::EdgeDocidsCache; use std::hash::Hash;
pub use edge_docids_cache::EdgeConditionsCache;
pub use empty_paths_cache::EmptyPathsCache; pub use empty_paths_cache::EmptyPathsCache;
pub use proximity::ProximityGraph; pub use proximity::ProximityGraph;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
pub use typo::TypoGraph; pub use typo::TypoGraph;
use super::interner::{Interned, Interner};
use super::logger::SearchLogger; use super::logger::SearchLogger;
use super::small_bitmap::SmallBitmap; use super::small_bitmap::SmallBitmap;
use super::{QueryGraph, QueryNode, SearchContext}; use super::{QueryGraph, QueryNode, SearchContext};
@ -36,10 +39,20 @@ use crate::Result;
/// proximity ranking rule, the condition could be that a word is N-close to another one. /// proximity ranking rule, the condition could be that a word is N-close to another one.
/// When the edge is traversed, some database operations are executed to retrieve the set /// When the edge is traversed, some database operations are executed to retrieve the set
/// of documents that satisfy the condition, which reduces the list of candidate document ids. /// of documents that satisfy the condition, which reduces the list of candidate document ids.
#[derive(Debug, Clone)]
pub enum EdgeCondition<E> { pub enum EdgeCondition<E> {
Unconditional, Unconditional,
Conditional(E), Conditional(Interned<E>),
}
impl<E> Copy for EdgeCondition<E> {}
impl<E> Clone for EdgeCondition<E> {
fn clone(&self) -> Self {
match self {
Self::Unconditional => Self::Unconditional,
Self::Conditional(arg0) => Self::Conditional(*arg0),
}
}
} }
/// An edge in the ranking rule graph. /// An edge in the ranking rule graph.
@ -48,7 +61,7 @@ pub enum EdgeCondition<E> {
/// 1. The source and destination nodes /// 1. The source and destination nodes
/// 2. The cost of traversing this edge /// 2. The cost of traversing this edge
/// 3. The condition associated with it /// 3. The condition associated with it
#[derive(Debug, Clone)] #[derive(Clone)]
pub struct Edge<E> { pub struct Edge<E> {
pub source_node: u16, pub source_node: u16,
pub dest_node: u16, pub dest_node: u16,
@ -106,7 +119,7 @@ pub trait RankingRuleGraphTrait: Sized {
/// The condition of an edge connecting two query nodes. The condition /// The condition of an edge connecting two query nodes. The condition
/// should be sufficient to compute the edge's cost and associated document ids /// should be sufficient to compute the edge's cost and associated document ids
/// in [`resolve_edge_condition`](RankingRuleGraphTrait::resolve_edge_condition). /// in [`resolve_edge_condition`](RankingRuleGraphTrait::resolve_edge_condition).
type EdgeCondition: Sized + Clone; type EdgeCondition: Sized + Clone + PartialEq + Eq + Hash;
/// A structure used in the construction of the graph, created when a /// A structure used in the construction of the graph, created when a
/// query graph source node is visited. It is used to determine the cost /// query graph source node is visited. It is used to determine the cost
@ -138,6 +151,7 @@ pub trait RankingRuleGraphTrait: Sized {
/// (with [`build_step_visit_source_node`](RankingRuleGraphTrait::build_step_visit_source_node)) to `dest_node`. /// (with [`build_step_visit_source_node`](RankingRuleGraphTrait::build_step_visit_source_node)) to `dest_node`.
fn build_step_visit_destination_node<'from_data, 'search: 'from_data>( fn build_step_visit_destination_node<'from_data, 'search: 'from_data>(
ctx: &mut SearchContext<'search>, ctx: &mut SearchContext<'search>,
conditions_interner: &mut Interner<Self::EdgeCondition>,
dest_node: &QueryNode, dest_node: &QueryNode,
source_node_data: &'from_data Self::BuildVisitedFromNode, source_node_data: &'from_data Self::BuildVisitedFromNode,
) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>>; ) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>>;
@ -161,16 +175,18 @@ pub struct RankingRuleGraph<G: RankingRuleGraphTrait> {
pub query_graph: QueryGraph, pub query_graph: QueryGraph,
pub edges_store: Vec<Option<Edge<G::EdgeCondition>>>, pub edges_store: Vec<Option<Edge<G::EdgeCondition>>>,
pub edges_of_node: Vec<SmallBitmap>, pub edges_of_node: Vec<SmallBitmap>,
pub conditions_interner: Interner<G::EdgeCondition>,
} }
impl<G: RankingRuleGraphTrait> Clone for RankingRuleGraph<G> { // impl<G: RankingRuleGraphTrait> Clone for RankingRuleGraph<G> {
fn clone(&self) -> Self { // fn clone(&self) -> Self {
Self { // Self {
query_graph: self.query_graph.clone(), // query_graph: self.query_graph.clone(),
edges_store: self.edges_store.clone(), // edges_store: self.edges_store.clone(),
edges_of_node: self.edges_of_node.clone(), // edges_of_node: self.edges_of_node.clone(),
} // conditions_interner: self.conditions_interner.clone(),
} // }
} // }
// }
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
/// Remove the given edge from the ranking rule graph /// Remove the given edge from the ranking rule graph
pub fn remove_ranking_rule_edge(&mut self, edge_index: u16) { pub fn remove_ranking_rule_edge(&mut self, edge_index: u16) {

View File

@ -3,6 +3,7 @@ use std::collections::BTreeMap;
use itertools::Itertools; use itertools::Itertools;
use super::ProximityEdge; use super::ProximityEdge;
use crate::search::new::interner::Interner;
use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations}; use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
use crate::search::new::ranking_rule_graph::proximity::WordPair; use crate::search::new::ranking_rule_graph::proximity::WordPair;
use crate::search::new::ranking_rule_graph::EdgeCondition; use crate::search::new::ranking_rule_graph::EdgeCondition;
@ -59,6 +60,7 @@ pub fn visit_from_node(
pub fn visit_to_node<'search, 'from_data>( pub fn visit_to_node<'search, 'from_data>(
ctx: &mut SearchContext<'search>, ctx: &mut SearchContext<'search>,
conditions_interner: &mut Interner<ProximityEdge>,
to_node: &QueryNode, to_node: &QueryNode,
from_node_data: &'from_data (WordDerivations, i8), from_node_data: &'from_data (WordDerivations, i8),
) -> Result<Vec<(u8, EdgeCondition<ProximityEdge>)>> { ) -> Result<Vec<(u8, EdgeCondition<ProximityEdge>)>> {
@ -224,22 +226,23 @@ pub fn visit_to_node<'search, 'from_data>(
} }
} }
} }
let mut new_edges = cost_proximity_word_pairs let mut new_edges =
.into_iter() cost_proximity_word_pairs
.flat_map(|(cost, proximity_word_pairs)| { .into_iter()
let mut edges = vec![]; .flat_map(|(cost, proximity_word_pairs)| {
for (proximity, word_pairs) in proximity_word_pairs { let mut edges = vec![];
edges.push(( for (proximity, word_pairs) in proximity_word_pairs {
cost, edges.push((
EdgeCondition::Conditional(ProximityEdge { cost,
pairs: word_pairs.into_boxed_slice(), EdgeCondition::Conditional(conditions_interner.insert(ProximityEdge {
proximity, pairs: word_pairs.into_boxed_slice(),
}), proximity,
)) })),
} ))
edges }
}) edges
.collect::<Vec<_>>(); })
.collect::<Vec<_>>();
new_edges.push((8 + (ngram_len2 - 1) as u8, EdgeCondition::Unconditional)); new_edges.push((8 + (ngram_len2 - 1) as u8, EdgeCondition::Unconditional));
Ok(new_edges) Ok(new_edges)
} }

View File

@ -5,23 +5,21 @@ use roaring::RoaringBitmap;
use super::empty_paths_cache::EmptyPathsCache; use super::empty_paths_cache::EmptyPathsCache;
use super::{EdgeCondition, RankingRuleGraphTrait}; use super::{EdgeCondition, RankingRuleGraphTrait};
use crate::search::new::interner::Interned; use crate::search::new::interner::{Interned, Interner};
use crate::search::new::logger::SearchLogger; use crate::search::new::logger::SearchLogger;
use crate::search::new::query_term::WordDerivations; use crate::search::new::query_term::WordDerivations;
use crate::search::new::small_bitmap::SmallBitmap; use crate::search::new::small_bitmap::SmallBitmap;
use crate::search::new::{QueryGraph, QueryNode, SearchContext}; use crate::search::new::{QueryGraph, QueryNode, SearchContext};
use crate::Result; use crate::Result;
// TODO: intern the proximity edges as well? #[derive(Clone, PartialEq, Eq, Hash)]
#[derive(Clone)]
pub enum WordPair { pub enum WordPair {
Words { left: Interned<String>, right: Interned<String> }, Words { left: Interned<String>, right: Interned<String> },
WordPrefix { left: Interned<String>, right_prefix: Interned<String> }, WordPrefix { left: Interned<String>, right_prefix: Interned<String> },
WordPrefixSwapped { left_prefix: Interned<String>, right: Interned<String> }, WordPrefixSwapped { left_prefix: Interned<String>, right: Interned<String> },
} }
#[derive(Clone)] #[derive(Clone, PartialEq, Eq, Hash)]
pub struct ProximityEdge { pub struct ProximityEdge {
pairs: Box<[WordPair]>, pairs: Box<[WordPair]>,
proximity: u8, proximity: u8,
@ -55,10 +53,11 @@ impl RankingRuleGraphTrait for ProximityGraph {
fn build_step_visit_destination_node<'from_data, 'search: 'from_data>( fn build_step_visit_destination_node<'from_data, 'search: 'from_data>(
ctx: &mut SearchContext<'search>, ctx: &mut SearchContext<'search>,
conditions_interner: &mut Interner<Self::EdgeCondition>,
to_node: &QueryNode, to_node: &QueryNode,
from_node_data: &'from_data Self::BuildVisitedFromNode, from_node_data: &'from_data Self::BuildVisitedFromNode,
) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>> { ) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>> {
build::visit_to_node(ctx, to_node, from_node_data) build::visit_to_node(ctx, conditions_interner, to_node, from_node_data)
} }
fn log_state( fn log_state(

View File

@ -2,14 +2,14 @@ use roaring::RoaringBitmap;
use super::empty_paths_cache::EmptyPathsCache; use super::empty_paths_cache::EmptyPathsCache;
use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait}; use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait};
use crate::search::new::interner::Interned; use crate::search::new::interner::{Interned, Interner};
use crate::search::new::logger::SearchLogger; use crate::search::new::logger::SearchLogger;
use crate::search::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm, WordDerivations}; use crate::search::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm, WordDerivations};
use crate::search::new::small_bitmap::SmallBitmap; use crate::search::new::small_bitmap::SmallBitmap;
use crate::search::new::{QueryGraph, QueryNode, SearchContext}; use crate::search::new::{QueryGraph, QueryNode, SearchContext};
use crate::Result; use crate::Result;
#[derive(Clone)] #[derive(Clone, PartialEq, Eq, Hash)]
pub enum TypoEdge { pub enum TypoEdge {
Phrase { phrase: Interned<Phrase> }, Phrase { phrase: Interned<Phrase> },
Word { derivations: Interned<WordDerivations>, nbr_typos: u8 }, Word { derivations: Interned<WordDerivations>, nbr_typos: u8 },
@ -78,15 +78,19 @@ impl RankingRuleGraphTrait for TypoGraph {
fn build_step_visit_destination_node<'from_data, 'search: 'from_data>( fn build_step_visit_destination_node<'from_data, 'search: 'from_data>(
ctx: &mut SearchContext<'search>, ctx: &mut SearchContext<'search>,
conditions_interner: &mut Interner<Self::EdgeCondition>,
to_node: &QueryNode, to_node: &QueryNode,
_from_node_data: &'from_data Self::BuildVisitedFromNode, _from_node_data: &'from_data Self::BuildVisitedFromNode,
) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>> { ) -> Result<Vec<(u8, EdgeCondition<Self::EdgeCondition>)>> {
let SearchContext { derivations_interner, .. } = ctx; let SearchContext { derivations_interner, .. } = ctx;
match to_node { match to_node {
QueryNode::Term(LocatedQueryTerm { value, .. }) => match *value { QueryNode::Term(LocatedQueryTerm { value, .. }) => match *value {
QueryTerm::Phrase { phrase } => { QueryTerm::Phrase { phrase } => Ok(vec![(
Ok(vec![(0, EdgeCondition::Conditional(TypoEdge::Phrase { phrase }))]) 0,
} EdgeCondition::Conditional(
conditions_interner.insert(TypoEdge::Phrase { phrase }),
),
)]),
QueryTerm::Word { derivations } => { QueryTerm::Word { derivations } => {
let mut edges = vec![]; let mut edges = vec![];
@ -136,10 +140,12 @@ impl RankingRuleGraphTrait for TypoGraph {
if !new_derivations.is_empty() { if !new_derivations.is_empty() {
edges.push(( edges.push((
nbr_typos, nbr_typos,
EdgeCondition::Conditional(TypoEdge::Word { EdgeCondition::Conditional(conditions_interner.insert(
derivations: derivations_interner.insert(new_derivations), TypoEdge::Word {
nbr_typos, derivations: derivations_interner.insert(new_derivations),
}), nbr_typos,
},
)),
)) ))
} }
} }