2023-03-08 15:04:25 +01:00
/*! Module implementing the graph used for the graph-based ranking rules
and its related algorithms.

A ranking rule graph is built on top of the [`QueryGraph`]: the nodes stay
the same but the edges are replaced.
*/
2023-03-06 08:35:01 +01:00
mod build ;
mod cheapest_paths ;
mod edge_docids_cache ;
mod empty_paths_cache ;
mod paths_map ;
2023-03-08 15:04:25 +01:00
/// Implementation of the `proximity` ranking rule
2023-03-06 08:35:01 +01:00
mod proximity ;
2023-03-08 15:04:25 +01:00
/// Implementation of the `typo` ranking rule
2023-03-06 08:35:01 +01:00
mod typo ;
pub use edge_docids_cache ::EdgeDocidsCache ;
pub use empty_paths_cache ::EmptyPathsCache ;
pub use proximity ::ProximityGraph ;
2023-02-21 09:46:00 +01:00
use roaring ::RoaringBitmap ;
2023-03-06 19:21:55 +01:00
pub use typo ::TypoGraph ;
2023-02-21 09:46:00 +01:00
2023-03-08 09:55:53 +01:00
use super ::logger ::SearchLogger ;
use super ::small_bitmap ::SmallBitmap ;
use super ::{ QueryGraph , QueryNode , SearchContext } ;
use crate ::Result ;
2023-03-08 15:04:25 +01:00
/// The condition that is associated with an edge in the ranking rule graph.
///
/// Some edges are unconditional, which means that traversing them does not reduce
/// the set of candidates.
///
/// Most edges, however, have a condition attached to them. For example, for the
/// proximity ranking rule, the condition could be that a word is N-close to another one.
/// When the edge is traversed, some database operations are executed to retrieve the set
/// of documents that satisfy the condition, which reduces the list of candidate document ids.
#[derive(Debug, Clone)]
pub enum EdgeCondition<E> {
    /// Traversing the edge does not reduce the set of candidates.
    Unconditional,
    /// Traversing the edge restricts the candidates to the documents that
    /// satisfy the wrapped, ranking-rule-specific condition.
    Conditional(E),
}
2023-03-08 15:04:25 +01:00
/// An edge in the ranking rule graph.
///
/// It contains:
/// 1. The source and destination nodes
/// 2. The cost of traversing this edge
/// 3. The condition associated with it
2023-02-21 09:46:00 +01:00
#[ derive(Debug, Clone) ]
pub struct Edge < E > {
2023-03-08 15:04:25 +01:00
pub source_node : u16 ,
pub dest_node : u16 ,
2023-02-23 13:13:19 +01:00
pub cost : u8 ,
2023-03-08 15:04:25 +01:00
pub condition : EdgeCondition < E > ,
2023-02-21 09:46:00 +01:00
}
2023-03-06 19:21:55 +01:00
// pub struct SubWordDerivations {
// words: FxHashSet<Interned<String>>,
2023-03-08 15:04:25 +01:00
// phrases: FxHashSet<Interned<Phrase>>,
2023-03-06 19:21:55 +01:00
// use_prefix_db: bool,
// }
// pub struct EdgeWordDerivations {
// // TODO: not Option, instead: Any | All | Subset(SubWordDerivations)
// from_words: Option<SubWordDerivations>, // ???
// to_words: Option<SubWordDerivations>, // + use prefix db?
// }
// fn aggregate_edge_word_derivations(
// graph: (),
// edges: Vec<usize>,
// ) -> BTreeMap<usize, SubWordDerivations> {
// todo!()
// }
// fn reduce_word_term_to_sub_word_derivations(
// term: &mut WordDerivations,
// derivations: &SubWordDerivations,
// ) {
// let mut new_one_typo = vec![];
// for w in term.one_typo {
// if derivations.words.contains(w) {
// new_one_typo.push(w);
// }
// }
// if term.use_prefix_db && !derivations.use_prefix_db {
// term.use_prefix_db = false;
// }
// // etc.
// }
// fn word_derivations_used_by_edge<G: RankingRuleGraphTrait>(
2023-03-08 15:04:25 +01:00
// edge: G::EdgeCondition,
2023-03-06 19:21:55 +01:00
// ) -> SubWordDerivations {
// todo!()
// }
2023-03-08 15:04:25 +01:00
/// A trait to be implemented by a marker type to build a graph-based ranking rule.
///
/// It mostly describes how to:
/// 1. Retrieve the set of edges (their cost and condition) between two nodes.
/// 2. Compute the document ids satisfying a condition
2023-02-23 13:13:19 +01:00
pub trait RankingRuleGraphTrait : Sized {
2023-03-08 15:04:25 +01:00
/// The condition of an edge connecting two query nodes. The condition
2023-02-21 12:33:32 +01:00
/// should be sufficient to compute the edge's cost and associated document ids
2023-03-08 15:04:25 +01:00
/// in [`resolve_edge_condition`](RankingRuleGraphTrait::resolve_edge_condition).
type EdgeCondition : Sized + Clone ;
2023-02-21 12:33:32 +01:00
2023-03-08 15:04:25 +01:00
/// A structure used in the construction of the graph, created when a
/// query graph source node is visited. It is used to determine the cost
/// and condition of a ranking rule edge when the destination node is visited.
2023-02-21 09:46:00 +01:00
type BuildVisitedFromNode ;
2023-03-08 15:04:25 +01:00
/// Return the label of the given edge condition, to be used when visualising
/// the ranking rule graph.
fn label_for_edge_condition ( edge : & Self ::EdgeCondition ) -> String ;
2023-02-21 09:46:00 +01:00
2023-03-08 15:04:25 +01:00
/// Compute the document ids associated with the given edge condition,
/// restricted to the given universe.
fn resolve_edge_condition < ' search > (
2023-03-06 19:21:55 +01:00
ctx : & mut SearchContext < ' search > ,
2023-03-08 15:04:25 +01:00
edge_condition : & Self ::EdgeCondition ,
2023-03-07 14:42:58 +01:00
universe : & RoaringBitmap ,
2023-02-21 09:46:00 +01:00
) -> Result < RoaringBitmap > ;
2023-03-08 15:04:25 +01:00
/// Prepare to build the edges outgoing from `source_node`.
2023-02-21 12:33:32 +01:00
///
2023-03-08 15:04:25 +01:00
/// This call is followed by zero, one or more calls to [`build_step_visit_destination_node`](RankingRuleGraphTrait::build_step_visit_destination_node),
2023-02-21 12:33:32 +01:00
/// which builds the actual edges.
2023-03-08 15:04:25 +01:00
fn build_step_visit_source_node < ' search > (
2023-03-06 19:21:55 +01:00
ctx : & mut SearchContext < ' search > ,
2023-03-08 15:04:25 +01:00
source_node : & QueryNode ,
2023-02-21 09:46:00 +01:00
) -> Result < Option < Self ::BuildVisitedFromNode > > ;
2023-03-08 15:04:25 +01:00
/// Return the cost and condition of the edges going from the previously visited node
/// (with [`build_step_visit_source_node`](RankingRuleGraphTrait::build_step_visit_source_node)) to `dest_node`.
fn build_step_visit_destination_node < ' from_data , ' search : ' from_data > (
2023-03-06 19:21:55 +01:00
ctx : & mut SearchContext < ' search > ,
2023-03-08 15:04:25 +01:00
dest_node : & QueryNode ,
source_node_data : & ' from_data Self ::BuildVisitedFromNode ,
) -> Result < Vec < ( u8 , EdgeCondition < Self ::EdgeCondition > ) > > ;
2023-02-23 13:13:19 +01:00
fn log_state (
graph : & RankingRuleGraph < Self > ,
2023-03-07 14:42:58 +01:00
paths : & [ Vec < u16 > ] ,
2023-02-27 15:04:40 +01:00
empty_paths_cache : & EmptyPathsCache ,
2023-03-02 21:27:42 +01:00
universe : & RoaringBitmap ,
2023-03-08 09:53:05 +01:00
distances : & [ Vec < ( u16 , SmallBitmap ) > ] ,
2023-03-07 14:42:58 +01:00
cost : u16 ,
2023-02-23 13:13:19 +01:00
logger : & mut dyn SearchLogger < QueryGraph > ,
) ;
2023-02-21 09:46:00 +01:00
}
2023-03-08 15:04:25 +01:00
/// The graph used by graph-based ranking rules.
///
/// It is built on top of a [`QueryGraph`], keeping the same nodes
/// but replacing the edges.
2023-02-21 09:46:00 +01:00
pub struct RankingRuleGraph < G : RankingRuleGraphTrait > {
pub query_graph : QueryGraph ,
2023-03-08 15:04:25 +01:00
pub edges_store : Vec < Option < Edge < G ::EdgeCondition > > > ,
pub edges_of_node : Vec < SmallBitmap > ,
2023-02-21 09:46:00 +01:00
}
2023-02-23 13:13:19 +01:00
// Implemented by hand: `#[derive(Clone)]` would additionally require `G: Clone`,
// which `RankingRuleGraphTrait` does not demand of the marker type.
impl<G: RankingRuleGraphTrait> Clone for RankingRuleGraph<G> {
    /// Clone the query graph, the edge store and the per-node edge sets.
    fn clone(&self) -> Self {
        // Exhaustive destructuring: adding a field to the struct becomes a
        // compile error here instead of a field silently missing from the clone.
        let Self { query_graph, edges_store, edges_of_node } = self;
        Self {
            query_graph: query_graph.clone(),
            edges_store: edges_store.clone(),
            edges_of_node: edges_of_node.clone(),
        }
    }
}
2023-02-21 09:46:00 +01:00
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
    /// Remove the given edge from the ranking rule graph.
    ///
    /// The edge's slot in `edges_store` is set to `None` and its index is removed
    /// from the edge set of its source node. Removing an edge that was already
    /// removed is a no-op.
    ///
    /// # Panics
    ///
    /// Panics if `edge_index` is out of bounds for `edges_store`, or if the
    /// edge's source node is out of bounds for `edges_of_node`.
    pub fn remove_ranking_rule_edge(&mut self, edge_index: u16) {
        // `take` empties the slot and hands back the edge in one step.
        let Some(removed) = self.edges_store[usize::from(edge_index)].take() else {
            return;
        };
        // Only the source node keeps track of the edge, so that is the only set to update.
        self.edges_of_node[usize::from(removed.source_node)].remove(edge_index);
    }
}