2023-03-06 08:35:01 +01:00
|
|
|
mod build;
|
|
|
|
mod cheapest_paths;
|
|
|
|
mod edge_docids_cache;
|
|
|
|
mod empty_paths_cache;
|
|
|
|
mod paths_map;
|
|
|
|
mod proximity;
|
|
|
|
mod typo;
|
|
|
|
|
2023-03-06 19:21:55 +01:00
|
|
|
use super::logger::SearchLogger;
|
2023-03-07 14:42:58 +01:00
|
|
|
use super::small_bitmap::SmallBitmap;
|
2023-03-06 19:21:55 +01:00
|
|
|
use super::{QueryGraph, QueryNode, SearchContext};
|
|
|
|
use crate::Result;
|
2023-03-06 08:35:01 +01:00
|
|
|
pub use edge_docids_cache::EdgeDocidsCache;
|
|
|
|
pub use empty_paths_cache::EmptyPathsCache;
|
|
|
|
pub use proximity::ProximityGraph;
|
2023-02-21 09:46:00 +01:00
|
|
|
use roaring::RoaringBitmap;
|
2023-03-06 19:21:55 +01:00
|
|
|
pub use typo::TypoGraph;
|
2023-02-21 09:46:00 +01:00
|
|
|
|
|
|
|
/// The payload attached to an edge of a ranking rule graph.
///
/// `E` is the rule-specific detail type (see
/// `RankingRuleGraphTrait::EdgeDetails`).
#[derive(Clone, Debug)]
pub enum EdgeDetails<E> {
    /// The edge carries no rule-specific details.
    // NOTE(review): presumably such an edge places no condition on the
    // documents it lets through — confirm against the docids cache.
    Unconditional,
    /// The edge carries rule-specific details of type `E`.
    Data(E),
}
|
|
|
|
|
|
|
|
#[derive(Debug, Clone)]
|
|
|
|
pub struct Edge<E> {
|
2023-03-07 14:42:58 +01:00
|
|
|
pub from_node: u16,
|
|
|
|
pub to_node: u16,
|
2023-02-23 13:13:19 +01:00
|
|
|
pub cost: u8,
|
|
|
|
pub details: EdgeDetails<E>,
|
2023-02-21 09:46:00 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
#[derive(Debug, Clone)]
|
|
|
|
pub struct EdgePointer<'graph, E> {
|
2023-03-07 14:42:58 +01:00
|
|
|
pub index: u16,
|
2023-02-21 09:46:00 +01:00
|
|
|
pub edge: &'graph Edge<E>,
|
|
|
|
}
|
|
|
|
|
2023-03-06 19:21:55 +01:00
|
|
|
// pub struct SubWordDerivations {
|
|
|
|
// words: FxHashSet<Interned<String>>,
|
|
|
|
// synonyms: FxHashSet<Interned<Phrase>>, // NO! they're phrases, not strings
|
|
|
|
// split_words: bool,
|
|
|
|
// use_prefix_db: bool,
|
|
|
|
// }
|
|
|
|
|
|
|
|
// pub struct EdgeWordDerivations {
|
|
|
|
// // TODO: not Option, instead: Any | All | Subset(SubWordDerivations)
|
|
|
|
// from_words: Option<SubWordDerivations>, // ???
|
|
|
|
// to_words: Option<SubWordDerivations>, // + use prefix db?
|
|
|
|
// }
|
|
|
|
|
|
|
|
// fn aggregate_edge_word_derivations(
|
|
|
|
// graph: (),
|
|
|
|
// edges: Vec<usize>,
|
|
|
|
// ) -> BTreeMap<usize, SubWordDerivations> {
|
|
|
|
// todo!()
|
|
|
|
// }
|
|
|
|
|
|
|
|
// fn reduce_word_term_to_sub_word_derivations(
|
|
|
|
// term: &mut WordDerivations,
|
|
|
|
// derivations: &SubWordDerivations,
|
|
|
|
// ) {
|
|
|
|
// let mut new_one_typo = vec![];
|
|
|
|
// for w in term.one_typo {
|
|
|
|
// if derivations.words.contains(w) {
|
|
|
|
// new_one_typo.push(w);
|
|
|
|
// }
|
|
|
|
// }
|
|
|
|
// if term.use_prefix_db && !derivations.use_prefix_db {
|
|
|
|
// term.use_prefix_db = false;
|
|
|
|
// }
|
|
|
|
// // etc.
|
|
|
|
// }
|
|
|
|
|
|
|
|
// fn word_derivations_used_by_edge<G: RankingRuleGraphTrait>(
|
|
|
|
// edge: G::EdgeDetails,
|
|
|
|
// ) -> SubWordDerivations {
|
|
|
|
// todo!()
|
|
|
|
// }
|
|
|
|
|
2023-02-23 13:13:19 +01:00
|
|
|
pub trait RankingRuleGraphTrait: Sized {
|
2023-02-21 12:33:32 +01:00
|
|
|
/// The details of an edge connecting two query nodes. These details
|
|
|
|
/// should be sufficient to compute the edge's cost and associated document ids
|
|
|
|
/// in [`compute_docids`](RankingRuleGraphTrait).
|
2023-02-23 13:13:19 +01:00
|
|
|
type EdgeDetails: Sized + Clone;
|
2023-02-21 12:33:32 +01:00
|
|
|
|
2023-02-21 09:46:00 +01:00
|
|
|
type BuildVisitedFromNode;
|
|
|
|
|
2023-02-21 12:33:32 +01:00
|
|
|
/// Return the label of the given edge details, to be used when visualising
|
|
|
|
/// the ranking rule graph using GraphViz.
|
|
|
|
fn graphviz_edge_details_label(edge: &Self::EdgeDetails) -> String;
|
2023-02-21 09:46:00 +01:00
|
|
|
|
2023-02-21 12:33:32 +01:00
|
|
|
/// Compute the document ids associated with the given edge.
|
2023-03-06 19:21:55 +01:00
|
|
|
fn compute_docids<'search>(
|
|
|
|
ctx: &mut SearchContext<'search>,
|
2023-02-21 09:46:00 +01:00
|
|
|
edge_details: &Self::EdgeDetails,
|
2023-03-07 14:42:58 +01:00
|
|
|
universe: &RoaringBitmap,
|
2023-02-21 09:46:00 +01:00
|
|
|
) -> Result<RoaringBitmap>;
|
|
|
|
|
2023-02-21 12:33:32 +01:00
|
|
|
/// Prepare to build the edges outgoing from `from_node`.
|
|
|
|
///
|
|
|
|
/// This call is followed by zero, one or more calls to [`build_visit_to_node`](RankingRuleGraphTrait::build_visit_to_node),
|
|
|
|
/// which builds the actual edges.
|
2023-03-06 19:21:55 +01:00
|
|
|
fn build_visit_from_node<'search>(
|
|
|
|
ctx: &mut SearchContext<'search>,
|
2023-02-21 09:46:00 +01:00
|
|
|
from_node: &QueryNode,
|
|
|
|
) -> Result<Option<Self::BuildVisitedFromNode>>;
|
|
|
|
|
2023-02-21 12:33:32 +01:00
|
|
|
/// Return the cost and details of the edges going from the previously visited node
|
|
|
|
/// (with [`build_visit_from_node`](RankingRuleGraphTrait::build_visit_from_node)) to `to_node`.
|
2023-03-06 19:21:55 +01:00
|
|
|
fn build_visit_to_node<'from_data, 'search: 'from_data>(
|
|
|
|
ctx: &mut SearchContext<'search>,
|
2023-02-21 09:46:00 +01:00
|
|
|
to_node: &QueryNode,
|
|
|
|
from_node_data: &'from_data Self::BuildVisitedFromNode,
|
2023-02-21 12:33:32 +01:00
|
|
|
) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>>;
|
2023-02-23 13:13:19 +01:00
|
|
|
|
|
|
|
fn log_state(
|
|
|
|
graph: &RankingRuleGraph<Self>,
|
2023-03-07 14:42:58 +01:00
|
|
|
paths: &[Vec<u16>],
|
2023-02-27 15:04:40 +01:00
|
|
|
empty_paths_cache: &EmptyPathsCache,
|
2023-03-02 21:27:42 +01:00
|
|
|
universe: &RoaringBitmap,
|
2023-03-08 09:53:05 +01:00
|
|
|
distances: &[Vec<(u16, SmallBitmap)>],
|
2023-03-07 14:42:58 +01:00
|
|
|
cost: u16,
|
2023-02-23 13:13:19 +01:00
|
|
|
logger: &mut dyn SearchLogger<QueryGraph>,
|
|
|
|
);
|
2023-02-21 09:46:00 +01:00
|
|
|
}
|
|
|
|
|
|
|
|
pub struct RankingRuleGraph<G: RankingRuleGraphTrait> {
|
|
|
|
pub query_graph: QueryGraph,
|
|
|
|
// pub edges: Vec<HashMap<usize, Vec<Edge<G::EdgeDetails>>>>,
|
|
|
|
pub all_edges: Vec<Option<Edge<G::EdgeDetails>>>,
|
2023-02-21 12:33:32 +01:00
|
|
|
|
2023-03-07 14:42:58 +01:00
|
|
|
pub node_edges: Vec<SmallBitmap>,
|
2023-02-21 12:33:32 +01:00
|
|
|
|
2023-03-07 14:42:58 +01:00
|
|
|
pub successors: Vec<SmallBitmap>,
|
2023-02-21 13:57:34 +01:00
|
|
|
// TODO: to get the edges between two nodes:
|
2023-02-21 12:33:32 +01:00
|
|
|
// 1. get node_outgoing_edges[from]
|
|
|
|
// 2. get node_incoming_edges[to]
|
|
|
|
// 3. take intersection betweem the two
|
2023-02-21 09:46:00 +01:00
|
|
|
}
|
2023-02-23 13:13:19 +01:00
|
|
|
// Implemented by hand rather than derived: `#[derive(Clone)]` would add a
// `G: Clone` bound, whereas only the fields (which use `G::EdgeDetails`, itself
// required to be `Clone`) need to be cloned.
impl<G: RankingRuleGraphTrait> Clone for RankingRuleGraph<G> {
    /// Return a graph whose four fields are clones of the fields of `self`.
    fn clone(&self) -> Self {
        let Self { query_graph, all_edges, node_edges, successors } = self;
        Self {
            query_graph: query_graph.clone(),
            all_edges: all_edges.clone(),
            node_edges: node_edges.clone(),
            successors: successors.clone(),
        }
    }
}
|
2023-02-21 09:46:00 +01:00
|
|
|
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
2023-03-07 14:42:58 +01:00
|
|
|
pub fn remove_edge(&mut self, edge_index: u16) {
|
2023-02-21 12:55:44 +01:00
|
|
|
let edge_opt = &mut self.all_edges[edge_index as usize];
|
2023-02-21 12:33:32 +01:00
|
|
|
let Some(edge) = &edge_opt else { return };
|
2023-02-28 11:49:24 +01:00
|
|
|
let (from_node, _to_node) = (edge.from_node, edge.to_node);
|
2023-02-21 12:33:32 +01:00
|
|
|
*edge_opt = None;
|
2023-02-21 09:46:00 +01:00
|
|
|
|
2023-02-21 12:55:44 +01:00
|
|
|
let from_node_edges = &mut self.node_edges[from_node as usize];
|
|
|
|
from_node_edges.remove(edge_index);
|
2023-02-21 09:46:00 +01:00
|
|
|
|
2023-03-07 14:42:58 +01:00
|
|
|
let mut new_successors_from_node = SmallBitmap::new(self.all_edges.len() as u16);
|
|
|
|
let all_edges = &self.all_edges;
|
2023-02-21 12:55:44 +01:00
|
|
|
for from_node_edge in from_node_edges.iter() {
|
2023-03-07 14:42:58 +01:00
|
|
|
let Edge { to_node, .. } = &all_edges[from_node_edge as usize].as_ref().unwrap();
|
2023-02-21 12:55:44 +01:00
|
|
|
new_successors_from_node.insert(*to_node);
|
2023-02-21 09:46:00 +01:00
|
|
|
}
|
2023-02-21 12:55:44 +01:00
|
|
|
self.successors[from_node as usize] = new_successors_from_node;
|
2023-02-21 09:46:00 +01:00
|
|
|
}
|
|
|
|
}
|