2023-03-08 09:55:53 +01:00
|
|
|
use roaring::RoaringBitmap;
|
|
|
|
|
2023-02-22 15:34:37 +01:00
|
|
|
use super::logger::SearchLogger;
|
2023-03-08 09:55:53 +01:00
|
|
|
use super::ranking_rule_graph::{
|
|
|
|
EdgeDocidsCache, EmptyPathsCache, RankingRuleGraph, RankingRuleGraphTrait,
|
|
|
|
};
|
2023-03-08 09:53:05 +01:00
|
|
|
use super::small_bitmap::SmallBitmap;
|
2023-03-08 09:55:53 +01:00
|
|
|
use super::{BitmapOrAllRef, QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
|
2023-03-06 19:21:55 +01:00
|
|
|
use crate::Result;
|
2023-02-21 09:48:49 +01:00
|
|
|
|
|
|
|
pub struct GraphBasedRankingRule<G: RankingRuleGraphTrait> {
|
2023-02-22 15:34:37 +01:00
|
|
|
id: String,
|
2023-02-21 09:48:49 +01:00
|
|
|
state: Option<GraphBasedRankingRuleState<G>>,
|
|
|
|
}
|
2023-02-22 15:34:37 +01:00
|
|
|
impl<G> GraphBasedRankingRule<G>
where
    G: RankingRuleGraphTrait,
{
    /// Creates a ranking rule identified by `id`.
    ///
    /// Nothing is computed here: the rule starts without any iteration state.
    pub fn new(id: String) -> Self {
        let state = None;
        Self { id, state }
    }
}
|
|
|
|
|
|
|
|
pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
|
|
|
|
graph: RankingRuleGraph<G>,
|
|
|
|
edge_docids_cache: EdgeDocidsCache<G>,
|
|
|
|
empty_paths_cache: EmptyPathsCache,
|
2023-03-08 09:53:05 +01:00
|
|
|
all_distances: Vec<Vec<(u16, SmallBitmap)>>,
|
2023-03-02 21:27:42 +01:00
|
|
|
cur_distance_idx: usize,
|
|
|
|
}
|
|
|
|
|
2023-03-06 19:21:55 +01:00
|
|
|
fn remove_empty_edges<'search, G: RankingRuleGraphTrait>(
|
|
|
|
ctx: &mut SearchContext<'search>,
|
2023-03-02 21:27:42 +01:00
|
|
|
graph: &mut RankingRuleGraph<G>,
|
|
|
|
edge_docids_cache: &mut EdgeDocidsCache<G>,
|
|
|
|
universe: &RoaringBitmap,
|
|
|
|
empty_paths_cache: &mut EmptyPathsCache,
|
|
|
|
) -> Result<()> {
|
2023-03-07 14:42:58 +01:00
|
|
|
for edge_index in 0..graph.all_edges.len() as u16 {
|
2023-03-02 21:27:42 +01:00
|
|
|
if graph.all_edges[edge_index as usize].is_none() {
|
|
|
|
continue;
|
|
|
|
}
|
2023-03-06 19:21:55 +01:00
|
|
|
let docids = edge_docids_cache.get_edge_docids(ctx, edge_index, &*graph, universe)?;
|
2023-03-02 21:27:42 +01:00
|
|
|
match docids {
|
2023-03-07 14:42:58 +01:00
|
|
|
BitmapOrAllRef::Bitmap(docids) => {
|
|
|
|
if docids.is_disjoint(universe) {
|
2023-03-02 21:27:42 +01:00
|
|
|
graph.remove_edge(edge_index);
|
|
|
|
empty_paths_cache.forbid_edge(edge_index);
|
|
|
|
edge_docids_cache.cache.remove(&edge_index);
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
BitmapOrAllRef::All => continue,
|
|
|
|
}
|
|
|
|
}
|
|
|
|
Ok(())
|
2023-02-21 09:48:49 +01:00
|
|
|
}
|
|
|
|
|
2023-03-06 19:21:55 +01:00
|
|
|
impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
|
2023-02-21 09:48:49 +01:00
|
|
|
for GraphBasedRankingRule<G>
|
|
|
|
{
|
2023-02-22 15:34:37 +01:00
|
|
|
fn id(&self) -> String {
|
|
|
|
self.id.clone()
|
|
|
|
}
|
2023-02-21 09:48:49 +01:00
|
|
|
fn start_iteration(
|
|
|
|
&mut self,
|
2023-03-06 19:21:55 +01:00
|
|
|
ctx: &mut SearchContext<'search>,
|
2023-02-28 11:49:24 +01:00
|
|
|
_logger: &mut dyn SearchLogger<QueryGraph>,
|
2023-03-02 21:27:42 +01:00
|
|
|
universe: &RoaringBitmap,
|
2023-02-21 09:48:49 +01:00
|
|
|
query_graph: &QueryGraph,
|
|
|
|
) -> Result<()> {
|
2023-03-06 19:21:55 +01:00
|
|
|
let mut graph = RankingRuleGraph::build(ctx, query_graph.clone())?;
|
2023-03-02 21:27:42 +01:00
|
|
|
let mut edge_docids_cache = EdgeDocidsCache::default();
|
2023-03-07 14:42:58 +01:00
|
|
|
let mut empty_paths_cache = EmptyPathsCache::new(graph.all_edges.len() as u16);
|
2023-03-02 21:27:42 +01:00
|
|
|
|
|
|
|
remove_empty_edges(
|
2023-03-06 19:21:55 +01:00
|
|
|
ctx,
|
2023-03-02 21:27:42 +01:00
|
|
|
&mut graph,
|
|
|
|
&mut edge_docids_cache,
|
|
|
|
universe,
|
|
|
|
&mut empty_paths_cache,
|
|
|
|
)?;
|
2023-03-08 09:53:05 +01:00
|
|
|
let all_distances = graph.initialize_distances_with_necessary_edges();
|
2023-02-21 09:48:49 +01:00
|
|
|
|
|
|
|
let state = GraphBasedRankingRuleState {
|
|
|
|
graph,
|
2023-03-02 21:27:42 +01:00
|
|
|
edge_docids_cache,
|
|
|
|
empty_paths_cache,
|
|
|
|
all_distances,
|
|
|
|
cur_distance_idx: 0,
|
2023-02-21 09:48:49 +01:00
|
|
|
};
|
|
|
|
|
|
|
|
self.state = Some(state);
|
|
|
|
|
|
|
|
Ok(())
|
|
|
|
}
|
|
|
|
|
|
|
|
fn next_bucket(
|
|
|
|
&mut self,
|
2023-03-06 19:21:55 +01:00
|
|
|
ctx: &mut SearchContext<'search>,
|
2023-02-22 15:34:37 +01:00
|
|
|
logger: &mut dyn SearchLogger<QueryGraph>,
|
2023-02-21 09:48:49 +01:00
|
|
|
universe: &RoaringBitmap,
|
|
|
|
) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
|
|
|
|
assert!(universe.len() > 1);
|
|
|
|
let mut state = self.state.take().unwrap();
|
2023-03-08 09:53:05 +01:00
|
|
|
|
2023-03-02 21:27:42 +01:00
|
|
|
remove_empty_edges(
|
2023-03-06 19:21:55 +01:00
|
|
|
ctx,
|
2023-03-02 21:27:42 +01:00
|
|
|
&mut state.graph,
|
|
|
|
&mut state.edge_docids_cache,
|
|
|
|
universe,
|
|
|
|
&mut state.empty_paths_cache,
|
|
|
|
)?;
|
2023-02-28 14:19:57 +01:00
|
|
|
|
2023-03-02 21:27:42 +01:00
|
|
|
if state.cur_distance_idx
|
|
|
|
>= state.all_distances[state.graph.query_graph.root_node as usize].len()
|
|
|
|
{
|
|
|
|
self.state = None;
|
2023-02-28 14:19:57 +01:00
|
|
|
return Ok(None);
|
|
|
|
}
|
2023-03-08 09:53:05 +01:00
|
|
|
let (cost, _) =
|
2023-03-02 21:27:42 +01:00
|
|
|
state.all_distances[state.graph.query_graph.root_node as usize][state.cur_distance_idx];
|
|
|
|
state.cur_distance_idx += 1;
|
|
|
|
|
2023-03-07 14:42:58 +01:00
|
|
|
let mut bucket = RoaringBitmap::new();
|
|
|
|
|
|
|
|
let GraphBasedRankingRuleState {
|
|
|
|
graph,
|
|
|
|
edge_docids_cache,
|
|
|
|
empty_paths_cache,
|
|
|
|
all_distances,
|
|
|
|
cur_distance_idx: _,
|
|
|
|
} = &mut state;
|
|
|
|
|
|
|
|
let mut paths = vec![];
|
|
|
|
let original_universe = universe;
|
|
|
|
let mut universe = universe.clone();
|
|
|
|
|
2023-03-08 09:53:05 +01:00
|
|
|
// TODO: remove this unnecessary clone
|
|
|
|
let original_graph = graph.clone();
|
2023-03-07 14:42:58 +01:00
|
|
|
graph.visit_paths_of_cost(
|
|
|
|
graph.query_graph.root_node as usize,
|
2023-03-02 21:27:42 +01:00
|
|
|
cost,
|
2023-03-07 14:42:58 +01:00
|
|
|
all_distances,
|
|
|
|
empty_paths_cache,
|
|
|
|
|path, graph, empty_paths_cache| {
|
2023-03-08 09:53:05 +01:00
|
|
|
paths.push(path.to_vec());
|
2023-03-07 14:42:58 +01:00
|
|
|
let mut path_docids = universe.clone();
|
|
|
|
let mut visited_edges = vec![];
|
|
|
|
let mut cached_edge_docids = vec![];
|
|
|
|
for &edge_index in path {
|
|
|
|
visited_edges.push(edge_index);
|
|
|
|
let edge_docids =
|
|
|
|
edge_docids_cache.get_edge_docids(ctx, edge_index, graph, &universe)?;
|
|
|
|
let edge_docids = match edge_docids {
|
|
|
|
BitmapOrAllRef::Bitmap(b) => b,
|
|
|
|
BitmapOrAllRef::All => continue,
|
|
|
|
};
|
|
|
|
cached_edge_docids.push((edge_index, edge_docids.clone()));
|
|
|
|
if edge_docids.is_disjoint(&universe) {
|
|
|
|
// 1. Store in the cache that this edge is empty for this universe
|
|
|
|
empty_paths_cache.forbid_edge(edge_index);
|
|
|
|
// 2. remove this edge from the ranking rule graph
|
|
|
|
graph.remove_edge(edge_index);
|
|
|
|
edge_docids_cache.cache.remove(&edge_index);
|
|
|
|
return Ok(());
|
|
|
|
}
|
|
|
|
path_docids &= edge_docids;
|
|
|
|
|
|
|
|
if path_docids.is_disjoint(&universe) {
|
2023-03-08 09:53:05 +01:00
|
|
|
// empty_paths_cache.forbid_prefix(&visited_edges);
|
2023-03-07 14:42:58 +01:00
|
|
|
// if the intersection between this edge and any
|
|
|
|
// previous one is disjoint with the universe,
|
|
|
|
// then we add these two edges to the empty_path_cache
|
|
|
|
for (edge_index2, edge_docids2) in
|
|
|
|
cached_edge_docids[..cached_edge_docids.len() - 1].iter()
|
|
|
|
{
|
|
|
|
let intersection = edge_docids & edge_docids2;
|
|
|
|
if intersection.is_disjoint(&universe) {
|
|
|
|
empty_paths_cache.forbid_couple_edges(*edge_index2, edge_index);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return Ok(());
|
|
|
|
}
|
|
|
|
}
|
|
|
|
bucket |= &path_docids;
|
|
|
|
universe -= path_docids;
|
|
|
|
Ok(())
|
|
|
|
},
|
|
|
|
)?;
|
2023-03-02 21:27:42 +01:00
|
|
|
|
|
|
|
G::log_state(
|
2023-03-08 09:53:05 +01:00
|
|
|
&original_graph,
|
2023-03-02 21:27:42 +01:00
|
|
|
&paths,
|
|
|
|
&state.empty_paths_cache,
|
2023-03-07 14:42:58 +01:00
|
|
|
original_universe,
|
2023-03-02 21:27:42 +01:00
|
|
|
&state.all_distances,
|
|
|
|
cost,
|
|
|
|
logger,
|
|
|
|
);
|
2023-02-23 13:13:19 +01:00
|
|
|
|
2023-02-21 09:48:49 +01:00
|
|
|
let next_query_graph = state.graph.query_graph.clone();
|
|
|
|
|
|
|
|
self.state = Some(state);
|
|
|
|
|
|
|
|
Ok(Some(RankingRuleOutput { query: next_query_graph, candidates: bucket }))
|
|
|
|
}
|
|
|
|
|
|
|
|
fn end_iteration(
|
|
|
|
&mut self,
|
2023-03-06 19:21:55 +01:00
|
|
|
_ctx: &mut SearchContext<'search>,
|
2023-02-28 11:49:24 +01:00
|
|
|
_logger: &mut dyn SearchLogger<QueryGraph>,
|
2023-02-21 09:48:49 +01:00
|
|
|
) {
|
|
|
|
self.state = None;
|
|
|
|
}
|
|
|
|
}
|