Fix more bugs + visual empty path cache logging

This commit is contained in:
Loïc Lecrenier 2023-02-27 15:04:40 +01:00
parent 0e1fbbf7c6
commit 6c85c0d95e
9 changed files with 107 additions and 68 deletions

View File

@ -71,12 +71,13 @@ impl<'transaction, G: RankingRuleGraphTrait> RankingRule<'transaction, QueryGrap
assert!(universe.len() > 1);
let mut state = self.state.take().unwrap();
let Some(cheapest_paths_state) = state.cheapest_paths_state.take() else {
let Some(mut cheapest_paths_state) = state.cheapest_paths_state.take() else {
return Ok(None);
};
let mut paths = PathsMap::default();
while paths.is_empty() {
if let Some(next_cheapest_paths_state) = cheapest_paths_state
.compute_paths_of_next_lowest_cost(
&mut state.graph,
@ -84,17 +85,15 @@ impl<'transaction, G: RankingRuleGraphTrait> RankingRule<'transaction, QueryGrap
&mut paths,
)
{
state.cheapest_paths_state = Some(next_cheapest_paths_state);
cheapest_paths_state = next_cheapest_paths_state;
} else {
state.cheapest_paths_state = None;
}
if paths.is_empty() {
self.state = None;
return Ok(None);
}
}
state.cheapest_paths_state = Some(cheapest_paths_state);
G::log_state(&state.graph, &paths, logger);
G::log_state(&state.graph, &paths, &state.empty_paths_cache, logger);
let bucket = state.graph.resolve_paths(
index,

View File

@ -6,6 +6,7 @@ use std::{io::Write, path::PathBuf};
use crate::new::QueryNode;
use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
use crate::new::ranking_rule_graph::empty_paths_cache::EmptyPathsCache;
use crate::new::ranking_rule_graph::{Edge, EdgeDetails, RankingRuleGraphTrait};
use crate::new::ranking_rule_graph::{
paths_map::PathsMap, proximity::ProximityGraph, RankingRuleGraph,
@ -36,6 +37,7 @@ pub enum SearchEvents {
ProximityState {
graph: RankingRuleGraph<ProximityGraph>,
paths: PathsMap<u64>,
empty_paths_cache: EmptyPathsCache,
},
}
@ -107,16 +109,16 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
universe: universe.clone(),
})
}
fn add_to_results(&mut self, docids: &RoaringBitmap) {
self.events.push(SearchEvents::ExtendResults { new: docids.clone() });
fn add_to_results(&mut self, docids: &mut dyn Iterator<Item = u32>) {
self.events.push(SearchEvents::ExtendResults { new: docids.collect() });
}
/// Records the given query graph as a `SearchEvents::WordsState` event.
///
/// The graph is cloned so that the stored event owns its own copy.
fn log_words_state(&mut self, query_graph: &QueryGraph) {
    let snapshot = query_graph.clone();
    let event = SearchEvents::WordsState { query_graph: snapshot };
    self.events.push(event);
}
fn log_proximity_state(&mut self, query_graph: &RankingRuleGraph<ProximityGraph>, paths_map: &PathsMap<u64>,) {
self.events.push(SearchEvents::ProximityState { graph: query_graph.clone(), paths: paths_map.clone() })
fn log_proximity_state(&mut self, query_graph: &RankingRuleGraph<ProximityGraph>, paths_map: &PathsMap<u64>, empty_paths_cache: &EmptyPathsCache) {
self.events.push(SearchEvents::ProximityState { graph: query_graph.clone(), paths: paths_map.clone(), empty_paths_cache: empty_paths_cache.clone() })
}
@ -224,13 +226,13 @@ results.{random} {{
link: \"{id}.d2.svg\"
}}").unwrap();
},
SearchEvents::ProximityState { graph, paths } => {
SearchEvents::ProximityState { graph, paths, empty_paths_cache } => {
let cur_ranking_rule = timestamp.len() - 1;
let cur_activated_id = activated_id(&timestamp);
let id = format!("{cur_ranking_rule}.{cur_activated_id}");
let mut new_file_path = self.folder_path.join(format!("{id}.d2"));
let mut new_file = std::fs::File::create(new_file_path).unwrap();
Self::proximity_graph_d2_description(graph, paths, &mut new_file);
Self::proximity_graph_d2_description(graph, paths, empty_paths_cache, &mut new_file);
writeln!(
&mut file,
"{id} {{
@ -288,7 +290,7 @@ shape: class").unwrap();
}
}
}
fn proximity_graph_d2_description(graph: &RankingRuleGraph<ProximityGraph>, paths: &PathsMap<u64>, file: &mut File) {
fn proximity_graph_d2_description(graph: &RankingRuleGraph<ProximityGraph>, paths: &PathsMap<u64>, empty_paths_cache: &EmptyPathsCache, file: &mut File) {
writeln!(file,"direction: right").unwrap();
writeln!(file, "Proximity Graph {{").unwrap();
@ -322,11 +324,19 @@ shape: class").unwrap();
writeln!(file, "Shortest Paths {{").unwrap();
Self::paths_d2_description(graph, "", paths, file);
writeln!(file, "}}").unwrap();
}
fn paths_d2_description(graph: &RankingRuleGraph<ProximityGraph>, paths_idx: &str, paths: &PathsMap<u64>, file: &mut File) {
for (edge_idx, rest) in paths.nodes.iter() {
let Edge { from_node, to_node, cost, .. } = graph.all_edges[*edge_idx as usize].as_ref().unwrap() ;
writeln!(file, "Empty Path Prefixes {{").unwrap();
Self::paths_d2_description(graph, "", &empty_paths_cache.empty_prefixes, file);
writeln!(file, "}}").unwrap();
writeln!(file, "Removed Edges {{").unwrap();
for edge_idx in empty_paths_cache.empty_edges.iter() {
writeln!(file, "{edge_idx}").unwrap();
}
writeln!(file, "}}").unwrap();
}
fn edge_d2_description(graph: &RankingRuleGraph<ProximityGraph>, paths_idx: &str, edge_idx: u32, file: &mut File) -> String {
let Edge { from_node, to_node, cost, .. } = graph.all_edges[edge_idx as usize].as_ref().unwrap() ;
let from_node = &graph.query_graph.nodes[*from_node as usize];
let from_node_desc = match from_node {
QueryNode::Term(term) => match &term.value {
@ -351,6 +361,11 @@ shape: class").unwrap();
writeln!(file, "{edge_id}: \"{from_node_desc}->{to_node_desc} [{cost}]\" {{
shape: class
}}").unwrap();
edge_id
}
fn paths_d2_description<T>(graph: &RankingRuleGraph<ProximityGraph>, paths_idx: &str, paths: &PathsMap<T>, file: &mut File) {
for (edge_idx, rest) in paths.nodes.iter() {
let edge_id = Self::edge_d2_description(graph, paths_idx, *edge_idx, file);
for (dest_edge_idx, _) in rest.nodes.iter() {
let dest_edge_id = format!("{paths_idx}{edge_idx}{dest_edge_idx}");
writeln!(file, "{edge_id} -> {dest_edge_id}").unwrap();

View File

@ -5,7 +5,10 @@ use roaring::RoaringBitmap;
use super::{
query_graph,
ranking_rule_graph::{paths_map::PathsMap, proximity::ProximityGraph, RankingRuleGraph},
ranking_rule_graph::{
empty_paths_cache::EmptyPathsCache, paths_map::PathsMap, proximity::ProximityGraph,
RankingRuleGraph,
},
QueryGraph, RankingRule, RankingRuleQueryTrait,
};
@ -41,7 +44,7 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
) {
}
fn add_to_results(&mut self, docids: &RoaringBitmap) {}
fn add_to_results(&mut self, docids: &mut dyn Iterator<Item = u32>) {}
/// No-op: this logger implementation ignores the query graph it is given.
fn log_words_state(&mut self, query_graph: &Q) {}
@ -49,6 +52,7 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
&mut self,
query_graph: &RankingRuleGraph<ProximityGraph>,
paths_map: &PathsMap<u64>,
empty_paths_cache: &EmptyPathsCache,
) {
}
}
@ -78,7 +82,7 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
ranking_rule: &dyn RankingRule<'transaction, Q>,
universe: &RoaringBitmap,
);
fn add_to_results(&mut self, docids: &RoaringBitmap);
fn add_to_results(&mut self, docids: &mut dyn Iterator<Item = u32>);
/// Logs the given query (of type `Q`) as a "words state".
///
/// NOTE(review): the call site is not visible here; presumably invoked by the
/// words ranking rule on each iteration — confirm against the caller.
fn log_words_state(&mut self, query_graph: &Q);
@ -86,5 +90,6 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
&mut self,
query_graph: &RankingRuleGraph<ProximityGraph>,
paths: &PathsMap<u64>,
empty_paths_cache: &EmptyPathsCache,
);
}

View File

@ -81,7 +81,9 @@ impl KCheapestPathsState {
while self.kth_cheapest_path.cost <= cur_cost {
if let Some(next_self) = self.compute_next_cheapest_paths(graph, empty_paths_cache) {
self = next_self;
if self.kth_cheapest_path.cost == cur_cost {
if self.kth_cheapest_path.cost == cur_cost
&& !empty_paths_cache.path_is_empty(&self.kth_cheapest_path.edges)
{
into_map.add_path(&self.kth_cheapest_path);
} else {
break;
@ -148,7 +150,13 @@ impl KCheapestPathsState {
while let Some((next_cheapest_path, cost2)) = next_cheapest_paths.remove_first() {
assert_eq!(cost, cost2);
if empty_paths_cache.path_is_empty(&next_cheapest_path) {
// NOTE: it is important not to discard the paths that are forbidden due to a
// forbidden prefix, because the cheapest path algorithm (Dijkstra) cannot take
// this property into account.
if next_cheapest_path
.iter()
.any(|edge_index| graph.all_edges[*edge_index as usize].is_none())
{
continue;
} else {
self.cheapest_paths.insert(next_cheapest_path.iter().copied(), cost);

View File

@ -4,12 +4,16 @@ use roaring::RoaringBitmap;
use super::paths_map::PathsMap;
#[derive(Default)]
#[derive(Default, Clone)]
pub struct EmptyPathsCache {
pub empty_edges: RoaringBitmap,
pub empty_prefixes: PathsMap<()>,
}
impl EmptyPathsCache {
pub fn forbid_edge(&mut self, edge_idx: u32) {
self.empty_edges.insert(edge_idx);
self.empty_prefixes.remove_edge(&edge_idx);
}
pub fn path_is_empty(&self, path: &[u32]) -> bool {
for edge in path {
if self.empty_edges.contains(*edge) {

View File

@ -11,6 +11,7 @@ use std::ops::ControlFlow;
use heed::RoTxn;
use roaring::RoaringBitmap;
use self::empty_paths_cache::EmptyPathsCache;
use self::paths_map::PathsMap;
use super::db_cache::DatabaseCache;
@ -82,6 +83,7 @@ pub trait RankingRuleGraphTrait: Sized {
/// Reports the current state of a ranking rule graph to a search logger.
///
/// Implementations receive the graph itself, the map of paths (`paths`),
/// the cache of empty paths/edges (`empty_paths_cache`), and the `logger`
/// to which the state should be forwarded.
fn log_state(
graph: &RankingRuleGraph<Self>,
paths: &PathsMap<u64>,
empty_paths_cache: &EmptyPathsCache,
logger: &mut dyn SearchLogger<QueryGraph>,
);
}

View File

@ -3,6 +3,7 @@ pub mod compute_docids;
use heed::RoTxn;
use super::empty_paths_cache::EmptyPathsCache;
use super::paths_map::PathsMap;
use super::{Edge, EdgeDetails, RankingRuleGraphTrait};
use crate::new::db_cache::DatabaseCache;
@ -67,8 +68,9 @@ impl RankingRuleGraphTrait for ProximityGraph {
fn log_state(
graph: &super::RankingRuleGraph<Self>,
paths: &PathsMap<u64>,
empty_paths_cache: &EmptyPathsCache,
logger: &mut dyn crate::new::logger::SearchLogger<crate::new::QueryGraph>,
) {
logger.log_proximity_state(graph, paths);
logger.log_proximity_state(graph, paths, empty_paths_cache);
}
}

View File

@ -40,7 +40,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
BitmapOrAllRef::Bitmap(edge_docids) => {
if edge_docids.is_disjoint(universe) {
// 1. Store in the cache that this edge is empty for this universe
empty_paths_cache.empty_edges.insert(edge_index);
empty_paths_cache.forbid_edge(edge_index);
// 2. remove all the paths that contain this edge for this universe
paths.remove_edge(&edge_index);
// 3. remove this edge from the proximity graph

View File

@ -139,6 +139,7 @@ pub fn execute_search<'transaction>(
candidates[0] = universe.clone();
let mut cur_ranking_rule_index = 0;
macro_rules! back {
() => {
logger.end_iteration_ranking_rule(
@ -157,13 +158,20 @@ pub fn execute_search<'transaction>(
}
let mut results = vec![];
// Appends document ids from `$candidates` to `results`, never growing `results`
// beyond 20 entries, and reports the same ids to `logger`.
//
// `$candidates` is iterated twice (once for the logger, once for `results`),
// so it must be an expression that can be evaluated more than once.
macro_rules! add_to_results {
    ($candidates:expr) => {
        // Number of free slots left before reaching the 20-result limit.
        let remaining = 20 - results.len();
        logger.add_to_results(&mut $candidates.iter().take(remaining));
        results.extend($candidates.iter().take(remaining));
    };
}
// TODO: skip buckets when we want to start from an offset
while results.len() < 20 {
// The universe for this bucket is zero or one element, so we don't need to sort
// anything, just extend the results and go back to the parent ranking rule.
if candidates[cur_ranking_rule_index].len() <= 1 {
logger.add_to_results(&candidates[cur_ranking_rule_index]);
results.extend(&candidates[cur_ranking_rule_index]);
add_to_results!(candidates[cur_ranking_rule_index]);
back!();
continue;
}
@ -183,15 +191,12 @@ pub fn execute_search<'transaction>(
if next_bucket.candidates.len() <= 1 {
// Only zero or one candidate, no need to sort through the child ranking rule.
logger.add_to_results(&next_bucket.candidates);
results.extend(next_bucket.candidates);
add_to_results!(next_bucket.candidates);
continue;
} else {
// many candidates, give to next ranking rule, if any
if cur_ranking_rule_index == ranking_rules_len - 1 {
// TODO: don't extend too much, up to the limit only
logger.add_to_results(&next_bucket.candidates);
results.extend(next_bucket.candidates);
add_to_results!(next_bucket.candidates);
} else {
cur_ranking_rule_index += 1;
candidates[cur_ranking_rule_index] = next_bucket.candidates.clone();
@ -313,8 +318,7 @@ mod tests {
let mut db_cache = DatabaseCache::default();
let query_graph =
make_query_graph(&index, &txn, &mut db_cache, "the sun flower is facing the su")
.unwrap();
make_query_graph(&index, &txn, &mut db_cache, "a a a a a a a a a a").unwrap();
// TODO: filters + maybe distinct attributes?
let universe = get_start_universe(