Fix more bugs + visual empty path cache logging

This commit is contained in:
Loïc Lecrenier 2023-02-27 15:04:40 +01:00
parent 0e1fbbf7c6
commit 6c85c0d95e
9 changed files with 107 additions and 68 deletions

View File

@ -71,30 +71,29 @@ impl<'transaction, G: RankingRuleGraphTrait> RankingRule<'transaction, QueryGrap
assert!(universe.len() > 1); assert!(universe.len() > 1);
let mut state = self.state.take().unwrap(); let mut state = self.state.take().unwrap();
let Some(cheapest_paths_state) = state.cheapest_paths_state.take() else { let Some(mut cheapest_paths_state) = state.cheapest_paths_state.take() else {
return Ok(None); return Ok(None);
}; };
let mut paths = PathsMap::default(); let mut paths = PathsMap::default();
if let Some(next_cheapest_paths_state) = cheapest_paths_state while paths.is_empty() {
.compute_paths_of_next_lowest_cost( if let Some(next_cheapest_paths_state) = cheapest_paths_state
&mut state.graph, .compute_paths_of_next_lowest_cost(
&state.empty_paths_cache, &mut state.graph,
&mut paths, &state.empty_paths_cache,
) &mut paths,
{ )
state.cheapest_paths_state = Some(next_cheapest_paths_state); {
} else { cheapest_paths_state = next_cheapest_paths_state;
state.cheapest_paths_state = None; } else {
self.state = None;
return Ok(None);
}
} }
state.cheapest_paths_state = Some(cheapest_paths_state);
if paths.is_empty() { G::log_state(&state.graph, &paths, &state.empty_paths_cache, logger);
self.state = None;
return Ok(None);
}
G::log_state(&state.graph, &paths, logger);
let bucket = state.graph.resolve_paths( let bucket = state.graph.resolve_paths(
index, index,

View File

@ -6,6 +6,7 @@ use std::{io::Write, path::PathBuf};
use crate::new::QueryNode; use crate::new::QueryNode;
use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations}; use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
use crate::new::ranking_rule_graph::empty_paths_cache::EmptyPathsCache;
use crate::new::ranking_rule_graph::{Edge, EdgeDetails, RankingRuleGraphTrait}; use crate::new::ranking_rule_graph::{Edge, EdgeDetails, RankingRuleGraphTrait};
use crate::new::ranking_rule_graph::{ use crate::new::ranking_rule_graph::{
paths_map::PathsMap, proximity::ProximityGraph, RankingRuleGraph, paths_map::PathsMap, proximity::ProximityGraph, RankingRuleGraph,
@ -36,6 +37,7 @@ pub enum SearchEvents {
ProximityState { ProximityState {
graph: RankingRuleGraph<ProximityGraph>, graph: RankingRuleGraph<ProximityGraph>,
paths: PathsMap<u64>, paths: PathsMap<u64>,
empty_paths_cache: EmptyPathsCache,
}, },
} }
@ -107,16 +109,16 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
universe: universe.clone(), universe: universe.clone(),
}) })
} }
fn add_to_results(&mut self, docids: &RoaringBitmap) { fn add_to_results(&mut self, docids: &mut dyn Iterator<Item = u32>) {
self.events.push(SearchEvents::ExtendResults { new: docids.clone() }); self.events.push(SearchEvents::ExtendResults { new: docids.collect() });
} }
fn log_words_state(&mut self, query_graph: &QueryGraph) { fn log_words_state(&mut self, query_graph: &QueryGraph) {
self.events.push(SearchEvents::WordsState { query_graph: query_graph.clone() }); self.events.push(SearchEvents::WordsState { query_graph: query_graph.clone() });
} }
fn log_proximity_state(&mut self, query_graph: &RankingRuleGraph<ProximityGraph>, paths_map: &PathsMap<u64>,) { fn log_proximity_state(&mut self, query_graph: &RankingRuleGraph<ProximityGraph>, paths_map: &PathsMap<u64>, empty_paths_cache: &EmptyPathsCache) {
self.events.push(SearchEvents::ProximityState { graph: query_graph.clone(), paths: paths_map.clone() }) self.events.push(SearchEvents::ProximityState { graph: query_graph.clone(), paths: paths_map.clone(), empty_paths_cache: empty_paths_cache.clone() })
} }
@ -224,13 +226,13 @@ results.{random} {{
link: \"{id}.d2.svg\" link: \"{id}.d2.svg\"
}}").unwrap(); }}").unwrap();
}, },
SearchEvents::ProximityState { graph, paths } => { SearchEvents::ProximityState { graph, paths, empty_paths_cache } => {
let cur_ranking_rule = timestamp.len() - 1; let cur_ranking_rule = timestamp.len() - 1;
let cur_activated_id = activated_id(&timestamp); let cur_activated_id = activated_id(&timestamp);
let id = format!("{cur_ranking_rule}.{cur_activated_id}"); let id = format!("{cur_ranking_rule}.{cur_activated_id}");
let mut new_file_path = self.folder_path.join(format!("{id}.d2")); let mut new_file_path = self.folder_path.join(format!("{id}.d2"));
let mut new_file = std::fs::File::create(new_file_path).unwrap(); let mut new_file = std::fs::File::create(new_file_path).unwrap();
Self::proximity_graph_d2_description(graph, paths, &mut new_file); Self::proximity_graph_d2_description(graph, paths, empty_paths_cache, &mut new_file);
writeln!( writeln!(
&mut file, &mut file,
"{id} {{ "{id} {{
@ -288,7 +290,7 @@ shape: class").unwrap();
} }
} }
} }
fn proximity_graph_d2_description(graph: &RankingRuleGraph<ProximityGraph>, paths: &PathsMap<u64>, file: &mut File) { fn proximity_graph_d2_description(graph: &RankingRuleGraph<ProximityGraph>, paths: &PathsMap<u64>, empty_paths_cache: &EmptyPathsCache, file: &mut File) {
writeln!(file,"direction: right").unwrap(); writeln!(file,"direction: right").unwrap();
writeln!(file, "Proximity Graph {{").unwrap(); writeln!(file, "Proximity Graph {{").unwrap();
@ -322,35 +324,48 @@ shape: class").unwrap();
writeln!(file, "Shortest Paths {{").unwrap(); writeln!(file, "Shortest Paths {{").unwrap();
Self::paths_d2_description(graph, "", paths, file); Self::paths_d2_description(graph, "", paths, file);
writeln!(file, "}}").unwrap(); writeln!(file, "}}").unwrap();
}
fn paths_d2_description(graph: &RankingRuleGraph<ProximityGraph>, paths_idx: &str, paths: &PathsMap<u64>, file: &mut File) {
writeln!(file, "Empty Path Prefixes {{").unwrap();
Self::paths_d2_description(graph, "", &empty_paths_cache.empty_prefixes, file);
writeln!(file, "}}").unwrap();
writeln!(file, "Removed Edges {{").unwrap();
for edge_idx in empty_paths_cache.empty_edges.iter() {
writeln!(file, "{edge_idx}").unwrap();
}
writeln!(file, "}}").unwrap();
}
/// Writes the d2 declaration of a single edge of the proximity graph to `file`
/// and returns the d2 identifier that was used for it.
///
/// The identifier is `paths_idx` immediately followed by `edge_idx`. The label
/// written next to it has the form `from->to [cost]`, where `from` and `to`
/// are textual descriptions of the edge's endpoints in the query graph.
///
/// # Panics
///
/// - if the slot `edge_idx` of `graph.all_edges` is `None`,
/// - if an endpoint is a `QueryNode::Deleted` node,
/// - if an endpoint is a phrase term (not implemented yet, `todo!()`),
/// - if writing to `file` fails.
fn edge_d2_description(graph: &RankingRuleGraph<ProximityGraph>, paths_idx: &str, edge_idx: u32, file: &mut File) -> String {
    // Textual description of one endpoint: the original word of a term node,
    // or a fixed marker for the start and end nodes of the query graph.
    let describe_node = |node_idx: usize| match &graph.query_graph.nodes[node_idx] {
        QueryNode::Term(term) => match &term.value {
            QueryTerm::Phrase(_) => todo!(),
            QueryTerm::Word { derivations } => derivations.original.clone(),
        },
        QueryNode::Deleted => panic!(),
        QueryNode::Start => "START".to_owned(),
        QueryNode::End => "END".to_owned(),
    };

    let edge = graph.all_edges[edge_idx as usize].as_ref().unwrap();
    // The source endpoint is described first, then the destination.
    let from_node_desc = describe_node(edge.from_node as usize);
    let to_node_desc = describe_node(edge.to_node as usize);
    let cost = &edge.cost;

    let edge_id = format!("{paths_idx}{edge_idx}");
    writeln!(file, "{edge_id}: \"{from_node_desc}->{to_node_desc} [{cost}]\" {{
shape: class
}}").unwrap();
    edge_id
}
fn paths_d2_description<T>(graph: &RankingRuleGraph<ProximityGraph>, paths_idx: &str, paths: &PathsMap<T>, file: &mut File) {
for (edge_idx, rest) in paths.nodes.iter() { for (edge_idx, rest) in paths.nodes.iter() {
let Edge { from_node, to_node, cost, .. } = graph.all_edges[*edge_idx as usize].as_ref().unwrap() ; let edge_id = Self::edge_d2_description(graph, paths_idx, *edge_idx, file);
let from_node = &graph.query_graph.nodes[*from_node as usize];
let from_node_desc = match from_node {
QueryNode::Term(term) => match &term.value {
QueryTerm::Phrase(_) => todo!(),
QueryTerm::Word { derivations } => derivations.original.clone(),
},
QueryNode::Deleted => panic!(),
QueryNode::Start => "START".to_owned(),
QueryNode::End => "END".to_owned(),
};
let to_node = &graph.query_graph.nodes[*to_node as usize];
let to_node_desc = match to_node {
QueryNode::Term(term) => match &term.value {
QueryTerm::Phrase(_) => todo!(),
QueryTerm::Word { derivations } => derivations.original.clone(),
},
QueryNode::Deleted => panic!(),
QueryNode::Start => "START".to_owned(),
QueryNode::End => "END".to_owned(),
};
let edge_id = format!("{paths_idx}{edge_idx}");
writeln!(file, "{edge_id}: \"{from_node_desc}->{to_node_desc} [{cost}]\" {{
shape: class
}}").unwrap();
for (dest_edge_idx, _) in rest.nodes.iter() { for (dest_edge_idx, _) in rest.nodes.iter() {
let dest_edge_id = format!("{paths_idx}{edge_idx}{dest_edge_idx}"); let dest_edge_id = format!("{paths_idx}{edge_idx}{dest_edge_idx}");
writeln!(file, "{edge_id} -> {dest_edge_id}").unwrap(); writeln!(file, "{edge_id} -> {dest_edge_id}").unwrap();

View File

@ -5,7 +5,10 @@ use roaring::RoaringBitmap;
use super::{ use super::{
query_graph, query_graph,
ranking_rule_graph::{paths_map::PathsMap, proximity::ProximityGraph, RankingRuleGraph}, ranking_rule_graph::{
empty_paths_cache::EmptyPathsCache, paths_map::PathsMap, proximity::ProximityGraph,
RankingRuleGraph,
},
QueryGraph, RankingRule, RankingRuleQueryTrait, QueryGraph, RankingRule, RankingRuleQueryTrait,
}; };
@ -41,7 +44,7 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
) { ) {
} }
fn add_to_results(&mut self, docids: &RoaringBitmap) {} fn add_to_results(&mut self, docids: &mut dyn Iterator<Item = u32>) {}
fn log_words_state(&mut self, query_graph: &Q) {} fn log_words_state(&mut self, query_graph: &Q) {}
@ -49,6 +52,7 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
&mut self, &mut self,
query_graph: &RankingRuleGraph<ProximityGraph>, query_graph: &RankingRuleGraph<ProximityGraph>,
paths_map: &PathsMap<u64>, paths_map: &PathsMap<u64>,
empty_paths_cache: &EmptyPathsCache,
) { ) {
} }
} }
@ -78,7 +82,7 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
ranking_rule: &dyn RankingRule<'transaction, Q>, ranking_rule: &dyn RankingRule<'transaction, Q>,
universe: &RoaringBitmap, universe: &RoaringBitmap,
); );
fn add_to_results(&mut self, docids: &RoaringBitmap); fn add_to_results(&mut self, docids: &mut dyn Iterator<Item = u32>);
fn log_words_state(&mut self, query_graph: &Q); fn log_words_state(&mut self, query_graph: &Q);
@ -86,5 +90,6 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
&mut self, &mut self,
query_graph: &RankingRuleGraph<ProximityGraph>, query_graph: &RankingRuleGraph<ProximityGraph>,
paths: &PathsMap<u64>, paths: &PathsMap<u64>,
empty_paths_cache: &EmptyPathsCache,
); );
} }

View File

@ -81,7 +81,9 @@ impl KCheapestPathsState {
while self.kth_cheapest_path.cost <= cur_cost { while self.kth_cheapest_path.cost <= cur_cost {
if let Some(next_self) = self.compute_next_cheapest_paths(graph, empty_paths_cache) { if let Some(next_self) = self.compute_next_cheapest_paths(graph, empty_paths_cache) {
self = next_self; self = next_self;
if self.kth_cheapest_path.cost == cur_cost { if self.kth_cheapest_path.cost == cur_cost
&& !empty_paths_cache.path_is_empty(&self.kth_cheapest_path.edges)
{
into_map.add_path(&self.kth_cheapest_path); into_map.add_path(&self.kth_cheapest_path);
} else { } else {
break; break;
@ -148,7 +150,13 @@ impl KCheapestPathsState {
while let Some((next_cheapest_path, cost2)) = next_cheapest_paths.remove_first() { while let Some((next_cheapest_path, cost2)) = next_cheapest_paths.remove_first() {
assert_eq!(cost, cost2); assert_eq!(cost, cost2);
if empty_paths_cache.path_is_empty(&next_cheapest_path) { // NOTE: it is important not to discard the paths that are forbidden due to a
// forbidden prefix, because the cheapest path algorithm (Dijkstra) cannot take
// this property into account.
if next_cheapest_path
.iter()
.any(|edge_index| graph.all_edges[*edge_index as usize].is_none())
{
continue; continue;
} else { } else {
self.cheapest_paths.insert(next_cheapest_path.iter().copied(), cost); self.cheapest_paths.insert(next_cheapest_path.iter().copied(), cost);

View File

@ -4,12 +4,16 @@ use roaring::RoaringBitmap;
use super::paths_map::PathsMap; use super::paths_map::PathsMap;
#[derive(Default)] #[derive(Default, Clone)]
pub struct EmptyPathsCache { pub struct EmptyPathsCache {
pub empty_edges: RoaringBitmap, pub empty_edges: RoaringBitmap,
pub empty_prefixes: PathsMap<()>, pub empty_prefixes: PathsMap<()>,
} }
impl EmptyPathsCache { impl EmptyPathsCache {
pub fn forbid_edge(&mut self, edge_idx: u32) {
self.empty_edges.insert(edge_idx);
self.empty_prefixes.remove_edge(&edge_idx);
}
pub fn path_is_empty(&self, path: &[u32]) -> bool { pub fn path_is_empty(&self, path: &[u32]) -> bool {
for edge in path { for edge in path {
if self.empty_edges.contains(*edge) { if self.empty_edges.contains(*edge) {

View File

@ -11,6 +11,7 @@ use std::ops::ControlFlow;
use heed::RoTxn; use heed::RoTxn;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use self::empty_paths_cache::EmptyPathsCache;
use self::paths_map::PathsMap; use self::paths_map::PathsMap;
use super::db_cache::DatabaseCache; use super::db_cache::DatabaseCache;
@ -82,6 +83,7 @@ pub trait RankingRuleGraphTrait: Sized {
fn log_state( fn log_state(
graph: &RankingRuleGraph<Self>, graph: &RankingRuleGraph<Self>,
paths: &PathsMap<u64>, paths: &PathsMap<u64>,
empty_paths_cache: &EmptyPathsCache,
logger: &mut dyn SearchLogger<QueryGraph>, logger: &mut dyn SearchLogger<QueryGraph>,
); );
} }

View File

@ -3,6 +3,7 @@ pub mod compute_docids;
use heed::RoTxn; use heed::RoTxn;
use super::empty_paths_cache::EmptyPathsCache;
use super::paths_map::PathsMap; use super::paths_map::PathsMap;
use super::{Edge, EdgeDetails, RankingRuleGraphTrait}; use super::{Edge, EdgeDetails, RankingRuleGraphTrait};
use crate::new::db_cache::DatabaseCache; use crate::new::db_cache::DatabaseCache;
@ -67,8 +68,9 @@ impl RankingRuleGraphTrait for ProximityGraph {
fn log_state( fn log_state(
graph: &super::RankingRuleGraph<Self>, graph: &super::RankingRuleGraph<Self>,
paths: &PathsMap<u64>, paths: &PathsMap<u64>,
empty_paths_cache: &EmptyPathsCache,
logger: &mut dyn crate::new::logger::SearchLogger<crate::new::QueryGraph>, logger: &mut dyn crate::new::logger::SearchLogger<crate::new::QueryGraph>,
) { ) {
logger.log_proximity_state(graph, paths); logger.log_proximity_state(graph, paths, empty_paths_cache);
} }
} }

View File

@ -40,7 +40,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
BitmapOrAllRef::Bitmap(edge_docids) => { BitmapOrAllRef::Bitmap(edge_docids) => {
if edge_docids.is_disjoint(universe) { if edge_docids.is_disjoint(universe) {
// 1. Store in the cache that this edge is empty for this universe // 1. Store in the cache that this edge is empty for this universe
empty_paths_cache.empty_edges.insert(edge_index); empty_paths_cache.forbid_edge(edge_index);
// 2. remove all the paths that contain this edge for this universe // 2. remove all the paths that contain this edge for this universe
paths.remove_edge(&edge_index); paths.remove_edge(&edge_index);
// 3. remove this edge from the proximity graph // 3. remove this edge from the proximity graph

View File

@ -139,6 +139,7 @@ pub fn execute_search<'transaction>(
candidates[0] = universe.clone(); candidates[0] = universe.clone();
let mut cur_ranking_rule_index = 0; let mut cur_ranking_rule_index = 0;
macro_rules! back { macro_rules! back {
() => { () => {
logger.end_iteration_ranking_rule( logger.end_iteration_ranking_rule(
@ -157,13 +158,20 @@ pub fn execute_search<'transaction>(
} }
let mut results = vec![]; let mut results = vec![];
// Appends the document ids of `$candidates` to `results`, never growing
// `results` beyond 20 entries, and reports the same ids to `logger` first.
// Relies on `logger` and `results` being in scope at the expansion site.
macro_rules! add_to_results {
    ($candidates:expr) => {
        // Room left before `results` holds 20 entries. Logging does not touch
        // `results`, so the same bound applies to both iterators below.
        let remaining = 20 - results.len();
        logger.add_to_results(&mut $candidates.iter().take(remaining));
        results.extend($candidates.iter().take(remaining));
    };
}
// TODO: skip buckets when we want to start from an offset // TODO: skip buckets when we want to start from an offset
while results.len() < 20 { while results.len() < 20 {
// The universe for this bucket is zero or one element, so we don't need to sort // The universe for this bucket is zero or one element, so we don't need to sort
// anything, just extend the results and go back to the parent ranking rule. // anything, just extend the results and go back to the parent ranking rule.
if candidates[cur_ranking_rule_index].len() <= 1 { if candidates[cur_ranking_rule_index].len() <= 1 {
logger.add_to_results(&candidates[cur_ranking_rule_index]); add_to_results!(candidates[cur_ranking_rule_index]);
results.extend(&candidates[cur_ranking_rule_index]);
back!(); back!();
continue; continue;
} }
@ -183,15 +191,12 @@ pub fn execute_search<'transaction>(
if next_bucket.candidates.len() <= 1 { if next_bucket.candidates.len() <= 1 {
// Only zero or one candidate, no need to sort through the child ranking rule. // Only zero or one candidate, no need to sort through the child ranking rule.
logger.add_to_results(&next_bucket.candidates); add_to_results!(next_bucket.candidates);
results.extend(next_bucket.candidates);
continue; continue;
} else { } else {
// many candidates, give to next ranking rule, if any // many candidates, give to next ranking rule, if any
if cur_ranking_rule_index == ranking_rules_len - 1 { if cur_ranking_rule_index == ranking_rules_len - 1 {
// TODO: don't extend too much, up to the limit only add_to_results!(next_bucket.candidates);
logger.add_to_results(&next_bucket.candidates);
results.extend(next_bucket.candidates);
} else { } else {
cur_ranking_rule_index += 1; cur_ranking_rule_index += 1;
candidates[cur_ranking_rule_index] = next_bucket.candidates.clone(); candidates[cur_ranking_rule_index] = next_bucket.candidates.clone();
@ -313,8 +318,7 @@ mod tests {
let mut db_cache = DatabaseCache::default(); let mut db_cache = DatabaseCache::default();
let query_graph = let query_graph =
make_query_graph(&index, &txn, &mut db_cache, "the sun flower is facing the su") make_query_graph(&index, &txn, &mut db_cache, "a a a a a a a a a a").unwrap();
.unwrap();
// TODO: filters + maybe distinct attributes? // TODO: filters + maybe distinct attributes?
let universe = get_start_universe( let universe = get_start_universe(