Improve the visual/detailed search logger

This commit is contained in:
Loïc Lecrenier 2023-02-23 13:13:19 +01:00
parent 6ba4d5e987
commit 173e37584c
9 changed files with 192 additions and 24 deletions

View File

@ -94,6 +94,8 @@ impl<'transaction, G: RankingRuleGraphTrait> RankingRule<'transaction, QueryGrap
return Ok(None); return Ok(None);
} }
G::log_state(&state.graph, &paths, logger);
let bucket = state.graph.resolve_paths( let bucket = state.graph.resolve_paths(
index, index,
txn, txn,

View File

@ -5,6 +5,8 @@ use std::path::Path;
use std::{io::Write, path::PathBuf}; use std::{io::Write, path::PathBuf};
use crate::new::QueryNode; use crate::new::QueryNode;
use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
use crate::new::ranking_rule_graph::{Edge, EdgeDetails, RankingRuleGraphTrait};
use crate::new::ranking_rule_graph::{ use crate::new::ranking_rule_graph::{
paths_map::PathsMap, proximity::ProximityGraph, RankingRuleGraph, paths_map::PathsMap, proximity::ProximityGraph, RankingRuleGraph,
}; };
@ -113,6 +115,11 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
self.events.push(SearchEvents::WordsState { query_graph: query_graph.clone() }); self.events.push(SearchEvents::WordsState { query_graph: query_graph.clone() });
} }
/// Record a snapshot of the proximity ranking rule graph and its paths map.
///
/// Both arguments are cloned into a `SearchEvents::ProximityState` event that is
/// appended to `self.events`.
fn log_proximity_state(&mut self, query_graph: &RankingRuleGraph<ProximityGraph>, paths_map: &PathsMap<u64>) {
    let snapshot = SearchEvents::ProximityState {
        graph: query_graph.clone(),
        paths: paths_map.clone(),
    };
    self.events.push(snapshot);
}
} }
impl DetailedSearchLogger { impl DetailedSearchLogger {
@ -129,7 +136,7 @@ impl DetailedSearchLogger {
} }
let index_path = self.folder_path.join("index.d2"); let index_path = self.folder_path.join("index.d2");
let mut file = std::fs::File::create(&index_path).unwrap(); let mut file = std::fs::File::create(index_path).unwrap();
writeln!(&mut file, "Control Flow Between Ranking Rules: {{").unwrap(); writeln!(&mut file, "Control Flow Between Ranking Rules: {{").unwrap();
writeln!(&mut file, "shape: sequence_diagram"); writeln!(&mut file, "shape: sequence_diagram");
for (idx, rr_id) in self.ranking_rules_ids.as_ref().unwrap().iter().enumerate() { for (idx, rr_id) in self.ranking_rules_ids.as_ref().unwrap().iter().enumerate() {
@ -210,29 +217,143 @@ results.{random} {{
let id = format!("{cur_ranking_rule}.{cur_activated_id}"); let id = format!("{cur_ranking_rule}.{cur_activated_id}");
let mut new_file_path = self.folder_path.join(format!("{id}.d2")); let mut new_file_path = self.folder_path.join(format!("{id}.d2"));
let mut new_file = std::fs::File::create(new_file_path).unwrap(); let mut new_file = std::fs::File::create(new_file_path).unwrap();
Self::query_graph_d2_description(&query_graph, &mut new_file); Self::query_graph_d2_description(query_graph, &mut new_file);
writeln!(
&mut file,
"{id} {{
link: \"{id}.d2.svg\"
}}").unwrap();
},
SearchEvents::ProximityState { graph, paths } => {
let cur_ranking_rule = timestamp.len() - 1;
let cur_activated_id = activated_id(&timestamp);
let id = format!("{cur_ranking_rule}.{cur_activated_id}");
let mut new_file_path = self.folder_path.join(format!("{id}.d2"));
let mut new_file = std::fs::File::create(new_file_path).unwrap();
Self::proximity_graph_d2_description(graph, paths, &mut new_file);
writeln!( writeln!(
&mut file, &mut file,
"{id} {{ "{id} {{
link: \"{id}.d2.svg\" link: \"{id}.d2.svg\"
}}").unwrap(); }}").unwrap();
}, },
SearchEvents::ProximityState { graph, paths } => todo!(),
} }
} }
writeln!(&mut file, "}}"); writeln!(&mut file, "}}");
} }
/// Write the d2 declaration of a single query graph node to `file`.
///
/// - `Start` and `End` nodes are written as `{node_idx} : START` / `{node_idx} : END`.
/// - A word term is written as a `shape: class` container labelled with the
///   original word. Each derivation is listed with its typo count (0, 1 or 2),
///   followed by a `use prefix DB : true` row when `use_prefix_db` is set.
///
/// # Panics
/// Panics on `QueryNode::Deleted` (the callers visible in this file skip deleted
/// nodes first), on phrase terms (`todo!()`), and when a write to `file` fails.
fn query_node_d2_desc(node_idx: usize, node: &QueryNode, file: &mut File) {
    // Handle the marker nodes up front; only term nodes need the detailed layout.
    let located_term = match node {
        QueryNode::Start => {
            writeln!(file,"{node_idx} : START").unwrap();
            return;
        }
        QueryNode::End => {
            writeln!(file,"{node_idx} : END").unwrap();
            return;
        }
        QueryNode::Deleted => panic!(),
        QueryNode::Term(term) => term,
    };
    let LocatedQueryTerm { value, positions: _ } = located_term;

    // Phrases are not rendered yet.
    let QueryTerm::Word { derivations } = value else { todo!() };
    let WordDerivations { original, zero_typo, one_typo, two_typos, use_prefix_db } = derivations;

    writeln!(file,"{node_idx} : \"{original}\" {{
shape: class").unwrap();
    for word in zero_typo {
        writeln!(file, "\"{word}\" : 0").unwrap();
    }
    for word in one_typo {
        writeln!(file, "\"{word}\" : 1").unwrap();
    }
    for word in two_typos {
        writeln!(file, "\"{word}\" : 2").unwrap();
    }
    if *use_prefix_db {
        writeln!(file, "use prefix DB : true").unwrap();
    }
    writeln!(file, "}}").unwrap();
}
fn query_graph_d2_description(query_graph: &QueryGraph, file: &mut File) { fn query_graph_d2_description(query_graph: &QueryGraph, file: &mut File) {
writeln!(file,"direction: right"); writeln!(file,"direction: right");
for node in 0..query_graph.nodes.len() { for node in 0..query_graph.nodes.len() {
if matches!(query_graph.nodes[node], QueryNode::Deleted) { if matches!(query_graph.nodes[node], QueryNode::Deleted) {
continue; continue;
} }
writeln!(file,"{node}"); Self::query_node_d2_desc(node, &query_graph.nodes[node], file);
for edge in query_graph.edges[node].successors.iter() { for edge in query_graph.edges[node].successors.iter() {
writeln!(file, "{node} -> {edge};\n").unwrap(); writeln!(file, "{node} -> {edge};\n").unwrap();
} }
} }
} }
/// Write the d2 description of a proximity ranking rule graph and of its paths
/// map to `file`.
///
/// The output has two top-level containers:
/// - `Proximity Graph`: every non-deleted query node (via `query_node_d2_desc`),
///   followed by one arrow per existing edge, labelled with the edge cost and,
///   for conditional edges, the label from
///   `ProximityGraph::graphviz_edge_details_label`.
/// - `Shortest Paths`: the content written by `paths_d2_description`.
///
/// # Panics
/// Panics if any write to `file` fails, or if one of the helpers it calls panics.
fn proximity_graph_d2_description(graph: &RankingRuleGraph<ProximityGraph>, paths: &PathsMap<u64>, file: &mut File) {
    writeln!(file,"direction: right").unwrap();

    writeln!(file, "Proximity Graph {{").unwrap();
    for (node_idx, node) in graph.query_graph.nodes.iter().enumerate() {
        if matches!(node, QueryNode::Deleted) {
            continue;
        }
        Self::query_node_d2_desc(node_idx, node, file);
    }
    // `all_edges` slots are optional; `flatten` skips the empty ones.
    for edge in graph.all_edges.iter().flatten() {
        let Edge { from_node, to_node, cost, details } = edge;
        // Edge writes are unwrapped like every other write in this function, so
        // an I/O failure is not silently dropped and the file left truncated.
        match details {
            EdgeDetails::Unconditional => {
                writeln!(file, "{from_node} -> {to_node} : \"always cost {cost}\"").unwrap();
            }
            EdgeDetails::Data(details) => {
                let edge_label = ProximityGraph::graphviz_edge_details_label(details);
                writeln!(file, "{from_node} -> {to_node} : \"cost {cost} {edge_label}\"").unwrap();
            }
        }
    }
    writeln!(file, "}}").unwrap();

    writeln!(file, "Shortest Paths {{").unwrap();
    Self::paths_d2_description(graph, paths, file);
    writeln!(file, "}}").unwrap();
}
fn paths_d2_description(graph: &RankingRuleGraph<ProximityGraph>, paths: &PathsMap<u64>, file: &mut File) {
for (edge_idx, rest) in paths.nodes.iter() {
let Edge { from_node, to_node, cost, .. } = graph.all_edges[*edge_idx as usize].as_ref().unwrap();
let from_node = &graph.query_graph.nodes[*from_node as usize];
let from_node_desc = match from_node {
QueryNode::Term(term) => match &term.value {
QueryTerm::Phrase(_) => todo!(),
QueryTerm::Word { derivations } => derivations.original.clone(),
},
QueryNode::Deleted => panic!(),
QueryNode::Start => "START".to_owned(),
QueryNode::End => "END".to_owned(),
};
let to_node = &graph.query_graph.nodes[*to_node as usize];
let to_node_desc = match to_node {
QueryNode::Term(term) => match &term.value {
QueryTerm::Phrase(_) => todo!(),
QueryTerm::Word { derivations } => derivations.original.clone(),
},
QueryNode::Deleted => panic!(),
QueryNode::Start => "START".to_owned(),
QueryNode::End => "END".to_owned(),
};
writeln!(file, "{edge_idx}: \"{from_node_desc}->{to_node_desc} [{cost}]\" {{
shape: class
}}").unwrap();
for (dest_edge_idx, _) in rest.nodes.iter() {
writeln!(file, "{edge_idx} -> {dest_edge_idx}").unwrap();
}
Self::paths_d2_description(graph, rest, file);
}
}
} }

View File

@ -1,8 +1,13 @@
#[cfg(test)]
pub mod detailed; pub mod detailed;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::{query_graph, QueryGraph, RankingRule, RankingRuleQueryTrait}; use super::{
query_graph,
ranking_rule_graph::{paths_map::PathsMap, proximity::ProximityGraph, RankingRuleGraph},
QueryGraph, RankingRule, RankingRuleQueryTrait,
};
pub struct DefaultSearchLogger; pub struct DefaultSearchLogger;
impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger { impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
@ -39,6 +44,13 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
fn add_to_results(&mut self, docids: &RoaringBitmap) {} fn add_to_results(&mut self, docids: &RoaringBitmap) {}
fn log_words_state(&mut self, query_graph: &Q) {} fn log_words_state(&mut self, query_graph: &Q) {}
/// No-op: the default logger discards proximity graph snapshots.
fn log_proximity_state(
&mut self,
query_graph: &RankingRuleGraph<ProximityGraph>,
paths_map: &PathsMap<u64>,
) {
}
} }
pub trait SearchLogger<Q: RankingRuleQueryTrait> { pub trait SearchLogger<Q: RankingRuleQueryTrait> {
@ -69,4 +81,10 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
fn add_to_results(&mut self, docids: &RoaringBitmap); fn add_to_results(&mut self, docids: &RoaringBitmap);
fn log_words_state(&mut self, query_graph: &Q); fn log_words_state(&mut self, query_graph: &Q);
/// Log a snapshot of the proximity ranking rule graph together with a paths map.
///
/// NOTE(review): `paths` is presumably the set of paths that the graph-based
/// ranking rule is about to resolve into a bucket — confirm against the caller.
fn log_proximity_state(
&mut self,
query_graph: &RankingRuleGraph<ProximityGraph>,
paths: &PathsMap<u64>,
);
} }

View File

@ -51,7 +51,7 @@ pub fn word_derivations(
let mut two_typos = vec![]; let mut two_typos = vec![];
if max_typo == 0 { if max_typo == 0 {
if is_prefix { if is_prefix && !use_prefix_db {
let prefix = Str::new(word).starts_with(); let prefix = Str::new(word).starts_with();
let mut stream = fst.search(prefix).into_stream(); let mut stream = fst.search(prefix).into_stream();

View File

@ -11,7 +11,10 @@ use std::ops::ControlFlow;
use heed::RoTxn; use heed::RoTxn;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use self::paths_map::PathsMap;
use super::db_cache::DatabaseCache; use super::db_cache::DatabaseCache;
use super::logger::SearchLogger;
use super::{QueryGraph, QueryNode}; use super::{QueryGraph, QueryNode};
use crate::{Index, Result}; use crate::{Index, Result};
@ -23,10 +26,10 @@ pub enum EdgeDetails<E> {
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct Edge<E> { pub struct Edge<E> {
from_node: u32, pub from_node: u32,
to_node: u32, pub to_node: u32,
cost: u8, pub cost: u8,
details: EdgeDetails<E>, pub details: EdgeDetails<E>,
} }
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
@ -35,11 +38,11 @@ pub struct EdgePointer<'graph, E> {
pub edge: &'graph Edge<E>, pub edge: &'graph Edge<E>,
} }
pub trait RankingRuleGraphTrait { pub trait RankingRuleGraphTrait: Sized {
/// The details of an edge connecting two query nodes. These details /// The details of an edge connecting two query nodes. These details
/// should be sufficient to compute the edge's cost and associated document ids /// should be sufficient to compute the edge's cost and associated document ids
/// in [`compute_docids`](RankingRuleGraphTrait). /// in [`compute_docids`](RankingRuleGraphTrait).
type EdgeDetails: Sized; type EdgeDetails: Sized + Clone;
type BuildVisitedFromNode; type BuildVisitedFromNode;
@ -75,6 +78,12 @@ pub trait RankingRuleGraphTrait {
to_node: &QueryNode, to_node: &QueryNode,
from_node_data: &'from_data Self::BuildVisitedFromNode, from_node_data: &'from_data Self::BuildVisitedFromNode,
) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>>; ) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>>;
/// Report the given ranking rule graph and paths map to `logger`.
///
/// Each graph type picks the `SearchLogger` method that matches it.
/// NOTE(review): only the proximity implementation is visible here (it calls
/// `log_proximity_state`) — confirm for other graph types.
fn log_state(
graph: &RankingRuleGraph<Self>,
paths: &PathsMap<u64>,
logger: &mut dyn SearchLogger<QueryGraph>,
);
} }
pub struct RankingRuleGraph<G: RankingRuleGraphTrait> { pub struct RankingRuleGraph<G: RankingRuleGraphTrait> {
@ -90,6 +99,16 @@ pub struct RankingRuleGraph<G: RankingRuleGraphTrait> {
// 2. get node_incoming_edges[to] // 2. get node_incoming_edges[to]
// 3. take intersection between the two // 3. take intersection between the two
} }
// Written by hand rather than derived: `#[derive(Clone)]` would add a `G: Clone`
// bound on the graph marker type itself.
impl<G: RankingRuleGraphTrait> Clone for RankingRuleGraph<G> {
    /// Build a new graph by cloning each field of `self`.
    fn clone(&self) -> Self {
        let Self { query_graph, all_edges, node_edges, successors } = self;
        Self {
            query_graph: query_graph.clone(),
            all_edges: all_edges.clone(),
            node_edges: node_edges.clone(),
            successors: successors.clone(),
        }
    }
}
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
// Visit all edges between the two given nodes in order of increasing cost. // Visit all edges between the two given nodes in order of increasing cost.
pub fn visit_edges<'graph, O>( pub fn visit_edges<'graph, O>(

View File

@ -9,10 +9,10 @@ use super::cheapest_paths::Path;
use super::{Edge, EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait}; use super::{Edge, EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait};
use crate::new::QueryNode; use crate::new::QueryNode;
#[derive(Debug)] #[derive(Debug, Clone)]
pub struct PathsMap<V> { pub struct PathsMap<V> {
nodes: Vec<(u32, PathsMap<V>)>, pub nodes: Vec<(u32, PathsMap<V>)>,
value: Option<V>, pub value: Option<V>,
} }
impl<V> Default for PathsMap<V> { impl<V> Default for PathsMap<V> {
fn default() -> Self { fn default() -> Self {

View File

@ -3,6 +3,7 @@ pub mod compute_docids;
use heed::RoTxn; use heed::RoTxn;
use super::paths_map::PathsMap;
use super::{Edge, EdgeDetails, RankingRuleGraphTrait}; use super::{Edge, EdgeDetails, RankingRuleGraphTrait};
use crate::new::db_cache::DatabaseCache; use crate::new::db_cache::DatabaseCache;
use crate::new::query_term::WordDerivations; use crate::new::query_term::WordDerivations;
@ -18,6 +19,7 @@ pub enum WordPair {
WordPrefixSwapped { left: String, right_prefix: String }, WordPrefixSwapped { left: String, right_prefix: String },
} }
#[derive(Clone)]
pub struct ProximityEdge { pub struct ProximityEdge {
pairs: Vec<WordPair>, pairs: Vec<WordPair>,
proximity: u8, proximity: u8,
@ -61,4 +63,12 @@ impl RankingRuleGraphTrait for ProximityGraph {
) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>> { ) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>> {
build::visit_to_node(index, txn, db_cache, to_node, from_node_data) build::visit_to_node(index, txn, db_cache, to_node, from_node_data)
} }
/// Forward the proximity graph and its paths map to
/// `SearchLogger::log_proximity_state`.
fn log_state(
graph: &super::RankingRuleGraph<Self>,
paths: &PathsMap<u64>,
logger: &mut dyn crate::new::logger::SearchLogger<crate::new::QueryGraph>,
) {
logger.log_proximity_state(graph, paths);
}
} }

View File

@ -270,8 +270,7 @@ mod tests {
let mut db_cache = DatabaseCache::default(); let mut db_cache = DatabaseCache::default();
let query_graph = let query_graph =
make_query_graph(&index, &txn, &mut db_cache, "the quick brown fox jumps over") make_query_graph(&index, &txn, &mut db_cache, "b b b b b b b b b b").unwrap();
.unwrap();
println!("{}", query_graph.graphviz()); println!("{}", query_graph.graphviz());
logger.initial_query(&query_graph); logger.initial_query(&query_graph);
@ -314,8 +313,7 @@ mod tests {
let mut db_cache = DatabaseCache::default(); let mut db_cache = DatabaseCache::default();
let query_graph = let query_graph =
make_query_graph(&index, &txn, &mut db_cache, "released from prison by the government") make_query_graph(&index, &txn, &mut db_cache, "b b b b b b b b b b").unwrap();
.unwrap();
// TODO: filters + maybe distinct attributes? // TODO: filters + maybe distinct attributes?
let universe = get_start_universe( let universe = get_start_universe(
@ -335,7 +333,7 @@ mod tests {
&mut db_cache, &mut db_cache,
&universe, &universe,
&query_graph, &query_graph,
&mut logger, /* 0, 20 */ &mut logger, //&mut DefaultSearchLogger, /* 0, 20 */
) )
.unwrap(); .unwrap();

View File

@ -47,8 +47,6 @@ impl<'transaction> RankingRule<'transaction, QueryGraph> for Words {
self.exhausted = false; self.exhausted = false;
self.query_graph = Some(parent_query_graph.clone()); self.query_graph = Some(parent_query_graph.clone());
logger.log_words_state(parent_query_graph);
// TODO: a phrase can contain many positions, but represents a single node. // TODO: a phrase can contain many positions, but represents a single node.
// That's a problem. // That's a problem.
let positions_to_remove = match self.terms_matching_strategy { let positions_to_remove = match self.terms_matching_strategy {
@ -83,11 +81,14 @@ impl<'transaction> RankingRule<'transaction, QueryGraph> for Words {
// println!("Words: next bucket"); // println!("Words: next bucket");
assert!(self.iterating); assert!(self.iterating);
assert!(universe.len() > 1); assert!(universe.len() > 1);
if self.exhausted { if self.exhausted {
return Ok(None); return Ok(None);
} }
let Some(query_graph) = &mut self.query_graph else { panic!() }; let Some(query_graph) = &mut self.query_graph else { panic!() };
logger.log_words_state(query_graph);
let this_bucket = resolve_query_graph( let this_bucket = resolve_query_graph(
index, index,
txn, txn,
@ -107,7 +108,6 @@ impl<'transaction> RankingRule<'transaction, QueryGraph> for Words {
let position_to_remove = self.positions_to_remove.pop().unwrap(); let position_to_remove = self.positions_to_remove.pop().unwrap();
query_graph.remove_words_at_position(position_to_remove); query_graph.remove_words_at_position(position_to_remove);
} }
logger.log_words_state(query_graph);
Ok(Some(RankingRuleOutput { query: child_query_graph, candidates: this_bucket })) Ok(Some(RankingRuleOutput { query: child_query_graph, candidates: this_bucket }))
} }