mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-26 22:50:07 +01:00
Improve the visual/detailed search logger
This commit is contained in:
parent
6ba4d5e987
commit
173e37584c
@ -94,6 +94,8 @@ impl<'transaction, G: RankingRuleGraphTrait> RankingRule<'transaction, QueryGrap
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
G::log_state(&state.graph, &paths, logger);
|
||||
|
||||
let bucket = state.graph.resolve_paths(
|
||||
index,
|
||||
txn,
|
||||
|
@ -5,6 +5,8 @@ use std::path::Path;
|
||||
use std::{io::Write, path::PathBuf};
|
||||
|
||||
use crate::new::QueryNode;
|
||||
use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
|
||||
use crate::new::ranking_rule_graph::{Edge, EdgeDetails, RankingRuleGraphTrait};
|
||||
use crate::new::ranking_rule_graph::{
|
||||
paths_map::PathsMap, proximity::ProximityGraph, RankingRuleGraph,
|
||||
};
|
||||
@ -112,6 +114,11 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
|
||||
fn log_words_state(&mut self, query_graph: &QueryGraph) {
|
||||
self.events.push(SearchEvents::WordsState { query_graph: query_graph.clone() });
|
||||
}
|
||||
|
||||
fn log_proximity_state(&mut self, query_graph: &RankingRuleGraph<ProximityGraph>, paths_map: &PathsMap<u64>,) {
|
||||
self.events.push(SearchEvents::ProximityState { graph: query_graph.clone(), paths: paths_map.clone() })
|
||||
}
|
||||
|
||||
|
||||
}
|
||||
|
||||
@ -129,7 +136,7 @@ impl DetailedSearchLogger {
|
||||
}
|
||||
|
||||
let index_path = self.folder_path.join("index.d2");
|
||||
let mut file = std::fs::File::create(&index_path).unwrap();
|
||||
let mut file = std::fs::File::create(index_path).unwrap();
|
||||
writeln!(&mut file, "Control Flow Between Ranking Rules: {{").unwrap();
|
||||
writeln!(&mut file, "shape: sequence_diagram");
|
||||
for (idx, rr_id) in self.ranking_rules_ids.as_ref().unwrap().iter().enumerate() {
|
||||
@ -210,29 +217,143 @@ results.{random} {{
|
||||
let id = format!("{cur_ranking_rule}.{cur_activated_id}");
|
||||
let mut new_file_path = self.folder_path.join(format!("{id}.d2"));
|
||||
let mut new_file = std::fs::File::create(new_file_path).unwrap();
|
||||
Self::query_graph_d2_description(&query_graph, &mut new_file);
|
||||
Self::query_graph_d2_description(query_graph, &mut new_file);
|
||||
writeln!(
|
||||
&mut file,
|
||||
"{id} {{
|
||||
link: \"{id}.d2.svg\"
|
||||
}}").unwrap();
|
||||
},
|
||||
SearchEvents::ProximityState { graph, paths } => {
|
||||
let cur_ranking_rule = timestamp.len() - 1;
|
||||
let cur_activated_id = activated_id(&timestamp);
|
||||
let id = format!("{cur_ranking_rule}.{cur_activated_id}");
|
||||
let mut new_file_path = self.folder_path.join(format!("{id}.d2"));
|
||||
let mut new_file = std::fs::File::create(new_file_path).unwrap();
|
||||
Self::proximity_graph_d2_description(graph, paths, &mut new_file);
|
||||
writeln!(
|
||||
&mut file,
|
||||
"{id} {{
|
||||
link: \"{id}.d2.svg\"
|
||||
}}").unwrap();
|
||||
},
|
||||
SearchEvents::ProximityState { graph, paths } => todo!(),
|
||||
}
|
||||
}
|
||||
writeln!(&mut file, "}}");
|
||||
}
|
||||
|
||||
fn query_node_d2_desc(node_idx: usize, node: &QueryNode, file: &mut File) {
|
||||
match &node {
|
||||
QueryNode::Term(LocatedQueryTerm { value, positions }) => {
|
||||
match value {
|
||||
QueryTerm::Phrase(_) => todo!(),
|
||||
QueryTerm::Word { derivations: WordDerivations { original, zero_typo, one_typo, two_typos, use_prefix_db } } => {
|
||||
writeln!(file,"{node_idx} : \"{original}\" {{
|
||||
shape: class").unwrap();
|
||||
for w in zero_typo {
|
||||
writeln!(file, "\"{w}\" : 0").unwrap();
|
||||
}
|
||||
for w in one_typo {
|
||||
writeln!(file, "\"{w}\" : 1").unwrap();
|
||||
}
|
||||
for w in two_typos {
|
||||
writeln!(file, "\"{w}\" : 2").unwrap();
|
||||
}
|
||||
if *use_prefix_db {
|
||||
writeln!(file, "use prefix DB : true").unwrap();
|
||||
}
|
||||
writeln!(file, "}}").unwrap();
|
||||
},
|
||||
}
|
||||
},
|
||||
QueryNode::Deleted => panic!(),
|
||||
QueryNode::Start => {
|
||||
writeln!(file,"{node_idx} : START").unwrap();
|
||||
},
|
||||
QueryNode::End => {
|
||||
writeln!(file,"{node_idx} : END").unwrap();
|
||||
},
|
||||
}
|
||||
}
|
||||
fn query_graph_d2_description(query_graph: &QueryGraph, file: &mut File) {
|
||||
writeln!(file,"direction: right");
|
||||
for node in 0..query_graph.nodes.len() {
|
||||
if matches!(query_graph.nodes[node], QueryNode::Deleted) {
|
||||
continue;
|
||||
}
|
||||
writeln!(file,"{node}");
|
||||
|
||||
Self::query_node_d2_desc(node, &query_graph.nodes[node], file);
|
||||
|
||||
for edge in query_graph.edges[node].successors.iter() {
|
||||
writeln!(file, "{node} -> {edge};\n").unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
fn proximity_graph_d2_description(graph: &RankingRuleGraph<ProximityGraph>, paths: &PathsMap<u64>, file: &mut File) {
|
||||
writeln!(file,"direction: right").unwrap();
|
||||
|
||||
writeln!(file, "Proximity Graph {{").unwrap();
|
||||
for (node_idx, node) in graph.query_graph.nodes.iter().enumerate() {
|
||||
if matches!(node, QueryNode::Deleted) {
|
||||
continue;
|
||||
}
|
||||
Self::query_node_d2_desc(node_idx, node, file);
|
||||
}
|
||||
for edge in graph.all_edges.iter().flatten() {
|
||||
let Edge { from_node, to_node, cost, details } = edge;
|
||||
|
||||
match &details {
|
||||
EdgeDetails::Unconditional => {
|
||||
writeln!(file,
|
||||
"{from_node} -> {to_node} : \"always cost {cost}\"",
|
||||
cost = edge.cost,
|
||||
);
|
||||
}
|
||||
EdgeDetails::Data(details) => {
|
||||
writeln!(file,
|
||||
"{from_node} -> {to_node} : \"cost {cost} {edge_label}\"",
|
||||
cost = edge.cost,
|
||||
edge_label = ProximityGraph::graphviz_edge_details_label(details)
|
||||
);
|
||||
}
|
||||
}
|
||||
}
|
||||
writeln!(file, "}}").unwrap();
|
||||
|
||||
writeln!(file, "Shortest Paths {{").unwrap();
|
||||
Self::paths_d2_description(graph, paths, file);
|
||||
writeln!(file, "}}").unwrap();
|
||||
}
|
||||
fn paths_d2_description(graph: &RankingRuleGraph<ProximityGraph>, paths: &PathsMap<u64>, file: &mut File) {
|
||||
for (edge_idx, rest) in paths.nodes.iter() {
|
||||
let Edge { from_node, to_node, cost, .. } = graph.all_edges[*edge_idx as usize].as_ref().unwrap();
|
||||
let from_node = &graph.query_graph.nodes[*from_node as usize];
|
||||
let from_node_desc = match from_node {
|
||||
QueryNode::Term(term) => match &term.value {
|
||||
QueryTerm::Phrase(_) => todo!(),
|
||||
QueryTerm::Word { derivations } => derivations.original.clone(),
|
||||
},
|
||||
QueryNode::Deleted => panic!(),
|
||||
QueryNode::Start => "START".to_owned(),
|
||||
QueryNode::End => "END".to_owned(),
|
||||
};
|
||||
let to_node = &graph.query_graph.nodes[*to_node as usize];
|
||||
let to_node_desc = match to_node {
|
||||
QueryNode::Term(term) => match &term.value {
|
||||
QueryTerm::Phrase(_) => todo!(),
|
||||
QueryTerm::Word { derivations } => derivations.original.clone(),
|
||||
},
|
||||
QueryNode::Deleted => panic!(),
|
||||
QueryNode::Start => "START".to_owned(),
|
||||
QueryNode::End => "END".to_owned(),
|
||||
};
|
||||
writeln!(file, "{edge_idx}: \"{from_node_desc}->{to_node_desc} [{cost}]\" {{
|
||||
shape: class
|
||||
}}").unwrap();
|
||||
|
||||
for (dest_edge_idx, _) in rest.nodes.iter() {
|
||||
writeln!(file, "{edge_idx} -> {dest_edge_idx}").unwrap();
|
||||
}
|
||||
Self::paths_d2_description(graph, rest, file);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1,8 +1,13 @@
|
||||
#[cfg(test)]
|
||||
pub mod detailed;
|
||||
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{query_graph, QueryGraph, RankingRule, RankingRuleQueryTrait};
|
||||
use super::{
|
||||
query_graph,
|
||||
ranking_rule_graph::{paths_map::PathsMap, proximity::ProximityGraph, RankingRuleGraph},
|
||||
QueryGraph, RankingRule, RankingRuleQueryTrait,
|
||||
};
|
||||
|
||||
pub struct DefaultSearchLogger;
|
||||
impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
|
||||
@ -39,6 +44,13 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
|
||||
/// No-op: the default logger discards the given document ids.
fn add_to_results(&mut self, docids: &RoaringBitmap) {}
|
||||
|
||||
/// No-op: the default logger ignores the query graph it is given.
fn log_words_state(&mut self, query_graph: &Q) {}
|
||||
|
||||
fn log_proximity_state(
|
||||
&mut self,
|
||||
query_graph: &RankingRuleGraph<ProximityGraph>,
|
||||
paths_map: &PathsMap<u64>,
|
||||
) {
|
||||
}
|
||||
}
|
||||
|
||||
pub trait SearchLogger<Q: RankingRuleQueryTrait> {
|
||||
@ -69,4 +81,10 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
|
||||
/// Logging hook that receives a set of document ids.
// NOTE(review): presumably called when `docids` are added to the search
// results — confirm against the call sites.
fn add_to_results(&mut self, docids: &RoaringBitmap);
|
||||
|
||||
/// Logging hook that receives a query graph; the `Words` ranking rule calls it
/// with its current query graph.
fn log_words_state(&mut self, query_graph: &Q);
|
||||
|
||||
fn log_proximity_state(
|
||||
&mut self,
|
||||
query_graph: &RankingRuleGraph<ProximityGraph>,
|
||||
paths: &PathsMap<u64>,
|
||||
);
|
||||
}
|
||||
|
@ -51,7 +51,7 @@ pub fn word_derivations(
|
||||
let mut two_typos = vec![];
|
||||
|
||||
if max_typo == 0 {
|
||||
if is_prefix {
|
||||
if is_prefix && !use_prefix_db {
|
||||
let prefix = Str::new(word).starts_with();
|
||||
let mut stream = fst.search(prefix).into_stream();
|
||||
|
||||
|
@ -11,7 +11,10 @@ use std::ops::ControlFlow;
|
||||
use heed::RoTxn;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use self::paths_map::PathsMap;
|
||||
|
||||
use super::db_cache::DatabaseCache;
|
||||
use super::logger::SearchLogger;
|
||||
use super::{QueryGraph, QueryNode};
|
||||
use crate::{Index, Result};
|
||||
|
||||
@ -23,10 +26,10 @@ pub enum EdgeDetails<E> {
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Edge<E> {
|
||||
from_node: u32,
|
||||
to_node: u32,
|
||||
cost: u8,
|
||||
details: EdgeDetails<E>,
|
||||
pub from_node: u32,
|
||||
pub to_node: u32,
|
||||
pub cost: u8,
|
||||
pub details: EdgeDetails<E>,
|
||||
}
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
@ -35,11 +38,11 @@ pub struct EdgePointer<'graph, E> {
|
||||
pub edge: &'graph Edge<E>,
|
||||
}
|
||||
|
||||
pub trait RankingRuleGraphTrait {
|
||||
pub trait RankingRuleGraphTrait: Sized {
|
||||
/// The details of an edge connecting two query nodes. These details
|
||||
/// should be sufficient to compute the edge's cost and associated document ids
|
||||
/// in [`compute_docids`](RankingRuleGraphTrait).
|
||||
type EdgeDetails: Sized;
|
||||
type EdgeDetails: Sized + Clone;
|
||||
|
||||
type BuildVisitedFromNode;
|
||||
|
||||
@ -75,6 +78,12 @@ pub trait RankingRuleGraphTrait {
|
||||
to_node: &QueryNode,
|
||||
from_node_data: &'from_data Self::BuildVisitedFromNode,
|
||||
) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>>;
|
||||
|
||||
fn log_state(
|
||||
graph: &RankingRuleGraph<Self>,
|
||||
paths: &PathsMap<u64>,
|
||||
logger: &mut dyn SearchLogger<QueryGraph>,
|
||||
);
|
||||
}
|
||||
|
||||
pub struct RankingRuleGraph<G: RankingRuleGraphTrait> {
|
||||
@ -90,6 +99,16 @@ pub struct RankingRuleGraph<G: RankingRuleGraphTrait> {
|
||||
// 2. get node_incoming_edges[to]
|
||||
// 3. take intersection between the two
|
||||
}
|
||||
/// Field-by-field clone of the graph.
// NOTE(review): presumably hand-written because `#[derive(Clone)]` would add a
// `G: Clone` bound on the graph marker type — confirm.
impl<G: RankingRuleGraphTrait> Clone for RankingRuleGraph<G> {
    fn clone(&self) -> Self {
        let Self { query_graph, all_edges, node_edges, successors } = self;
        Self {
            query_graph: query_graph.clone(),
            all_edges: all_edges.clone(),
            node_edges: node_edges.clone(),
            successors: successors.clone(),
        }
    }
}
|
||||
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
// Visit all edges between the two given nodes in order of increasing cost.
|
||||
pub fn visit_edges<'graph, O>(
|
||||
|
@ -9,10 +9,10 @@ use super::cheapest_paths::Path;
|
||||
use super::{Edge, EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use crate::new::QueryNode;
|
||||
|
||||
#[derive(Debug)]
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct PathsMap<V> {
|
||||
nodes: Vec<(u32, PathsMap<V>)>,
|
||||
value: Option<V>,
|
||||
pub nodes: Vec<(u32, PathsMap<V>)>,
|
||||
pub value: Option<V>,
|
||||
}
|
||||
impl<V> Default for PathsMap<V> {
|
||||
fn default() -> Self {
|
||||
|
@ -3,6 +3,7 @@ pub mod compute_docids;
|
||||
|
||||
use heed::RoTxn;
|
||||
|
||||
use super::paths_map::PathsMap;
|
||||
use super::{Edge, EdgeDetails, RankingRuleGraphTrait};
|
||||
use crate::new::db_cache::DatabaseCache;
|
||||
use crate::new::query_term::WordDerivations;
|
||||
@ -18,6 +19,7 @@ pub enum WordPair {
|
||||
WordPrefixSwapped { left: String, right_prefix: String },
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct ProximityEdge {
|
||||
pairs: Vec<WordPair>,
|
||||
proximity: u8,
|
||||
@ -61,4 +63,12 @@ impl RankingRuleGraphTrait for ProximityGraph {
|
||||
) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>> {
|
||||
build::visit_to_node(index, txn, db_cache, to_node, from_node_data)
|
||||
}
|
||||
|
||||
fn log_state(
|
||||
graph: &super::RankingRuleGraph<Self>,
|
||||
paths: &PathsMap<u64>,
|
||||
logger: &mut dyn crate::new::logger::SearchLogger<crate::new::QueryGraph>,
|
||||
) {
|
||||
logger.log_proximity_state(graph, paths);
|
||||
}
|
||||
}
|
||||
|
@ -270,8 +270,7 @@ mod tests {
|
||||
let mut db_cache = DatabaseCache::default();
|
||||
|
||||
let query_graph =
|
||||
make_query_graph(&index, &txn, &mut db_cache, "the quick brown fox jumps over")
|
||||
.unwrap();
|
||||
make_query_graph(&index, &txn, &mut db_cache, "b b b b b b b b b b").unwrap();
|
||||
println!("{}", query_graph.graphviz());
|
||||
logger.initial_query(&query_graph);
|
||||
|
||||
@ -314,8 +313,7 @@ mod tests {
|
||||
let mut db_cache = DatabaseCache::default();
|
||||
|
||||
let query_graph =
|
||||
make_query_graph(&index, &txn, &mut db_cache, "released from prison by the government")
|
||||
.unwrap();
|
||||
make_query_graph(&index, &txn, &mut db_cache, "b b b b b b b b b b").unwrap();
|
||||
|
||||
// TODO: filters + maybe distinct attributes?
|
||||
let universe = get_start_universe(
|
||||
@ -335,7 +333,7 @@ mod tests {
|
||||
&mut db_cache,
|
||||
&universe,
|
||||
&query_graph,
|
||||
&mut logger, /* 0, 20 */
|
||||
&mut logger, //&mut DefaultSearchLogger, /* 0, 20 */
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
|
@ -47,8 +47,6 @@ impl<'transaction> RankingRule<'transaction, QueryGraph> for Words {
|
||||
self.exhausted = false;
|
||||
self.query_graph = Some(parent_query_graph.clone());
|
||||
|
||||
logger.log_words_state(parent_query_graph);
|
||||
|
||||
// TODO: a phrase can contain many positions, but represents a single node.
|
||||
// That's a problem.
|
||||
let positions_to_remove = match self.terms_matching_strategy {
|
||||
@ -83,11 +81,14 @@ impl<'transaction> RankingRule<'transaction, QueryGraph> for Words {
|
||||
// println!("Words: next bucket");
|
||||
assert!(self.iterating);
|
||||
assert!(universe.len() > 1);
|
||||
|
||||
if self.exhausted {
|
||||
return Ok(None);
|
||||
}
|
||||
let Some(query_graph) = &mut self.query_graph else { panic!() };
|
||||
|
||||
logger.log_words_state(query_graph);
|
||||
|
||||
let this_bucket = resolve_query_graph(
|
||||
index,
|
||||
txn,
|
||||
@ -107,7 +108,6 @@ impl<'transaction> RankingRule<'transaction, QueryGraph> for Words {
|
||||
let position_to_remove = self.positions_to_remove.pop().unwrap();
|
||||
query_graph.remove_words_at_position(position_to_remove);
|
||||
}
|
||||
logger.log_words_state(query_graph);
|
||||
|
||||
Ok(Some(RankingRuleOutput { query: child_query_graph, candidates: this_bucket }))
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user