diff --git a/milli/src/search/new/graph_based_ranking_rule.rs b/milli/src/search/new/graph_based_ranking_rule.rs index 6c061cd4c..a9bb31682 100644 --- a/milli/src/search/new/graph_based_ranking_rule.rs +++ b/milli/src/search/new/graph_based_ranking_rule.rs @@ -94,6 +94,8 @@ impl<'transaction, G: RankingRuleGraphTrait> RankingRule<'transaction, QueryGrap return Ok(None); } + G::log_state(&state.graph, &paths, logger); + let bucket = state.graph.resolve_paths( index, txn, diff --git a/milli/src/search/new/logger/detailed.rs b/milli/src/search/new/logger/detailed.rs index a108a3a7f..36072af4d 100644 --- a/milli/src/search/new/logger/detailed.rs +++ b/milli/src/search/new/logger/detailed.rs @@ -5,6 +5,8 @@ use std::path::Path; use std::{io::Write, path::PathBuf}; use crate::new::QueryNode; +use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations}; +use crate::new::ranking_rule_graph::{Edge, EdgeDetails, RankingRuleGraphTrait}; use crate::new::ranking_rule_graph::{ paths_map::PathsMap, proximity::ProximityGraph, RankingRuleGraph, }; @@ -112,6 +114,11 @@ impl SearchLogger for DetailedSearchLogger { fn log_words_state(&mut self, query_graph: &QueryGraph) { self.events.push(SearchEvents::WordsState { query_graph: query_graph.clone() }); } + + fn log_proximity_state(&mut self, query_graph: &RankingRuleGraph, paths_map: &PathsMap,) { + self.events.push(SearchEvents::ProximityState { graph: query_graph.clone(), paths: paths_map.clone() }) + } + } @@ -129,7 +136,7 @@ impl DetailedSearchLogger { } let index_path = self.folder_path.join("index.d2"); - let mut file = std::fs::File::create(&index_path).unwrap(); + let mut file = std::fs::File::create(index_path).unwrap(); writeln!(&mut file, "Control Flow Between Ranking Rules: {{").unwrap(); writeln!(&mut file, "shape: sequence_diagram"); for (idx, rr_id) in self.ranking_rules_ids.as_ref().unwrap().iter().enumerate() { @@ -210,29 +217,143 @@ results.{random} {{ let id = format!("{cur_ranking_rule}.{cur_activated_id}"); let mut new_file_path = self.folder_path.join(format!("{id}.d2")); let mut new_file = std::fs::File::create(new_file_path).unwrap(); - Self::query_graph_d2_description(&query_graph, &mut new_file); + Self::query_graph_d2_description(query_graph, &mut new_file); + writeln!( + &mut file, + "{id} {{ + link: \"{id}.d2.svg\" +}}").unwrap(); + }, + SearchEvents::ProximityState { graph, paths } => { + let cur_ranking_rule = timestamp.len() - 1; + let cur_activated_id = activated_id(×tamp); + let id = format!("{cur_ranking_rule}.{cur_activated_id}"); + let mut new_file_path = self.folder_path.join(format!("{id}.d2")); + let mut new_file = std::fs::File::create(new_file_path).unwrap(); + Self::proximity_graph_d2_description(graph, paths, &mut new_file); writeln!( &mut file, "{id} {{ link: \"{id}.d2.svg\" }}").unwrap(); }, - SearchEvents::ProximityState { graph, paths } => todo!(), } } writeln!(&mut file, "}}"); } + + fn query_node_d2_desc(node_idx: usize, node: &QueryNode, file: &mut File) { + match &node { + QueryNode::Term(LocatedQueryTerm { value, positions }) => { + match value { + QueryTerm::Phrase(_) => todo!(), + QueryTerm::Word { derivations: WordDerivations { original, zero_typo, one_typo, two_typos, use_prefix_db } } => { + writeln!(file,"{node_idx} : \"{original}\" {{ +shape: class").unwrap(); + for w in zero_typo { + writeln!(file, "\"{w}\" : 0").unwrap(); + } + for w in one_typo { + writeln!(file, "\"{w}\" : 1").unwrap(); + } + for w in two_typos { + writeln!(file, "\"{w}\" : 2").unwrap(); + } + if *use_prefix_db { + writeln!(file, "use prefix DB : true").unwrap(); + } + writeln!(file, "}}").unwrap(); + }, + } + }, + QueryNode::Deleted => panic!(), + QueryNode::Start => { + writeln!(file,"{node_idx} : START").unwrap(); + }, + QueryNode::End => { + writeln!(file,"{node_idx} : END").unwrap(); + }, + } + } fn query_graph_d2_description(query_graph: &QueryGraph, file: &mut File) { writeln!(file,"direction: right"); for node in 0..query_graph.nodes.len() { if matches!(query_graph.nodes[node], QueryNode::Deleted) { continue; } - writeln!(file,"{node}"); - + Self::query_node_d2_desc(node, &query_graph.nodes[node], file); + for edge in query_graph.edges[node].successors.iter() { writeln!(file, "{node} -> {edge};\n").unwrap(); } } } + fn proximity_graph_d2_description(graph: &RankingRuleGraph, paths: &PathsMap, file: &mut File) { + writeln!(file,"direction: right").unwrap(); + + writeln!(file, "Proximity Graph {{").unwrap(); + for (node_idx, node) in graph.query_graph.nodes.iter().enumerate() { + if matches!(node, QueryNode::Deleted) { + continue; + } + Self::query_node_d2_desc(node_idx, node, file); + } + for edge in graph.all_edges.iter().flatten() { + let Edge { from_node, to_node, cost, details } = edge; + + match &details { + EdgeDetails::Unconditional => { + writeln!(file, + "{from_node} -> {to_node} : \"always cost {cost}\"", + cost = edge.cost, + ); + } + EdgeDetails::Data(details) => { + writeln!(file, + "{from_node} -> {to_node} : \"cost {cost} {edge_label}\"", + cost = edge.cost, + edge_label = ProximityGraph::graphviz_edge_details_label(details) + ); + } + } + } + writeln!(file, "}}").unwrap(); + + writeln!(file, "Shortest Paths {{").unwrap(); + Self::paths_d2_description(graph, paths, file); + writeln!(file, "}}").unwrap(); + } + fn paths_d2_description(graph: &RankingRuleGraph, paths: &PathsMap, file: &mut File) { + for (edge_idx, rest) in paths.nodes.iter() { + let Edge { from_node, to_node, cost, .. } = graph.all_edges[*edge_idx as usize].as_ref().unwrap(); + let from_node = &graph.query_graph.nodes[*from_node as usize]; + let from_node_desc = match from_node { + QueryNode::Term(term) => match &term.value { + QueryTerm::Phrase(_) => todo!(), + QueryTerm::Word { derivations } => derivations.original.clone(), + }, + QueryNode::Deleted => panic!(), + QueryNode::Start => "START".to_owned(), + QueryNode::End => "END".to_owned(), + }; + let to_node = &graph.query_graph.nodes[*to_node as usize]; + let to_node_desc = match to_node { + QueryNode::Term(term) => match &term.value { + QueryTerm::Phrase(_) => todo!(), + QueryTerm::Word { derivations } => derivations.original.clone(), + }, + QueryNode::Deleted => panic!(), + QueryNode::Start => "START".to_owned(), + QueryNode::End => "END".to_owned(), + }; + writeln!(file, "{edge_idx}: \"{from_node_desc}->{to_node_desc} [{cost}]\" {{ + shape: class + }}").unwrap(); + + for (dest_edge_idx, _) in rest.nodes.iter() { + writeln!(file, "{edge_idx} -> {dest_edge_idx}").unwrap(); + } + Self::paths_d2_description(graph, rest, file); + } + } } diff --git a/milli/src/search/new/logger/mod.rs b/milli/src/search/new/logger/mod.rs index 94c2de713..ccafc7f11 100644 --- a/milli/src/search/new/logger/mod.rs +++ b/milli/src/search/new/logger/mod.rs @@ -1,8 +1,13 @@ +#[cfg(test)] pub mod detailed; use roaring::RoaringBitmap; -use super::{query_graph, QueryGraph, RankingRule, RankingRuleQueryTrait}; +use super::{ + query_graph, + ranking_rule_graph::{paths_map::PathsMap, proximity::ProximityGraph, RankingRuleGraph}, + QueryGraph, RankingRule, RankingRuleQueryTrait, +}; pub struct DefaultSearchLogger; impl SearchLogger for DefaultSearchLogger { @@ -39,6 +44,13 @@ impl SearchLogger for DefaultSearchLogger { fn add_to_results(&mut self, docids: &RoaringBitmap) {} fn log_words_state(&mut self, query_graph: &Q) {} + + fn log_proximity_state( + &mut self, + query_graph: &RankingRuleGraph, + paths_map: &PathsMap, + ) { + } } pub trait SearchLogger { @@ -69,4 +81,10 @@ pub trait SearchLogger { fn add_to_results(&mut self, docids: &RoaringBitmap); fn log_words_state(&mut self, query_graph: &Q); + + fn log_proximity_state( + &mut self, + query_graph: &RankingRuleGraph, + paths: &PathsMap, + ); } diff --git a/milli/src/search/new/query_term.rs b/milli/src/search/new/query_term.rs index 52943755a..537857bf2 100644 --- a/milli/src/search/new/query_term.rs +++ b/milli/src/search/new/query_term.rs @@ -51,7 +51,7 @@ pub fn word_derivations( let mut two_typos = vec![]; if max_typo == 0 { - if is_prefix { + if is_prefix && !use_prefix_db { let prefix = Str::new(word).starts_with(); let mut stream = fst.search(prefix).into_stream(); diff --git a/milli/src/search/new/ranking_rule_graph/mod.rs b/milli/src/search/new/ranking_rule_graph/mod.rs index 52b685d08..e677be1d9 100644 --- a/milli/src/search/new/ranking_rule_graph/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/mod.rs @@ -11,7 +11,10 @@ use std::ops::ControlFlow; use heed::RoTxn; use roaring::RoaringBitmap; +use self::paths_map::PathsMap; + use super::db_cache::DatabaseCache; +use super::logger::SearchLogger; use super::{QueryGraph, QueryNode}; use crate::{Index, Result}; @@ -23,10 +26,10 @@ pub enum EdgeDetails { #[derive(Debug, Clone)] pub struct Edge { - from_node: u32, - to_node: u32, - cost: u8, - details: EdgeDetails, + pub from_node: u32, + pub to_node: u32, + pub cost: u8, + pub details: EdgeDetails, } #[derive(Debug, Clone)] @@ -35,11 +38,11 @@ pub struct EdgePointer<'graph, E> { pub edge: &'graph Edge, } -pub trait RankingRuleGraphTrait { +pub trait RankingRuleGraphTrait: Sized { /// The details of an edge connecting two query nodes. These details /// should be sufficient to compute the edge's cost and associated document ids /// in [`compute_docids`](RankingRuleGraphTrait). - type EdgeDetails: Sized; + type EdgeDetails: Sized + Clone; type BuildVisitedFromNode; @@ -75,6 +78,12 @@ pub trait RankingRuleGraphTrait { to_node: &QueryNode, from_node_data: &'from_data Self::BuildVisitedFromNode, ) -> Result)>>; + + fn log_state( + graph: &RankingRuleGraph, + paths: &PathsMap, + logger: &mut dyn SearchLogger, + ); } pub struct RankingRuleGraph { @@ -90,6 +99,16 @@ pub struct RankingRuleGraph { // 2. get node_incoming_edges[to] // 3. take intersection betweem the two } +impl Clone for RankingRuleGraph { + fn clone(&self) -> Self { + Self { + query_graph: self.query_graph.clone(), + all_edges: self.all_edges.clone(), + node_edges: self.node_edges.clone(), + successors: self.successors.clone(), + } + } +} impl RankingRuleGraph { // Visit all edges between the two given nodes in order of increasing cost. pub fn visit_edges<'graph, O>( diff --git a/milli/src/search/new/ranking_rule_graph/paths_map.rs b/milli/src/search/new/ranking_rule_graph/paths_map.rs index 6f6512ae4..8360b1975 100644 --- a/milli/src/search/new/ranking_rule_graph/paths_map.rs +++ b/milli/src/search/new/ranking_rule_graph/paths_map.rs @@ -9,10 +9,10 @@ use super::cheapest_paths::Path; use super::{Edge, EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait}; use crate::new::QueryNode; -#[derive(Debug)] +#[derive(Debug, Clone)] pub struct PathsMap { - nodes: Vec<(u32, PathsMap)>, - value: Option, + pub nodes: Vec<(u32, PathsMap)>, + pub value: Option, } impl Default for PathsMap { fn default() -> Self { diff --git a/milli/src/search/new/ranking_rule_graph/proximity/mod.rs b/milli/src/search/new/ranking_rule_graph/proximity/mod.rs index 3b9470be2..66e6bad98 100644 --- a/milli/src/search/new/ranking_rule_graph/proximity/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/proximity/mod.rs @@ -3,6 +3,7 @@ pub mod compute_docids; use heed::RoTxn; +use super::paths_map::PathsMap; use super::{Edge, EdgeDetails, RankingRuleGraphTrait}; use crate::new::db_cache::DatabaseCache; use crate::new::query_term::WordDerivations; @@ -18,6 +19,7 @@ pub enum WordPair { WordPrefixSwapped { left: String, right_prefix: String }, } +#[derive(Clone)] pub struct ProximityEdge { pairs: Vec, proximity: u8, @@ -61,4 +63,12 @@ impl RankingRuleGraphTrait for ProximityGraph { ) -> Result)>> { build::visit_to_node(index, txn, db_cache, to_node, from_node_data) } + + fn log_state( + graph: &super::RankingRuleGraph, + paths: &PathsMap, + logger: &mut dyn crate::new::logger::SearchLogger, + ) { + logger.log_proximity_state(graph, paths); + } } diff --git a/milli/src/search/new/ranking_rules.rs b/milli/src/search/new/ranking_rules.rs index 66f5b9d69..6126676e4 100644 --- a/milli/src/search/new/ranking_rules.rs +++ b/milli/src/search/new/ranking_rules.rs @@ -270,8 +270,7 @@ mod tests { let mut db_cache = DatabaseCache::default(); let query_graph = - make_query_graph(&index, &txn, &mut db_cache, "the quick brown fox jumps over") - .unwrap(); + make_query_graph(&index, &txn, &mut db_cache, "b b b b b b b b b b").unwrap(); println!("{}", query_graph.graphviz()); logger.initial_query(&query_graph); @@ -314,8 +313,7 @@ mod tests { let mut db_cache = DatabaseCache::default(); let query_graph = - make_query_graph(&index, &txn, &mut db_cache, "released from prison by the government") - .unwrap(); + make_query_graph(&index, &txn, &mut db_cache, "b b b b b b b b b b").unwrap(); // TODO: filters + maybe distinct attributes? let universe = get_start_universe( @@ -335,7 +333,7 @@ mod tests { &mut db_cache, &universe, &query_graph, - &mut logger, /* 0, 20 */ + &mut logger, //&mut DefaultSearchLogger, /* 0, 20 */ ) .unwrap(); diff --git a/milli/src/search/new/words.rs b/milli/src/search/new/words.rs index 4beb5994a..63df03f93 100644 --- a/milli/src/search/new/words.rs +++ b/milli/src/search/new/words.rs @@ -47,8 +47,6 @@ impl<'transaction> RankingRule<'transaction, QueryGraph> for Words { self.exhausted = false; self.query_graph = Some(parent_query_graph.clone()); - logger.log_words_state(parent_query_graph); - // TODO: a phrase can contain many positions, but represents a single node. // That's a problem. let positions_to_remove = match self.terms_matching_strategy { @@ -83,11 +81,14 @@ impl<'transaction> RankingRule<'transaction, QueryGraph> for Words { // println!("Words: next bucket"); assert!(self.iterating); assert!(universe.len() > 1); + if self.exhausted { return Ok(None); } let Some(query_graph) = &mut self.query_graph else { panic!() }; + logger.log_words_state(query_graph); + let this_bucket = resolve_query_graph( index, txn, @@ -107,7 +108,6 @@ impl<'transaction> RankingRule<'transaction, QueryGraph> for Words { let position_to_remove = self.positions_to_remove.pop().unwrap(); query_graph.remove_words_at_position(position_to_remove); } - logger.log_words_state(query_graph); Ok(Some(RankingRuleOutput { query: child_query_graph, candidates: this_bucket })) }