From 3b0737a0926ae1c6a8316e943527303cd38743e2 Mon Sep 17 00:00:00 2001
From: Loïc Lecrenier
Date: Thu, 30 Mar 2023 12:20:44 +0200
Subject: [PATCH] Fix detailed logger

---
 milli/src/search/new/logger/detailed.rs | 159 ++++++++++++++----------
 1 file changed, 94 insertions(+), 65 deletions(-)

diff --git a/milli/src/search/new/logger/detailed.rs b/milli/src/search/new/logger/detailed.rs
index 57be61612..3a02950a8 100644
--- a/milli/src/search/new/logger/detailed.rs
+++ b/milli/src/search/new/logger/detailed.rs
@@ -8,7 +8,9 @@ use roaring::RoaringBitmap;
 
 use crate::search::new::interner::{Interned, MappedInterner};
 use crate::search::new::query_graph::QueryNodeData;
-use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm};
+use crate::search::new::query_term::{
+    Lazy, LocatedQueryTermSubset, OneTypoTerm, QueryTerm, TwoTypoTerm, ZeroTypoTerm,
+};
 use crate::search::new::ranking_rule_graph::{
     DeadEndsCache, Edge, ProximityCondition, ProximityGraph, RankingRuleGraph,
     RankingRuleGraphTrait, TypoCondition, TypoGraph,
@@ -45,16 +47,16 @@
         paths: Vec<Vec<Interned<ProximityCondition>>>,
         dead_ends_cache: DeadEndsCache<ProximityCondition>,
         universe: RoaringBitmap,
-        distances: MappedInterner<QueryNode, Vec<u16>>,
-        cost: u16,
+        costs: MappedInterner<QueryNode, Vec<u64>>,
+        cost: u64,
     },
     TypoState {
         graph: RankingRuleGraph<TypoGraph>,
         paths: Vec<Vec<Interned<TypoCondition>>>,
        dead_ends_cache: DeadEndsCache<TypoCondition>,
         universe: RoaringBitmap,
-        distances: MappedInterner<QueryNode, Vec<u16>>,
-        cost: u16,
+        costs: MappedInterner<QueryNode, Vec<u64>>,
+        cost: u64,
     },
     RankingRuleSkipBucket {
         ranking_rule_idx: usize,
@@ -171,15 +173,15 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
         paths_map: &[Vec<Interned<ProximityCondition>>],
         dead_ends_cache: &DeadEndsCache<ProximityCondition>,
         universe: &RoaringBitmap,
-        distances: &MappedInterner<QueryNode, Vec<u16>>,
-        cost: u16,
+        costs: &MappedInterner<QueryNode, Vec<u64>>,
+        cost: u64,
     ) {
         self.events.push(SearchEvents::ProximityState {
             graph: query_graph.clone(),
             paths: paths_map.to_vec(),
             dead_ends_cache: dead_ends_cache.clone(),
             universe: universe.clone(),
-            distances: distances.clone(),
+            costs: costs.clone(),
             cost,
         })
     }
@@ -190,15 +192,15 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
         paths_map: &[Vec<Interned<TypoCondition>>],
         dead_ends_cache: &DeadEndsCache<TypoCondition>,
         universe: &RoaringBitmap,
-        distances: &MappedInterner<QueryNode, Vec<u16>>,
-        cost: u16,
+        costs: &MappedInterner<QueryNode, Vec<u64>>,
+        cost: u64,
     ) {
         self.events.push(SearchEvents::TypoState {
             graph: query_graph.clone(),
             paths: paths_map.to_vec(),
             dead_ends_cache: dead_ends_cache.clone(),
             universe: universe.clone(),
-            distances: distances.clone(),
+            costs: costs.clone(),
             cost,
         })
     }
@@ -358,7 +360,7 @@ results.{cur_ranking_rule}{cur_activated_id} {{
                     paths,
                     dead_ends_cache,
                     universe,
-                    distances,
+                    costs,
                     cost,
                 } => {
                     let cur_ranking_rule = timestamp.len() - 1;
@@ -373,15 +375,15 @@ results.{cur_ranking_rule}{cur_activated_id} {{
                         graph,
                         paths,
                         dead_ends_cache,
-                        distances.clone(),
+                        costs.clone(),
                         &mut new_file,
                     );
                     writeln!(
                         &mut file,
                         "{id} {{
-    link: \"{id}.d2.svg\"
-    tooltip: \"cost {cost}, universe len: {}\"
-}}",
+    link: \"{id}.d2.svg\"
+    tooltip: \"cost {cost}, universe len: {}\"
+    }}",
                         universe.len()
                     )
                     .unwrap();
@@ -391,7 +393,7 @@ results.{cur_ranking_rule}{cur_activated_id} {{
                     paths,
                     dead_ends_cache,
                     universe,
-                    distances,
+                    costs,
                     cost,
                 } => {
                     let cur_ranking_rule = timestamp.len() - 1;
@@ -406,7 +408,7 @@ results.{cur_ranking_rule}{cur_activated_id} {{
                         graph,
                         paths,
                         dead_ends_cache,
-                        distances.clone(),
+                        costs.clone(),
                         &mut new_file,
                     );
                     writeln!(
                         &mut file,
@@ -424,74 +426,101 @@ results.{cur_ranking_rule}{cur_activated_id} {{
         writeln!(&mut file, "}}").unwrap();
     }
 
-    fn query_node_d2_desc<R: RankingRuleGraphTrait>(
+    fn query_node_d2_desc(
         ctx: &mut SearchContext,
         node_idx: Interned<QueryNode>,
         node: &QueryNode,
- _distances: &[u16], + _costs: &[u64], file: &mut File, ) { match &node.data { - QueryNodeData::Term(LocatedQueryTerm { value, .. }) => { + QueryNodeData::Term(LocatedQueryTermSubset { + term_subset, + positions: _, + term_ids: _, + }) => { let QueryTerm { original, + is_multiple_words: _, + is_prefix: _, + max_nbr_typos, zero_typo, one_typo, - two_typos, - use_prefix_db, - synonyms, - split_words, - prefix_of, - is_prefix: _, - is_ngram: _, - phrase, - } = ctx.term_interner.get(*value); + two_typo, + } = ctx.term_interner.get(term_subset.original); let original = ctx.word_interner.get(*original); writeln!( file, "{node_idx} : \"{original}\" {{ -shape: class" + shape: class + max_nbr_typo: {max_nbr_typos}" ) .unwrap(); + + let ZeroTypoTerm { phrase, zero_typo, prefix_of, synonyms, use_prefix_db } = + zero_typo; + for w in zero_typo.iter().copied() { - let w = ctx.word_interner.get(w); - writeln!(file, "\"{w}\" : 0").unwrap(); + if term_subset.zero_typo_subset.contains_word(w) { + let w = ctx.word_interner.get(w); + writeln!(file, "\"{w}\" : 0").unwrap(); + } } for w in prefix_of.iter().copied() { - let w = ctx.word_interner.get(w); - writeln!(file, "\"{w}\" : 0P").unwrap(); - } - for w in one_typo.iter().copied() { - let w = ctx.word_interner.get(w); - writeln!(file, "\"{w}\" : 1").unwrap(); - } - for w in two_typos.iter().copied() { - let w = ctx.word_interner.get(w); - writeln!(file, "\"{w}\" : 2").unwrap(); + if term_subset.zero_typo_subset.contains_word(w) { + let w = ctx.word_interner.get(w); + writeln!(file, "\"{w}\" : 0P").unwrap(); + } } + if let Some(phrase) = phrase { - let phrase = ctx.phrase_interner.get(*phrase); - let phrase_str = phrase.description(&ctx.word_interner); - writeln!(file, "\"{phrase_str}\" : phrase").unwrap(); - } - if let Some(split_words) = split_words { - let phrase = ctx.phrase_interner.get(*split_words); - let phrase_str = phrase.description(&ctx.word_interner); - writeln!(file, "\"{phrase_str}\" : split_words").unwrap(); + if term_subset.zero_typo_subset.contains_phrase(*phrase) { + let phrase = ctx.phrase_interner.get(*phrase); + let phrase_str = phrase.description(&ctx.word_interner); + writeln!(file, "\"{phrase_str}\" : phrase").unwrap(); + } } + for synonym in synonyms.iter().copied() { - let phrase = ctx.phrase_interner.get(synonym); - let phrase_str = phrase.description(&ctx.word_interner); - writeln!(file, "\"{phrase_str}\" : synonym").unwrap(); + if term_subset.zero_typo_subset.contains_phrase(synonym) { + let phrase = ctx.phrase_interner.get(synonym); + let phrase_str = phrase.description(&ctx.word_interner); + writeln!(file, "\"{phrase_str}\" : synonym").unwrap(); + } } if let Some(use_prefix_db) = use_prefix_db { - let p = ctx.word_interner.get(*use_prefix_db); - writeln!(file, "use prefix DB : {p}").unwrap(); + if term_subset.zero_typo_subset.contains_word(*use_prefix_db) { + let p = ctx.word_interner.get(*use_prefix_db); + writeln!(file, "use prefix DB : {p}").unwrap(); + } + } + if let Lazy::Init(one_typo) = one_typo { + let OneTypoTerm { split_words, one_typo } = one_typo; + + for w in one_typo.iter().copied() { + if term_subset.one_typo_subset.contains_word(w) { + let w = ctx.word_interner.get(w); + writeln!(file, "\"{w}\" : 1").unwrap(); + } + } + if let Some(split_words) = split_words { + if term_subset.one_typo_subset.contains_phrase(*split_words) { + let phrase = ctx.phrase_interner.get(*split_words); + let phrase_str = phrase.description(&ctx.word_interner); + writeln!(file, "\"{phrase_str}\" : split_words").unwrap(); + } + } + } + 
+                if let Lazy::Init(two_typo) = two_typo {
+                    let TwoTypoTerm { two_typos } = two_typo;
+                    for w in two_typos.iter().copied() {
+                        if term_subset.two_typo_subset.contains_word(w) {
+                            let w = ctx.word_interner.get(w);
+                            writeln!(file, "\"{w}\" : 2").unwrap();
+                        }
+                    }
                 }
 
-                // for d in distances.iter() {
-                //     writeln!(file, "\"d_{d}\" : distance").unwrap();
-                // }
                 writeln!(file, "}}").unwrap();
             }
@@ -514,7 +543,7 @@ shape: class"
             if matches!(node.data, QueryNodeData::Deleted) {
                 continue;
             }
-            Self::query_node_d2_desc::<TypoGraph>(ctx, node_id, node, &[], file);
+            Self::query_node_d2_desc(ctx, node_id, node, &[], file);
 
             for edge in node.successors.iter() {
                 writeln!(file, "{node_id} -> {edge};\n").unwrap();
@@ -526,7 +555,7 @@ shape: class"
         graph: &RankingRuleGraph<R>,
         paths: &[Vec<Interned<R::Condition>>],
         _dead_ends_cache: &DeadEndsCache<R::Condition>,
-        distances: MappedInterner<QueryNode, Vec<u16>>,
+        costs: MappedInterner<QueryNode, Vec<u64>>,
         file: &mut File,
     ) {
         writeln!(file, "direction: right").unwrap();
@@ -536,12 +565,12 @@ shape: class"
             if matches!(&node.data, QueryNodeData::Deleted) {
                 continue;
             }
-            let distances = &distances.get(node_idx);
-            Self::query_node_d2_desc::<R>(ctx, node_idx, node, distances, file);
+            let costs = &costs.get(node_idx);
+            Self::query_node_d2_desc(ctx, node_idx, node, costs, file);
         }
         for (_edge_id, edge) in graph.edges_store.iter() {
             let Some(edge) = edge else { continue };
-            let Edge { source_node, dest_node, condition: details, cost } = edge;
+            let Edge { source_node, dest_node, condition: details, cost, nodes_to_skip: _ } = edge;
 
             match &details {
                 None => {
@@ -561,7 +590,7 @@ shape: class"
         }
         writeln!(file, "}}").unwrap();
 
-        // writeln!(file, "Distances {{").unwrap();
+        // writeln!(file, "costs {{").unwrap();
         // Self::paths_d2_description(graph, paths, file);
         // writeln!(file, "}}").unwrap();
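
Reviewer note: the recurring pattern this patch applies in query_node_d2_desc is a two-level gate before anything is written to the d2 description: a lazily computed typo bucket (Lazy<OneTypoTerm>, Lazy<TwoTypoTerm>) is skipped entirely unless it is Lazy::Init, and each candidate word or phrase is then filtered through the matching subset (zero_typo_subset, one_typo_subset, two_typo_subset) via contains_word / contains_phrase. The following is a minimal, self-contained sketch of that gating logic; Lazy, WordSubset, OneTypoTerm, and describe_one_typo here are simplified illustrative stand-ins, not the actual milli definitions.

use std::collections::HashSet;

// Illustrative stand-in for milli's `Lazy<T>`: data that may not be computed yet.
enum Lazy<T> {
    Uninit,
    Init(T),
}

// Illustrative stand-in for the subset types queried via `contains_word`.
enum WordSubset {
    All,
    Only(HashSet<String>),
}

impl WordSubset {
    fn contains_word(&self, word: &str) -> bool {
        match self {
            WordSubset::All => true,
            WordSubset::Only(set) => set.contains(word),
        }
    }
}

// Illustrative stand-in for the one-typo bucket of a query term.
struct OneTypoTerm {
    one_typo: Vec<String>,
}

// Mirrors the patch's control flow: skip the bucket entirely when the lazy
// data is uninitialized, then filter every candidate word through the subset
// before emitting a `"word" : 1` line of the d2 description.
fn describe_one_typo(bucket: &Lazy<OneTypoTerm>, subset: &WordSubset, out: &mut String) {
    if let Lazy::Init(one_typo) = bucket {
        for w in &one_typo.one_typo {
            if subset.contains_word(w) {
                out.push_str(&format!("\"{w}\" : 1\n"));
            }
        }
    }
}

fn main() {
    let bucket = Lazy::Init(OneTypoTerm { one_typo: vec!["wordl".into(), "worde".into()] });
    let subset = WordSubset::Only(HashSet::from(["wordl".to_string()]));
    let mut out = String::new();

    // Only the subset-approved candidate is rendered.
    describe_one_typo(&bucket, &subset, &mut out);
    print!("{out}"); // prints: "wordl" : 1

    // An uninitialized bucket logs nothing instead of panicking.
    describe_one_typo(&Lazy::Uninit, &subset, &mut out);
}

The same initialized-then-allowed check is what lets the logger describe a narrowed term subset without touching lazy typo data that the search never had to compute.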