diff --git a/milli/src/lib.rs b/milli/src/lib.rs index 594405891..af4324ae4 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -54,8 +54,6 @@ pub static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc; #[macro_use] pub mod documents; -pub use search::new; - mod asc_desc; mod criterion; mod error; diff --git a/milli/src/search/new/db_cache.rs b/milli/src/search/new/db_cache.rs index 8ebe14047..100dae90a 100644 --- a/milli/src/search/new/db_cache.rs +++ b/milli/src/search/new/db_cache.rs @@ -1,8 +1,11 @@ -use super::{interner::Interned, SearchContext}; -use crate::Result; +use std::collections::hash_map::Entry; + use fxhash::FxHashMap; use heed::types::ByteSlice; -use std::collections::hash_map::Entry; + +use super::interner::Interned; +use super::SearchContext; +use crate::Result; #[derive(Default)] pub struct DatabaseCache<'search> { diff --git a/milli/src/search/new/graph_based_ranking_rule.rs b/milli/src/search/new/graph_based_ranking_rule.rs index 6c2e714ad..1d17c32a8 100644 --- a/milli/src/search/new/graph_based_ranking_rule.rs +++ b/milli/src/search/new/graph_based_ranking_rule.rs @@ -1,13 +1,13 @@ -use super::logger::SearchLogger; -use super::ranking_rule_graph::EdgeDocidsCache; -use super::ranking_rule_graph::EmptyPathsCache; -use super::ranking_rule_graph::{RankingRuleGraph, RankingRuleGraphTrait}; -use super::small_bitmap::SmallBitmap; -use super::SearchContext; -use super::{BitmapOrAllRef, QueryGraph, RankingRule, RankingRuleOutput}; -use crate::Result; use roaring::RoaringBitmap; +use super::logger::SearchLogger; +use super::ranking_rule_graph::{ + EdgeDocidsCache, EmptyPathsCache, RankingRuleGraph, RankingRuleGraphTrait, +}; +use super::small_bitmap::SmallBitmap; +use super::{BitmapOrAllRef, QueryGraph, RankingRule, RankingRuleOutput, SearchContext}; +use crate::Result; + pub struct GraphBasedRankingRule { id: String, state: Option>, diff --git a/milli/src/search/new/interner.rs b/milli/src/search/new/interner.rs index ae0a4e9cb..8a8fad1e1 100644 --- a/milli/src/search/new/interner.rs +++ b/milli/src/search/new/interner.rs @@ -1,7 +1,8 @@ -use fxhash::FxHashMap; use std::hash::Hash; use std::marker::PhantomData; +use fxhash::FxHashMap; + pub struct Interned { idx: u32, _phantom: PhantomData, diff --git a/milli/src/search/new/logger/detailed.rs b/milli/src/search/new/logger/detailed.rs index 47b3e2ea2..c6570ef54 100644 --- a/milli/src/search/new/logger/detailed.rs +++ b/milli/src/search/new/logger/detailed.rs @@ -1,39 +1,37 @@ +use std::fs::File; +use std::io::Write; +use std::path::PathBuf; +use std::time::Instant; use rand::random; use roaring::RoaringBitmap; -use std::fs::File; -use std::time::Instant; -use std::{io::Write, path::PathBuf}; -use crate::new::ranking_rule_graph::TypoGraph; -use crate::new::small_bitmap::SmallBitmap; -use crate::new::{QueryNode, QueryGraph, SearchContext}; -use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations}; -use crate::new::ranking_rule_graph::EmptyPathsCache; -use crate::new::ranking_rule_graph::{Edge, EdgeDetails, RankingRuleGraphTrait}; -use crate::new::ranking_rule_graph::{ - ProximityGraph, RankingRuleGraph, +use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations}; +use crate::search::new::ranking_rule_graph::{ + Edge, EdgeDetails, EmptyPathsCache, ProximityGraph, RankingRuleGraph, RankingRuleGraphTrait, + TypoGraph, }; - -use super::{RankingRule, SearchLogger}; +use crate::search::new::small_bitmap::SmallBitmap; +use crate::search::new::{QueryGraph, QueryNode, SearchContext}; +use crate::search::new::{RankingRule, SearchLogger}; pub enum SearchEvents { RankingRuleStartIteration { ranking_rule_idx: usize, query: QueryGraph, universe: RoaringBitmap, - time: Instant + time: Instant, }, RankingRuleNextBucket { ranking_rule_idx: usize, universe: RoaringBitmap, candidates: RoaringBitmap, - time: Instant + time: Instant, }, RankingRuleEndIteration { ranking_rule_idx: usize, universe: RoaringBitmap, - time: Instant + time: Instant, }, ExtendResults { new: Vec, @@ -57,7 +55,11 @@ pub enum SearchEvents { distances: Vec>, cost: u16, }, - RankingRuleSkipBucket { ranking_rule_idx: usize, candidates: RoaringBitmap, time: Instant }, + RankingRuleSkipBucket { + ranking_rule_idx: usize, + candidates: RoaringBitmap, + time: Instant, + }, } pub struct DetailedSearchLogger { @@ -106,7 +108,6 @@ impl SearchLogger for DetailedSearchLogger { _ranking_rule: &dyn RankingRule<'transaction, QueryGraph>, query: &QueryGraph, universe: &RoaringBitmap, - ) { self.events.push(SearchEvents::RankingRuleStartIteration { ranking_rule_idx, @@ -122,7 +123,6 @@ impl SearchLogger for DetailedSearchLogger { _ranking_rule: &dyn RankingRule<'transaction, QueryGraph>, universe: &RoaringBitmap, candidates: &RoaringBitmap, - ) { self.events.push(SearchEvents::RankingRuleNextBucket { ranking_rule_idx, @@ -136,12 +136,11 @@ impl SearchLogger for DetailedSearchLogger { ranking_rule_idx: usize, _ranking_rule: &dyn RankingRule<'transaction, QueryGraph>, candidates: &RoaringBitmap, - ) { self.events.push(SearchEvents::RankingRuleSkipBucket { ranking_rule_idx, candidates: candidates.clone(), - time: Instant::now() + time: Instant::now(), }) } @@ -150,12 +149,11 @@ impl SearchLogger for DetailedSearchLogger { ranking_rule_idx: usize, _ranking_rule: &dyn RankingRule<'transaction, QueryGraph>, universe: &RoaringBitmap, - ) { self.events.push(SearchEvents::RankingRuleEndIteration { ranking_rule_idx, universe: universe.clone(), - time: Instant::now() + time: Instant::now(), }) } fn add_to_results(&mut self, docids: &[u32]) { @@ -166,18 +164,47 @@ impl SearchLogger for DetailedSearchLogger { self.events.push(SearchEvents::WordsState { query_graph: query_graph.clone() }); } - fn log_proximity_state(&mut self, query_graph: &RankingRuleGraph, paths_map: &[Vec], empty_paths_cache: &EmptyPathsCache, universe: &RoaringBitmap, distances: Vec>, cost: u16,) { - self.events.push(SearchEvents::ProximityState { graph: query_graph.clone(), paths: paths_map.to_vec(), empty_paths_cache: empty_paths_cache.clone(), universe: universe.clone(), distances, cost }) - } - - fn log_typo_state(&mut self, query_graph: &RankingRuleGraph, paths_map: &[Vec], empty_paths_cache: &EmptyPathsCache, universe: &RoaringBitmap, distances: Vec>, cost: u16,) { - self.events.push(SearchEvents::TypoState { graph: query_graph.clone(), paths: paths_map.to_vec(), empty_paths_cache: empty_paths_cache.clone(), universe: universe.clone(), distances, cost }) + fn log_proximity_state( + &mut self, + query_graph: &RankingRuleGraph, + paths_map: &[Vec], + empty_paths_cache: &EmptyPathsCache, + universe: &RoaringBitmap, + distances: Vec>, + cost: u16, + ) { + self.events.push(SearchEvents::ProximityState { + graph: query_graph.clone(), + paths: paths_map.to_vec(), + empty_paths_cache: empty_paths_cache.clone(), + universe: universe.clone(), + distances, + cost, + }) } + fn log_typo_state( + &mut self, + query_graph: &RankingRuleGraph, + paths_map: &[Vec], + empty_paths_cache: &EmptyPathsCache, + universe: &RoaringBitmap, + distances: Vec>, + cost: u16, + ) { + self.events.push(SearchEvents::TypoState { + graph: query_graph.clone(), + paths: paths_map.to_vec(), + empty_paths_cache: empty_paths_cache.clone(), + universe: universe.clone(), + distances, + cost, + }) + } } impl DetailedSearchLogger { - pub fn write_d2_description(&self,ctx: &mut SearchContext,) { + pub fn write_d2_description(&self, ctx: &mut SearchContext) { let mut prev_time = self.initial_query_time.unwrap(); let mut timestamp = vec![]; fn activated_id(timestamp: &[usize]) -> String { @@ -229,21 +256,29 @@ impl DetailedSearchLogger { ) .unwrap(); } - writeln!(&mut file, - "{ranking_rule_idx}.{self_activated_id} {{ + writeln!( + &mut file, + "{ranking_rule_idx}.{self_activated_id} {{ style {{ fill: \"#D8A7B1\" }} -}}").unwrap(); +}}" + ) + .unwrap(); } - SearchEvents::RankingRuleNextBucket { ranking_rule_idx, time, universe, candidates } => { + SearchEvents::RankingRuleNextBucket { + ranking_rule_idx, + time, + universe, + candidates, + } => { let _elapsed = time.duration_since(prev_time); prev_time = *time; let old_activated_id = activated_id(×tamp); // writeln!(&mut file, "time.{old_activated_id}: {:.2}", elapsed.as_micros() as f64 / 1000.0).unwrap(); *timestamp.last_mut().unwrap() += 1; let next_activated_id = activated_id(×tamp); - writeln!(&mut file, + writeln!(&mut file, "{ranking_rule_idx}.{old_activated_id} -> {ranking_rule_idx}.{next_activated_id} : next bucket {}/{}", candidates.len(), universe.len()) .unwrap(); } @@ -255,7 +290,7 @@ impl DetailedSearchLogger { *timestamp.last_mut().unwrap() += 1; let next_activated_id = activated_id(×tamp); let len = candidates.len(); - writeln!(&mut file, + writeln!(&mut file, "{ranking_rule_idx}.{old_activated_id} -> {ranking_rule_idx}.{next_activated_id} : skip bucket ({len})",) .unwrap(); } @@ -280,14 +315,14 @@ impl DetailedSearchLogger { } SearchEvents::ExtendResults { new } => { if new.is_empty() { - continue + continue; } let cur_ranking_rule = timestamp.len() - 1; let cur_activated_id = activated_id(×tamp); let docids = new.iter().collect::>(); let len = new.len(); let random = random::(); - + writeln!( &mut file, "{cur_ranking_rule}.{cur_activated_id} -> results.{random} : \"add {len}\" @@ -300,7 +335,7 @@ results.{random} {{ " ) .unwrap(); - }, + } SearchEvents::WordsState { query_graph } => { let cur_ranking_rule = timestamp.len() - 1; *timestamp.last_mut().unwrap() += 1; @@ -314,9 +349,18 @@ results.{random} {{ &mut file, "{id} {{ link: \"{id}.d2.svg\" -}}").unwrap(); - }, - SearchEvents::ProximityState { graph, paths, empty_paths_cache, universe, distances, cost } => { +}}" + ) + .unwrap(); + } + SearchEvents::ProximityState { + graph, + paths, + empty_paths_cache, + universe, + distances, + cost, + } => { let cur_ranking_rule = timestamp.len() - 1; *timestamp.last_mut().unwrap() += 1; let cur_activated_id = activated_id(×tamp); @@ -324,15 +368,32 @@ results.{random} {{ let id = format!("{cur_ranking_rule}.{cur_activated_id}"); let new_file_path = self.folder_path.join(format!("{id}.d2")); let mut new_file = std::fs::File::create(new_file_path).unwrap(); - Self::ranking_rule_graph_d2_description(ctx, graph, paths, empty_paths_cache, distances.clone(), &mut new_file); + Self::ranking_rule_graph_d2_description( + ctx, + graph, + paths, + empty_paths_cache, + distances.clone(), + &mut new_file, + ); writeln!( &mut file, "{id} {{ link: \"{id}.d2.svg\" tooltip: \"cost {cost}, universe len: {}\" -}}", universe.len()).unwrap(); - }, - SearchEvents::TypoState { graph, paths, empty_paths_cache, universe, distances, cost } => { +}}", + universe.len() + ) + .unwrap(); + } + SearchEvents::TypoState { + graph, + paths, + empty_paths_cache, + universe, + distances, + cost, + } => { let cur_ranking_rule = timestamp.len() - 1; *timestamp.last_mut().unwrap() += 1; let cur_activated_id = activated_id(×tamp); @@ -340,89 +401,130 @@ results.{random} {{ let id = format!("{cur_ranking_rule}.{cur_activated_id}"); let new_file_path = self.folder_path.join(format!("{id}.d2")); let mut new_file = std::fs::File::create(new_file_path).unwrap(); - Self::ranking_rule_graph_d2_description(ctx,graph, paths, empty_paths_cache, distances.clone(), &mut new_file); + Self::ranking_rule_graph_d2_description( + ctx, + graph, + paths, + empty_paths_cache, + distances.clone(), + &mut new_file, + ); writeln!( &mut file, "{id} {{ link: \"{id}.d2.svg\" tooltip: \"cost {cost}, universe len: {}\" -}}", universe.len()).unwrap(); - }, +}}", + universe.len() + ) + .unwrap(); + } } } writeln!(&mut file, "}}").unwrap(); } - - fn query_node_d2_desc(ctx: &mut SearchContext, node_idx: usize, node: &QueryNode, distances: &[(u16, SmallBitmap)], file: &mut File) { + + fn query_node_d2_desc( + ctx: &mut SearchContext, + node_idx: usize, + node: &QueryNode, + distances: &[(u16, SmallBitmap)], + file: &mut File, + ) { match &node { - QueryNode::Term(LocatedQueryTerm { value, .. }) => { - match value { - QueryTerm::Phrase { phrase } => { - let phrase = ctx.phrase_interner.get(*phrase); - let phrase_str = phrase.description(&ctx.word_interner); - writeln!(file,"{node_idx} : \"{phrase_str}\"").unwrap(); - }, - QueryTerm::Word { derivations: WordDerivations { original, zero_typo, one_typo, two_typos, use_prefix_db, synonyms, split_words } } => { - let original = ctx.word_interner.get(*original); - writeln!(file,"{node_idx} : \"{original}\" {{ -shape: class").unwrap(); - for w in zero_typo.iter().copied() { - let w = ctx.word_interner.get(w); - writeln!(file, "\"{w}\" : 0").unwrap(); - } - for w in one_typo.iter().copied() { - let w = ctx.word_interner.get(w); - writeln!(file, "\"{w}\" : 1").unwrap(); - } - for w in two_typos.iter().copied() { - let w = ctx.word_interner.get(w); - writeln!(file, "\"{w}\" : 2").unwrap(); - } - if let Some(split_words) = split_words { - let phrase = ctx.phrase_interner.get(*split_words); - let phrase_str = phrase.description(&ctx.word_interner); - writeln!(file, "\"{phrase_str}\" : split_words").unwrap(); - } - for synonym in synonyms.iter().copied() { - let phrase = ctx.phrase_interner.get(synonym); - let phrase_str = phrase.description(&ctx.word_interner); - writeln!(file, "\"{phrase_str}\" : synonym").unwrap(); - } - if *use_prefix_db { - writeln!(file, "use prefix DB : true").unwrap(); - } - for (d, edges) in distances.iter() { - writeln!(file, "\"distance {d}\" : {:?}", edges.iter().collect::>() ).unwrap(); - } - - writeln!(file, "}}").unwrap(); - }, + QueryNode::Term(LocatedQueryTerm { value, .. }) => match value { + QueryTerm::Phrase { phrase } => { + let phrase = ctx.phrase_interner.get(*phrase); + let phrase_str = phrase.description(&ctx.word_interner); + writeln!(file, "{node_idx} : \"{phrase_str}\"").unwrap(); + } + QueryTerm::Word { + derivations: + WordDerivations { + original, + zero_typo, + one_typo, + two_typos, + use_prefix_db, + synonyms, + split_words, + }, + } => { + let original = ctx.word_interner.get(*original); + writeln!( + file, + "{node_idx} : \"{original}\" {{ +shape: class" + ) + .unwrap(); + for w in zero_typo.iter().copied() { + let w = ctx.word_interner.get(w); + writeln!(file, "\"{w}\" : 0").unwrap(); + } + for w in one_typo.iter().copied() { + let w = ctx.word_interner.get(w); + writeln!(file, "\"{w}\" : 1").unwrap(); + } + for w in two_typos.iter().copied() { + let w = ctx.word_interner.get(w); + writeln!(file, "\"{w}\" : 2").unwrap(); + } + if let Some(split_words) = split_words { + let phrase = ctx.phrase_interner.get(*split_words); + let phrase_str = phrase.description(&ctx.word_interner); + writeln!(file, "\"{phrase_str}\" : split_words").unwrap(); + } + for synonym in synonyms.iter().copied() { + let phrase = ctx.phrase_interner.get(synonym); + let phrase_str = phrase.description(&ctx.word_interner); + writeln!(file, "\"{phrase_str}\" : synonym").unwrap(); + } + if *use_prefix_db { + writeln!(file, "use prefix DB : true").unwrap(); + } + for (d, edges) in distances.iter() { + writeln!(file, "\"distance {d}\" : {:?}", edges.iter().collect::>()) + .unwrap(); + } + + writeln!(file, "}}").unwrap(); } }, QueryNode::Deleted => panic!(), QueryNode::Start => { - writeln!(file,"{node_idx} : START").unwrap(); - }, + writeln!(file, "{node_idx} : START").unwrap(); + } QueryNode::End => { - writeln!(file,"{node_idx} : END").unwrap(); - }, + writeln!(file, "{node_idx} : END").unwrap(); + } } } - fn query_graph_d2_description(ctx: &mut SearchContext, query_graph: &QueryGraph, file: &mut File) { - writeln!(file,"direction: right").unwrap(); + fn query_graph_d2_description( + ctx: &mut SearchContext, + query_graph: &QueryGraph, + file: &mut File, + ) { + writeln!(file, "direction: right").unwrap(); for node in 0..query_graph.nodes.len() { if matches!(query_graph.nodes[node], QueryNode::Deleted) { continue; } Self::query_node_d2_desc(ctx, node, &query_graph.nodes[node], &[], file); - + for edge in query_graph.edges[node].successors.iter() { writeln!(file, "{node} -> {edge};\n").unwrap(); } - } + } } - fn ranking_rule_graph_d2_description(ctx: &mut SearchContext, graph: &RankingRuleGraph, paths: &[Vec], _empty_paths_cache: &EmptyPathsCache, distances: Vec>, file: &mut File) { - writeln!(file,"direction: right").unwrap(); + fn ranking_rule_graph_d2_description( + ctx: &mut SearchContext, + graph: &RankingRuleGraph, + paths: &[Vec], + _empty_paths_cache: &EmptyPathsCache, + distances: Vec>, + file: &mut File, + ) { + writeln!(file, "direction: right").unwrap(); writeln!(file, "Proximity Graph {{").unwrap(); for (node_idx, node) in graph.query_graph.nodes.iter().enumerate() { @@ -437,17 +539,21 @@ shape: class").unwrap(); match &details { EdgeDetails::Unconditional => { - writeln!(file, + writeln!( + file, "{from_node} -> {to_node} : \"always cost {cost}\"", cost = edge.cost, - ).unwrap(); + ) + .unwrap(); } EdgeDetails::Data(details) => { - writeln!(file, + writeln!( + file, "{from_node} -> {to_node} : \"cost {cost} {edge_label}\"", cost = edge.cost, edge_label = R::graphviz_edge_details_label(details) - ).unwrap(); + ) + .unwrap(); } } } @@ -457,12 +563,11 @@ shape: class").unwrap(); // Self::paths_d2_description(graph, paths, file); // writeln!(file, "}}").unwrap(); - writeln!(file, "Shortest Paths {{").unwrap(); Self::paths_d2_description(ctx, graph, paths, file); writeln!(file, "}}").unwrap(); - // writeln!(file, "Empty Edge Couples {{").unwrap(); + // writeln!(file, "Empty Edge Couples {{").unwrap(); // for (i, (e1, e2)) in empty_paths_cache.empty_couple_edges.iter().enumerate() { // writeln!(file, "{i} : \"\" {{").unwrap(); // Self::edge_d2_description(graph, *e1, file); @@ -478,18 +583,24 @@ shape: class").unwrap(); // } // writeln!(file, "}}").unwrap(); } - fn edge_d2_description(ctx: &mut SearchContext, graph: &RankingRuleGraph, edge_idx: u16, file: &mut File) { - let Edge { from_node, to_node, cost, .. } = graph.all_edges[edge_idx as usize].as_ref().unwrap() ; + fn edge_d2_description( + ctx: &mut SearchContext, + graph: &RankingRuleGraph, + edge_idx: u16, + file: &mut File, + ) { + let Edge { from_node, to_node, cost, .. } = + graph.all_edges[edge_idx as usize].as_ref().unwrap(); let from_node = &graph.query_graph.nodes[*from_node as usize]; let from_node_desc = match from_node { QueryNode::Term(term) => match &term.value { QueryTerm::Phrase { phrase } => { let phrase = ctx.phrase_interner.get(*phrase); phrase.description(&ctx.word_interner) - }, + } QueryTerm::Word { derivations } => { ctx.word_interner.get(derivations.original).to_owned() - }, + } }, QueryNode::Deleted => panic!(), QueryNode::Start => "START".to_owned(), @@ -501,18 +612,29 @@ shape: class").unwrap(); QueryTerm::Phrase { phrase } => { let phrase = ctx.phrase_interner.get(*phrase); phrase.description(&ctx.word_interner) - }, - QueryTerm::Word { derivations } => ctx.word_interner.get(derivations.original).to_owned(), + } + QueryTerm::Word { derivations } => { + ctx.word_interner.get(derivations.original).to_owned() + } }, QueryNode::Deleted => panic!(), QueryNode::Start => "START".to_owned(), QueryNode::End => "END".to_owned(), }; - writeln!(file, "{edge_idx}: \"{from_node_desc}->{to_node_desc} [{cost}]\" {{ + writeln!( + file, + "{edge_idx}: \"{from_node_desc}->{to_node_desc} [{cost}]\" {{ shape: class - }}").unwrap(); + }}" + ) + .unwrap(); } - fn paths_d2_description(ctx: &mut SearchContext, graph: &RankingRuleGraph, paths: &[Vec], file: &mut File) { + fn paths_d2_description( + ctx: &mut SearchContext, + graph: &RankingRuleGraph, + paths: &[Vec], + file: &mut File, + ) { for (path_idx, edge_indexes) in paths.iter().enumerate() { writeln!(file, "{path_idx} {{").unwrap(); for edge_idx in edge_indexes.iter() { diff --git a/milli/src/search/new/logger/mod.rs b/milli/src/search/new/logger/mod.rs index 8a10fd064..11e1389d0 100644 --- a/milli/src/search/new/logger/mod.rs +++ b/milli/src/search/new/logger/mod.rs @@ -3,11 +3,9 @@ pub mod detailed; use roaring::RoaringBitmap; -use super::{ - ranking_rule_graph::{EmptyPathsCache, ProximityGraph, RankingRuleGraph, TypoGraph}, - small_bitmap::SmallBitmap, - RankingRule, RankingRuleQueryTrait, -}; +use super::ranking_rule_graph::{EmptyPathsCache, ProximityGraph, RankingRuleGraph, TypoGraph}; +use super::small_bitmap::SmallBitmap; +use super::{RankingRule, RankingRuleQueryTrait}; pub struct DefaultSearchLogger; impl SearchLogger for DefaultSearchLogger { diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index dc73fe51c..0dbdd93b0 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -11,12 +11,8 @@ mod small_bitmap; mod sort; mod words; -use self::interner::Interner; -use self::logger::SearchLogger; -use self::query_term::Phrase; -use self::resolve_query_graph::{resolve_query_graph, NodeDocIdsCache}; -use crate::new::query_term::located_query_terms_from_string; -use crate::{Filter, Index, Result, TermsMatchingStrategy}; +use std::collections::BTreeSet; + use charabia::Tokenize; use db_cache::DatabaseCache; use heed::RoTxn; @@ -26,7 +22,13 @@ pub use ranking_rules::{ RankingRuleOutputIterWrapper, RankingRuleQueryTrait, }; use roaring::RoaringBitmap; -use std::collections::BTreeSet; + +use self::interner::Interner; +use self::logger::SearchLogger; +use self::query_term::Phrase; +use self::resolve_query_graph::{resolve_query_graph, NodeDocIdsCache}; +use crate::search::new::query_term::located_query_terms_from_string; +use crate::{Filter, Index, Result, TermsMatchingStrategy}; pub enum BitmapOrAllRef<'s> { Bitmap(&'s RoaringBitmap), diff --git a/milli/src/search/new/query_term.rs b/milli/src/search/new/query_term.rs index b5e29bffc..46a62b4a9 100644 --- a/milli/src/search/new/query_term.rs +++ b/milli/src/search/new/query_term.rs @@ -12,13 +12,12 @@ use heed::types::DecodeIgnore; use heed::RoTxn; use itertools::Itertools; +use super::interner::{Interned, Interner}; +use super::SearchContext; use crate::search::fst_utils::{Complement, Intersection, StartsWith, Union}; use crate::search::{build_dfa, get_first}; use crate::{CboRoaringBitmapLenCodec, Index, Result}; -use super::interner::{Interned, Interner}; -use super::SearchContext; - #[derive(Default, Clone, PartialEq, Eq, Hash)] pub struct Phrase { pub words: Vec>>, diff --git a/milli/src/search/new/ranking_rule_graph/build.rs b/milli/src/search/new/ranking_rule_graph/build.rs index 261f2909b..d9732b010 100644 --- a/milli/src/search/new/ranking_rule_graph/build.rs +++ b/milli/src/search/new/ranking_rule_graph/build.rs @@ -1,8 +1,8 @@ use std::collections::HashSet; use super::{Edge, RankingRuleGraph, RankingRuleGraphTrait}; -use crate::new::small_bitmap::SmallBitmap; -use crate::new::{QueryGraph, SearchContext}; +use crate::search::new::small_bitmap::SmallBitmap; +use crate::search::new::{QueryGraph, SearchContext}; use crate::Result; impl RankingRuleGraph { diff --git a/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs b/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs index 1adade945..8627860e7 100644 --- a/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs +++ b/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs @@ -1,11 +1,12 @@ #![allow(clippy::too_many_arguments)] +use std::collections::btree_map::Entry; +use std::collections::{BTreeMap, VecDeque}; + use super::empty_paths_cache::EmptyPathsCache; use super::{RankingRuleGraph, RankingRuleGraphTrait}; -use crate::new::small_bitmap::SmallBitmap; +use crate::search::new::small_bitmap::SmallBitmap; use crate::Result; -use std::collections::btree_map::Entry; -use std::collections::{BTreeMap, VecDeque}; #[derive(Debug, Clone, PartialEq, Eq, Hash)] pub struct Path { diff --git a/milli/src/search/new/ranking_rule_graph/edge_docids_cache.rs b/milli/src/search/new/ranking_rule_graph/edge_docids_cache.rs index 9823c4fcc..c0c46289c 100644 --- a/milli/src/search/new/ranking_rule_graph/edge_docids_cache.rs +++ b/milli/src/search/new/ranking_rule_graph/edge_docids_cache.rs @@ -1,11 +1,12 @@ use std::marker::PhantomData; -use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait}; -use crate::new::{BitmapOrAllRef, SearchContext}; -use crate::Result; use fxhash::FxHashMap; use roaring::RoaringBitmap; +use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait}; +use crate::search::new::{BitmapOrAllRef, SearchContext}; +use crate::Result; + // TODO: the cache should have a G::EdgeDetails as key // but then it means that we should have a quick way of // computing their hash and comparing them diff --git a/milli/src/search/new/ranking_rule_graph/empty_paths_cache.rs b/milli/src/search/new/ranking_rule_graph/empty_paths_cache.rs index 3c8fb5184..659042a01 100644 --- a/milli/src/search/new/ranking_rule_graph/empty_paths_cache.rs +++ b/milli/src/search/new/ranking_rule_graph/empty_paths_cache.rs @@ -1,6 +1,5 @@ -use crate::new::small_bitmap::SmallBitmap; - use super::paths_map::PathsMap; +use crate::search::new::small_bitmap::SmallBitmap; #[derive(Clone)] pub struct EmptyPathsCache { diff --git a/milli/src/search/new/ranking_rule_graph/mod.rs b/milli/src/search/new/ranking_rule_graph/mod.rs index 6d7445eac..635f194f5 100644 --- a/milli/src/search/new/ranking_rule_graph/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/mod.rs @@ -6,16 +6,17 @@ mod paths_map; mod proximity; mod typo; -use super::logger::SearchLogger; -use super::small_bitmap::SmallBitmap; -use super::{QueryGraph, QueryNode, SearchContext}; -use crate::Result; pub use edge_docids_cache::EdgeDocidsCache; pub use empty_paths_cache::EmptyPathsCache; pub use proximity::ProximityGraph; use roaring::RoaringBitmap; pub use typo::TypoGraph; +use super::logger::SearchLogger; +use super::small_bitmap::SmallBitmap; +use super::{QueryGraph, QueryNode, SearchContext}; +use crate::Result; + #[derive(Debug, Clone)] pub enum EdgeDetails { Unconditional, diff --git a/milli/src/search/new/ranking_rule_graph/paths_map.rs b/milli/src/search/new/ranking_rule_graph/paths_map.rs index 0cce9c93f..82f181b97 100644 --- a/milli/src/search/new/ranking_rule_graph/paths_map.rs +++ b/milli/src/search/new/ranking_rule_graph/paths_map.rs @@ -1,11 +1,10 @@ -use crate::new::small_bitmap::SmallBitmap; use super::cheapest_paths::Path; +use crate::search::new::small_bitmap::SmallBitmap; // What is PathsMap used for? // For the empty_prefixes field in the EmptyPathsCache only :/ // but it could be used for more, like efficient computing of a set of paths - #[derive(Debug, Clone)] pub struct PathsMap { pub nodes: Vec<(u16, PathsMap)>, @@ -53,10 +52,10 @@ impl PathsMap { } } fn remove_first_rec(&mut self, cur: &mut Vec) -> (bool, V) { - let Some((first_edge, rest)) = self.nodes.first_mut() else { + let Some((first_edge, rest)) = self.nodes.first_mut() else { // The PathsMap has to be correct by construction here, otherwise // the unwrap() will crash - return (true, self.value.take().unwrap()) + return (true, self.value.take().unwrap()) }; cur.push(*first_edge); let (rest_is_empty, value) = rest.remove_first_rec(cur); diff --git a/milli/src/search/new/ranking_rule_graph/proximity/build.rs b/milli/src/search/new/ranking_rule_graph/proximity/build.rs index e0bc1f5e4..48a6dda7e 100644 --- a/milli/src/search/new/ranking_rule_graph/proximity/build.rs +++ b/milli/src/search/new/ranking_rule_graph/proximity/build.rs @@ -1,12 +1,14 @@ -use super::ProximityEdge; -use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations}; -use crate::new::ranking_rule_graph::proximity::WordPair; -use crate::new::ranking_rule_graph::EdgeDetails; -use crate::new::{QueryNode, SearchContext}; -use crate::Result; -use itertools::Itertools; use std::collections::BTreeMap; +use itertools::Itertools; + +use super::ProximityEdge; +use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations}; +use crate::search::new::ranking_rule_graph::proximity::WordPair; +use crate::search::new::ranking_rule_graph::EdgeDetails; +use crate::search::new::{QueryNode, SearchContext}; +use crate::Result; + pub fn visit_from_node( ctx: &mut SearchContext, from_node: &QueryNode, diff --git a/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs b/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs index 94a46d670..9aa4ce446 100644 --- a/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs +++ b/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs @@ -1,8 +1,9 @@ -use super::{ProximityEdge, WordPair}; -use crate::new::SearchContext; -use crate::{CboRoaringBitmapCodec, Result}; use roaring::RoaringBitmap; +use super::{ProximityEdge, WordPair}; +use crate::search::new::SearchContext; +use crate::{CboRoaringBitmapCodec, Result}; + pub fn compute_docids<'search>( ctx: &mut SearchContext<'search>, edge: &ProximityEdge, diff --git a/milli/src/search/new/ranking_rule_graph/proximity/mod.rs b/milli/src/search/new/ranking_rule_graph/proximity/mod.rs index 7cc4f995f..09c9aa960 100644 --- a/milli/src/search/new/ranking_rule_graph/proximity/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/proximity/mod.rs @@ -1,15 +1,16 @@ pub mod build; pub mod compute_docids; +use roaring::RoaringBitmap; + use super::empty_paths_cache::EmptyPathsCache; use super::{EdgeDetails, RankingRuleGraphTrait}; -use crate::new::interner::Interned; -use crate::new::logger::SearchLogger; -use crate::new::query_term::WordDerivations; -use crate::new::small_bitmap::SmallBitmap; -use crate::new::{QueryGraph, QueryNode, SearchContext}; +use crate::search::new::interner::Interned; +use crate::search::new::logger::SearchLogger; +use crate::search::new::query_term::WordDerivations; +use crate::search::new::small_bitmap::SmallBitmap; +use crate::search::new::{QueryGraph, QueryNode, SearchContext}; use crate::Result; -use roaring::RoaringBitmap; // TODO: intern the proximity edges as well? diff --git a/milli/src/search/new/ranking_rule_graph/typo/mod.rs b/milli/src/search/new/ranking_rule_graph/typo/mod.rs index d3aec7174..ce569fbb0 100644 --- a/milli/src/search/new/ranking_rule_graph/typo/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/typo/mod.rs @@ -1,15 +1,16 @@ -use super::empty_paths_cache::EmptyPathsCache; -use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait}; -use crate::new::interner::Interned; -use crate::new::logger::SearchLogger; -use crate::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm, WordDerivations}; -use crate::new::resolve_query_graph::resolve_phrase; -use crate::new::small_bitmap::SmallBitmap; -use crate::new::{QueryGraph, QueryNode, SearchContext}; -use crate::{Result, RoaringBitmapCodec}; use heed::BytesDecode; use roaring::RoaringBitmap; +use super::empty_paths_cache::EmptyPathsCache; +use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait}; +use crate::search::new::interner::Interned; +use crate::search::new::logger::SearchLogger; +use crate::search::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm, WordDerivations}; +use crate::search::new::resolve_query_graph::resolve_phrase; +use crate::search::new::small_bitmap::SmallBitmap; +use crate::search::new::{QueryGraph, QueryNode, SearchContext}; +use crate::{Result, RoaringBitmapCodec}; + #[derive(Clone)] pub enum TypoEdge { Phrase { phrase: Interned }, diff --git a/milli/src/search/new/ranking_rules.rs b/milli/src/search/new/ranking_rules.rs index 82216c9cf..788f8a496 100644 --- a/milli/src/search/new/ranking_rules.rs +++ b/milli/src/search/new/ranking_rules.rs @@ -1,11 +1,10 @@ -use super::logger::SearchLogger; -use super::QueryGraph; -use super::SearchContext; -use crate::new::graph_based_ranking_rule::GraphBasedRankingRule; -use crate::new::ranking_rule_graph::ProximityGraph; -use crate::new::ranking_rule_graph::TypoGraph; -use crate::new::words::Words; use roaring::RoaringBitmap; + +use super::logger::SearchLogger; +use super::{QueryGraph, SearchContext}; +use crate::search::new::graph_based_ranking_rule::GraphBasedRankingRule; +use crate::search::new::ranking_rule_graph::{ProximityGraph, TypoGraph}; +use crate::search::new::words::Words; // use crate::search::new::sort::Sort; use crate::{Result, TermsMatchingStrategy}; @@ -239,16 +238,18 @@ pub fn apply_ranking_rules<'search>( #[cfg(test)] mod tests { // use crate::allocator::ALLOC; - use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; - use crate::new::{execute_search, SearchContext}; - use big_s::S; - use heed::EnvOpenOptions; - use maplit::hashset; use std::fs::File; use std::io::{BufRead, BufReader, Cursor, Seek}; use std::time::Instant; - // use crate::new::logger::detailed::DetailedSearchLogger; - use crate::new::logger::DefaultSearchLogger; + + use big_s::S; + use heed::EnvOpenOptions; + use maplit::hashset; + + use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; + // use crate::search::new::logger::detailed::DetailedSearchLogger; + use crate::search::new::logger::DefaultSearchLogger; + use crate::search::new::{execute_search, SearchContext}; use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings}; use crate::{Criterion, Index, Object, Search, TermsMatchingStrategy}; @@ -265,7 +266,7 @@ mod tests { // loop { let start = Instant::now(); - // let mut logger = crate::new::logger::detailed::DetailedSearchLogger::new("log"); + // let mut logger = crate::search::new::logger::detailed::DetailedSearchLogger::new("log"); let mut ctx = SearchContext::new(&index, &txn); let results = execute_search( &mut ctx, @@ -362,7 +363,7 @@ mod tests { // loop { let start = Instant::now(); - let mut logger = crate::new::logger::detailed::DetailedSearchLogger::new("log"); + let mut logger = crate::search::new::logger::detailed::DetailedSearchLogger::new("log"); let mut ctx = SearchContext::new(&index, &txn); let results = execute_search( &mut ctx, diff --git a/milli/src/search/new/resolve_query_graph.rs b/milli/src/search/new/resolve_query_graph.rs index 4fa0912e1..0581341d1 100644 --- a/milli/src/search/new/resolve_query_graph.rs +++ b/milli/src/search/new/resolve_query_graph.rs @@ -1,12 +1,14 @@ +use std::collections::VecDeque; + +use fxhash::FxHashMap; +use heed::BytesDecode; +use roaring::{MultiOps, RoaringBitmap}; + use super::interner::Interned; use super::query_term::{Phrase, QueryTerm, WordDerivations}; use super::small_bitmap::SmallBitmap; use super::{QueryGraph, QueryNode, SearchContext}; use crate::{CboRoaringBitmapCodec, Result, RoaringBitmapCodec}; -use fxhash::FxHashMap; -use heed::BytesDecode; -use roaring::{MultiOps, RoaringBitmap}; -use std::collections::VecDeque; // TODO: manual performance metrics: access to DB, bitmap deserializations/operations, etc. #[derive(Default)] diff --git a/milli/src/search/new/sort.rs b/milli/src/search/new/sort.rs index f0967843b..d5a6276ad 100644 --- a/milli/src/search/new/sort.rs +++ b/milli/src/search/new/sort.rs @@ -1,3 +1,5 @@ +use roaring::RoaringBitmap; + use super::logger::SearchLogger; use super::{ RankingRule, RankingRuleOutput, RankingRuleOutputIter, RankingRuleOutputIterWrapper, @@ -11,7 +13,6 @@ use crate::{ Index, Result, }; -use roaring::RoaringBitmap; pub struct Sort<'search, Query> { field_name: String, diff --git a/milli/src/search/new/words.rs b/milli/src/search/new/words.rs index 9ad8b33ba..2858e1569 100644 --- a/milli/src/search/new/words.rs +++ b/milli/src/search/new/words.rs @@ -1,9 +1,11 @@ +use std::collections::BTreeSet; + +use roaring::RoaringBitmap; + use super::logger::SearchLogger; use super::resolve_query_graph::resolve_query_graph; use super::{QueryGraph, QueryNode, RankingRule, RankingRuleOutput, SearchContext}; use crate::{Result, TermsMatchingStrategy}; -use roaring::RoaringBitmap; -use std::collections::BTreeSet; pub struct Words { exhausted: bool,