diff --git a/milli/src/lib.rs b/milli/src/lib.rs index 6de737042..66dd33036 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -1,5 +1,5 @@ #![cfg_attr(all(test, fuzzing), feature(no_coverage))] -#![allow(unused, clippy::type_complexity)] +#![allow(clippy::type_complexity)] #[cfg(test)] #[global_allocator] diff --git a/milli/src/search/new/graph_based_ranking_rule.rs b/milli/src/search/new/graph_based_ranking_rule.rs index a466714e3..2bf7885bd 100644 --- a/milli/src/search/new/graph_based_ranking_rule.rs +++ b/milli/src/search/new/graph_based_ranking_rule.rs @@ -9,7 +9,7 @@ use super::ranking_rule_graph::empty_paths_cache::EmptyPathsCache; use super::ranking_rule_graph::paths_map::PathsMap; use super::ranking_rule_graph::{RankingRuleGraph, RankingRuleGraphTrait}; use super::{QueryGraph, RankingRule, RankingRuleOutput}; -use crate::new::ranking_rule_graph::cheapest_paths::{self, Path}; + use crate::{Index, Result}; pub struct GraphBasedRankingRule { @@ -40,8 +40,8 @@ impl<'transaction, G: RankingRuleGraphTrait> RankingRule<'transaction, QueryGrap index: &Index, txn: &'transaction RoTxn, db_cache: &mut DatabaseCache<'transaction>, - logger: &mut dyn SearchLogger, - universe: &RoaringBitmap, + _logger: &mut dyn SearchLogger, + _universe: &RoaringBitmap, query_graph: &QueryGraph, ) -> Result<()> { // TODO: update old state instead of starting from scratch @@ -117,7 +117,7 @@ impl<'transaction, G: RankingRuleGraphTrait> RankingRule<'transaction, QueryGrap _index: &Index, _txn: &'transaction RoTxn, _db_cache: &mut DatabaseCache<'transaction>, - logger: &mut dyn SearchLogger, + _logger: &mut dyn SearchLogger, ) { self.state = None; } diff --git a/milli/src/search/new/logger/detailed.rs b/milli/src/search/new/logger/detailed.rs index dc79a8d29..81571c14a 100644 --- a/milli/src/search/new/logger/detailed.rs +++ b/milli/src/search/new/logger/detailed.rs @@ -1,10 +1,9 @@ use rand::random; use roaring::RoaringBitmap; use std::fs::File; -use std::path::Path; use 
std::{io::Write, path::PathBuf}; -use crate::new::QueryNode; +use crate::new::{QueryNode, QueryGraph}; use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations}; use crate::new::ranking_rule_graph::empty_paths_cache::EmptyPathsCache; use crate::new::ranking_rule_graph::{Edge, EdgeDetails, RankingRuleGraphTrait}; @@ -12,7 +11,7 @@ use crate::new::ranking_rule_graph::{ paths_map::PathsMap, proximity::ProximityGraph, RankingRuleGraph, }; -use super::{QueryGraph, RankingRule, RankingRuleQueryTrait, SearchLogger}; +use super::{RankingRule, SearchLogger}; pub enum SearchEvents { RankingRuleStartIteration { @@ -76,7 +75,7 @@ impl SearchLogger for DetailedSearchLogger { fn start_iteration_ranking_rule<'transaction>( &mut self, ranking_rule_idx: usize, - ranking_rule: &dyn RankingRule<'transaction, QueryGraph>, + _ranking_rule: &dyn RankingRule<'transaction, QueryGraph>, query: &QueryGraph, universe: &RoaringBitmap, ) { @@ -90,7 +89,7 @@ impl SearchLogger for DetailedSearchLogger { fn next_bucket_ranking_rule<'transaction>( &mut self, ranking_rule_idx: usize, - ranking_rule: &dyn RankingRule<'transaction, QueryGraph>, + _ranking_rule: &dyn RankingRule<'transaction, QueryGraph>, universe: &RoaringBitmap, ) { self.events.push(SearchEvents::RankingRuleNextBucket { @@ -101,7 +100,7 @@ impl SearchLogger for DetailedSearchLogger { fn skip_bucket_ranking_rule<'transaction>( &mut self, ranking_rule_idx: usize, - ranking_rule: &dyn RankingRule<'transaction, QueryGraph>, + _ranking_rule: &dyn RankingRule<'transaction, QueryGraph>, candidates: &RoaringBitmap, ) { self.events.push(SearchEvents::RankingRuleSkipBucket { @@ -113,7 +112,7 @@ impl SearchLogger for DetailedSearchLogger { fn end_iteration_ranking_rule<'transaction>( &mut self, ranking_rule_idx: usize, - ranking_rule: &dyn RankingRule<'transaction, QueryGraph>, + _ranking_rule: &dyn RankingRule<'transaction, QueryGraph>, universe: &RoaringBitmap, ) { self.events.push(SearchEvents::RankingRuleEndIteration { @@ 
-138,7 +137,6 @@ impl SearchLogger for DetailedSearchLogger { impl DetailedSearchLogger { pub fn write_d2_description(&self) { - let mut timestamp_idx = 0; let mut timestamp = vec![]; fn activated_id(timestamp: &[usize]) -> String { let mut s = String::new(); @@ -152,14 +150,14 @@ impl DetailedSearchLogger { let index_path = self.folder_path.join("index.d2"); let mut file = std::fs::File::create(index_path).unwrap(); writeln!(&mut file, "Control Flow Between Ranking Rules: {{").unwrap(); - writeln!(&mut file, "shape: sequence_diagram"); + writeln!(&mut file, "shape: sequence_diagram").unwrap(); for (idx, rr_id) in self.ranking_rules_ids.as_ref().unwrap().iter().enumerate() { writeln!(&mut file, "{idx}: {rr_id}").unwrap(); } - writeln!(&mut file, "results"); + writeln!(&mut file, "results").unwrap(); for event in self.events.iter() { match event { - SearchEvents::RankingRuleStartIteration { query, universe, ranking_rule_idx } => { + SearchEvents::RankingRuleStartIteration { ranking_rule_idx, .. } => { let parent_activated_id = activated_id(&timestamp); timestamp.push(0); @@ -179,7 +177,7 @@ impl DetailedSearchLogger { }} }}").unwrap(); } - SearchEvents::RankingRuleNextBucket { universe, ranking_rule_idx } => { + SearchEvents::RankingRuleNextBucket { ranking_rule_idx, .. } => { let old_activated_id = activated_id(&timestamp); *timestamp.last_mut().unwrap() += 1; let next_activated_id = activated_id(&timestamp); @@ -196,7 +194,7 @@ impl DetailedSearchLogger { "{ranking_rule_idx}.{old_activated_id} -> {ranking_rule_idx}.{next_activated_id} : skip bucket ({len})",) .unwrap(); } - SearchEvents::RankingRuleEndIteration { universe, ranking_rule_idx } => { + SearchEvents::RankingRuleEndIteration { ranking_rule_idx, ..
} => { let cur_activated_id = activated_id(&timestamp); timestamp.pop(); let parent_activated_id = activated_id(&timestamp); @@ -238,7 +236,7 @@ results.{random} {{ let cur_ranking_rule = timestamp.len() - 1; let cur_activated_id = activated_id(&timestamp); let id = format!("{cur_ranking_rule}.{cur_activated_id}"); - let mut new_file_path = self.folder_path.join(format!("{id}.d2")); + let new_file_path = self.folder_path.join(format!("{id}.d2")); let mut new_file = std::fs::File::create(new_file_path).unwrap(); Self::query_graph_d2_description(query_graph, &mut new_file); writeln!( @@ -251,7 +249,7 @@ results.{random} {{ let cur_ranking_rule = timestamp.len() - 1; let cur_activated_id = activated_id(&timestamp); let id = format!("{cur_ranking_rule}.{cur_activated_id}"); - let mut new_file_path = self.folder_path.join(format!("{id}.d2")); + let new_file_path = self.folder_path.join(format!("{id}.d2")); let mut new_file = std::fs::File::create(new_file_path).unwrap(); Self::proximity_graph_d2_description(graph, paths, empty_paths_cache, &mut new_file); writeln!( @@ -262,12 +260,12 @@ results.{random} {{ }, } } - writeln!(&mut file, "}}"); + writeln!(&mut file, "}}").unwrap(); } fn query_node_d2_desc(node_idx: usize, node: &QueryNode, file: &mut File) { match &node { - QueryNode::Term(LocatedQueryTerm { value, positions }) => { + QueryNode::Term(LocatedQueryTerm { value, ..
}) => { match value { QueryTerm::Phrase(_) => todo!(), QueryTerm::Word { derivations: WordDerivations { original, zero_typo, one_typo, two_typos, use_prefix_db } } => { @@ -299,7 +297,7 @@ shape: class").unwrap(); } } fn query_graph_d2_description(query_graph: &QueryGraph, file: &mut File) { - writeln!(file,"direction: right"); + writeln!(file,"direction: right").unwrap(); for node in 0..query_graph.nodes.len() { if matches!(query_graph.nodes[node], QueryNode::Deleted) { continue; @@ -322,21 +320,21 @@ shape: class").unwrap(); Self::query_node_d2_desc(node_idx, node, file); } for edge in graph.all_edges.iter().flatten() { - let Edge { from_node, to_node, cost, details } = edge; + let Edge { from_node, to_node, details, .. } = edge; match &details { EdgeDetails::Unconditional => { writeln!(file, "{from_node} -> {to_node} : \"always cost {cost}\"", cost = edge.cost, - ); + ).unwrap(); } EdgeDetails::Data(details) => { writeln!(file, "{from_node} -> {to_node} : \"cost {cost} {edge_label}\"", cost = edge.cost, edge_label = ProximityGraph::graphviz_edge_details_label(details) - ); + ).unwrap(); } } } diff --git a/milli/src/search/new/logger/mod.rs b/milli/src/search/new/logger/mod.rs index fd39819ed..6b1f95152 100644 --- a/milli/src/search/new/logger/mod.rs +++ b/milli/src/search/new/logger/mod.rs @@ -4,62 +4,61 @@ pub mod detailed; use roaring::RoaringBitmap; use super::{ - query_graph, ranking_rule_graph::{ empty_paths_cache::EmptyPathsCache, paths_map::PathsMap, proximity::ProximityGraph, RankingRuleGraph, }, - QueryGraph, RankingRule, RankingRuleQueryTrait, + RankingRule, RankingRuleQueryTrait, }; pub struct DefaultSearchLogger; impl SearchLogger for DefaultSearchLogger { - fn initial_query(&mut self, query: &Q) {} + fn initial_query(&mut self, _query: &Q) {} - fn initial_universe(&mut self, universe: &RoaringBitmap) {} + fn initial_universe(&mut self, _universe: &RoaringBitmap) {} - fn ranking_rules(&mut self, rr: &[Box>]) {} + fn ranking_rules(&mut self, _rr: 
&[Box>]) {} fn start_iteration_ranking_rule<'transaction>( &mut self, - ranking_rule_idx: usize, - ranking_rule: &dyn RankingRule<'transaction, Q>, - query: &Q, - universe: &RoaringBitmap, + _ranking_rule_idx: usize, + _ranking_rule: &dyn RankingRule<'transaction, Q>, + _query: &Q, + _universe: &RoaringBitmap, ) { } fn next_bucket_ranking_rule<'transaction>( &mut self, - ranking_rule_idx: usize, - ranking_rule: &dyn RankingRule<'transaction, Q>, - universe: &RoaringBitmap, + _ranking_rule_idx: usize, + _ranking_rule: &dyn RankingRule<'transaction, Q>, + _universe: &RoaringBitmap, ) { } fn skip_bucket_ranking_rule<'transaction>( &mut self, - ranking_rule_idx: usize, - ranking_rule: &dyn RankingRule<'transaction, Q>, - candidates: &RoaringBitmap, + _ranking_rule_idx: usize, + _ranking_rule: &dyn RankingRule<'transaction, Q>, + _candidates: &RoaringBitmap, ) { } fn end_iteration_ranking_rule<'transaction>( &mut self, - ranking_rule_idx: usize, - ranking_rule: &dyn RankingRule<'transaction, Q>, - universe: &RoaringBitmap, + _ranking_rule_idx: usize, + _ranking_rule: &dyn RankingRule<'transaction, Q>, + _universe: &RoaringBitmap, ) { } - fn add_to_results(&mut self, docids: &[u32]) {} + fn add_to_results(&mut self, _docids: &[u32]) {} - fn log_words_state(&mut self, query_graph: &Q) {} + fn log_words_state(&mut self, _query_graph: &Q) {} fn log_proximity_state( &mut self, - query_graph: &RankingRuleGraph, - paths_map: &PathsMap, - empty_paths_cache: &EmptyPathsCache, + _query_graph: &RankingRuleGraph, + _paths_map: &PathsMap, + _empty_paths_cache: &EmptyPathsCache, ) { } } diff --git a/milli/src/search/new/query_graph.rs b/milli/src/search/new/query_graph.rs index c07343c9b..422896068 100644 --- a/milli/src/search/new/query_graph.rs +++ b/milli/src/search/new/query_graph.rs @@ -1,5 +1,3 @@ -use std::collections::HashSet; -use std::fmt; use std::fmt::Debug; use heed::RoTxn; diff --git a/milli/src/search/new/ranking_rule_graph/build.rs 
b/milli/src/search/new/ranking_rule_graph/build.rs index 8e7dd7a04..a0fdd79c6 100644 --- a/milli/src/search/new/ranking_rule_graph/build.rs +++ b/milli/src/search/new/ranking_rule_graph/build.rs @@ -1,5 +1,3 @@ -use std::collections::{BTreeSet, HashSet}; - use heed::RoTxn; use roaring::RoaringBitmap; diff --git a/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs b/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs index e58950c98..e46f6ce66 100644 --- a/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs +++ b/milli/src/search/new/ranking_rule_graph/cheapest_paths.rs @@ -1,6 +1,5 @@ use std::collections::{BTreeMap, HashSet}; -use itertools::Itertools; use roaring::RoaringBitmap; use super::empty_paths_cache::EmptyPathsCache; diff --git a/milli/src/search/new/ranking_rule_graph/edge_docids_cache.rs b/milli/src/search/new/ranking_rule_graph/edge_docids_cache.rs index dddbda6af..cb3e3da38 100644 --- a/milli/src/search/new/ranking_rule_graph/edge_docids_cache.rs +++ b/milli/src/search/new/ranking_rule_graph/edge_docids_cache.rs @@ -1,4 +1,3 @@ -use std::collections::HashMap; use std::marker::PhantomData; use fxhash::FxHashMap; @@ -22,10 +21,7 @@ pub struct EdgeDocidsCache { } impl Default for EdgeDocidsCache { fn default() -> Self { - Self { - cache: Default::default(), - _phantom: Default::default(), - } + Self { cache: Default::default(), _phantom: Default::default() } } } impl EdgeDocidsCache { diff --git a/milli/src/search/new/ranking_rule_graph/empty_paths_cache.rs b/milli/src/search/new/ranking_rule_graph/empty_paths_cache.rs index d8d645092..db68838b5 100644 --- a/milli/src/search/new/ranking_rule_graph/empty_paths_cache.rs +++ b/milli/src/search/new/ranking_rule_graph/empty_paths_cache.rs @@ -1,5 +1,3 @@ -use std::collections::HashSet; - use roaring::RoaringBitmap; use super::paths_map::PathsMap; diff --git a/milli/src/search/new/ranking_rule_graph/mod.rs b/milli/src/search/new/ranking_rule_graph/mod.rs index d939b6923..3a396f3dc 100644 --- 
a/milli/src/search/new/ranking_rule_graph/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/mod.rs @@ -5,7 +5,7 @@ pub mod empty_paths_cache; pub mod paths_map; pub mod proximity; pub mod resolve_paths; -use std::collections::{BTreeSet, HashSet}; + use std::ops::ControlFlow; use heed::RoTxn; @@ -137,7 +137,7 @@ impl RankingRuleGraph { fn remove_edge(&mut self, edge_index: u32) { let edge_opt = &mut self.all_edges[edge_index as usize]; let Some(edge) = &edge_opt else { return }; - let (from_node, to_node) = (edge.from_node, edge.to_node); + let (from_node, _to_node) = (edge.from_node, edge.to_node); *edge_opt = None; let from_node_edges = &mut self.node_edges[from_node as usize]; @@ -168,7 +168,7 @@ impl RankingRuleGraph { desc.push_str(";\n"); } for edge in self.all_edges.iter().flatten() { - let Edge { from_node, to_node, cost, details } = edge; + let Edge { from_node, to_node, details, .. } = edge; match &details { EdgeDetails::Unconditional => { diff --git a/milli/src/search/new/ranking_rule_graph/paths_map.rs b/milli/src/search/new/ranking_rule_graph/paths_map.rs index 8360b1975..111b55140 100644 --- a/milli/src/search/new/ranking_rule_graph/paths_map.rs +++ b/milli/src/search/new/ranking_rule_graph/paths_map.rs @@ -1,5 +1,4 @@ use std::collections::hash_map::DefaultHasher; -use std::collections::HashSet; use std::fmt::Write; use std::hash::{Hash, Hasher}; @@ -206,7 +205,7 @@ impl PathsMap { h.finish() }; for (edge_idx, rest) in self.nodes.iter() { - let Some(Edge { from_node, to_node, cost, details }) = graph.all_edges[*edge_idx as usize].as_ref() else { + let Some(Edge { from_node, to_node, cost, .. }) = graph.all_edges[*edge_idx as usize].as_ref() else { continue; }; let mut path_to = path_from.clone(); @@ -248,7 +247,7 @@ impl RankingRuleGraph { for (edge_idx, edge) in self.all_edges.iter().enumerate() { let Some(edge) = edge else { continue }; - let Edge { from_node, to_node, cost, details } = edge; + let Edge { from_node, to_node, .. 
} = edge; let color = if path.edges.contains(&(edge_idx as u32)) { "red" } else { "green" }; match &edge.details { EdgeDetails::Unconditional => { diff --git a/milli/src/search/new/ranking_rule_graph/proximity/build.rs b/milli/src/search/new/ranking_rule_graph/proximity/build.rs index bfcac57ee..6d2fefa65 100644 --- a/milli/src/search/new/ranking_rule_graph/proximity/build.rs +++ b/milli/src/search/new/ranking_rule_graph/proximity/build.rs @@ -7,7 +7,7 @@ use super::ProximityEdge; use crate::new::db_cache::DatabaseCache; use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations}; use crate::new::ranking_rule_graph::proximity::WordPair; -use crate::new::ranking_rule_graph::{Edge, EdgeDetails}; +use crate::new::ranking_rule_graph::EdgeDetails; use crate::new::QueryNode; use crate::{Index, Result}; diff --git a/milli/src/search/new/ranking_rule_graph/proximity/mod.rs b/milli/src/search/new/ranking_rule_graph/proximity/mod.rs index c823cbf9c..fc1a44310 100644 --- a/milli/src/search/new/ranking_rule_graph/proximity/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/proximity/mod.rs @@ -5,7 +5,7 @@ use heed::RoTxn; use super::empty_paths_cache::EmptyPathsCache; use super::paths_map::PathsMap; -use super::{Edge, EdgeDetails, RankingRuleGraphTrait}; +use super::{EdgeDetails, RankingRuleGraphTrait}; use crate::new::db_cache::DatabaseCache; use crate::new::query_term::WordDerivations; use crate::new::QueryNode; diff --git a/milli/src/search/new/ranking_rule_graph/resolve_paths.rs b/milli/src/search/new/ranking_rule_graph/resolve_paths.rs index 1a97dc485..90650340f 100644 --- a/milli/src/search/new/ranking_rule_graph/resolve_paths.rs +++ b/milli/src/search/new/ranking_rule_graph/resolve_paths.rs @@ -8,7 +8,7 @@ use super::empty_paths_cache::EmptyPathsCache; use super::paths_map::PathsMap; use super::{RankingRuleGraph, RankingRuleGraphTrait}; use crate::new::db_cache::DatabaseCache; -use crate::new::ranking_rule_graph::Edge; + use crate::new::BitmapOrAllRef; 
use crate::{Index, Result}; diff --git a/milli/src/search/new/ranking_rules.rs b/milli/src/search/new/ranking_rules.rs index c7c9d5c97..e78bdff0c 100644 --- a/milli/src/search/new/ranking_rules.rs +++ b/milli/src/search/new/ranking_rules.rs @@ -1,11 +1,9 @@ -use std::fmt::Display; - use heed::RoTxn; use roaring::RoaringBitmap; use super::db_cache::DatabaseCache; use super::logger::SearchLogger; -use super::resolve_query_graph::resolve_query_graph; + use super::QueryGraph; use crate::new::graph_based_ranking_rule::GraphBasedRankingRule; use crate::new::ranking_rule_graph::proximity::ProximityGraph; @@ -172,7 +170,8 @@ pub fn execute_search<'transaction>( let mut results = vec![]; let mut cur_offset = 0usize; - // Add the candidates to the results. Take the `from`, `limit`, and `cur_offset` into account. + // Add the candidates to the results. Take the `from`, `limit`, and `cur_offset` + // into account and inform the logger. macro_rules! maybe_add_to_results { ($candidates:expr) => { let candidates = $candidates; @@ -213,7 +212,6 @@ pub fn execute_search<'transaction>( cur_offset += len as usize; }; } - // TODO: skip buckets when we want to start from an offset while results.len() < length { // The universe for this bucket is zero or one element, so we don't need to sort // anything, just extend the results and go back to the parent ranking rule. @@ -273,7 +271,7 @@ mod tests { use heed::EnvOpenOptions; - use super::{execute_search, get_start_universe}; + use super::execute_search; use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; use crate::index::tests::TempIndex; use crate::new::db_cache::DatabaseCache; @@ -344,23 +342,9 @@ mod tests { let mut db_cache = DatabaseCache::default(); - let query_graph = make_query_graph( - &index, - &txn, - &mut db_cache, - "and he was released from prison by the government", - ) - .unwrap(); - - // TODO: filters + maybe distinct attributes? 
- let universe = get_start_universe( - &index, - &txn, - &mut db_cache, - &query_graph, - TermsMatchingStrategy::Last, - ) - .unwrap(); + let query_graph = + make_query_graph(&index, &txn, &mut db_cache, "released from prison by the government") + .unwrap(); let mut logger = DetailedSearchLogger::new("log"); @@ -370,8 +354,8 @@ mod tests { &mut db_cache, &query_graph, None, - 500, - 100, + 5, + 20, &mut logger, //&mut DefaultSearchLogger, ) .unwrap(); diff --git a/milli/src/search/new/resolve_query_graph.rs b/milli/src/search/new/resolve_query_graph.rs index e752358a7..4da853e7c 100644 --- a/milli/src/search/new/resolve_query_graph.rs +++ b/milli/src/search/new/resolve_query_graph.rs @@ -1,11 +1,11 @@ -use std::collections::{HashMap, HashSet, VecDeque}; +use std::collections::VecDeque; use fxhash::FxHashMap; use heed::{BytesDecode, RoTxn}; use roaring::{MultiOps, RoaringBitmap}; use super::db_cache::DatabaseCache; -use super::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations}; +use super::query_term::{QueryTerm, WordDerivations}; use super::QueryGraph; use crate::{Index, Result, RoaringBitmapCodec}; diff --git a/milli/src/search/new/words.rs b/milli/src/search/new/words.rs index 63df03f93..e4513eea0 100644 --- a/milli/src/search/new/words.rs +++ b/milli/src/search/new/words.rs @@ -39,8 +39,8 @@ impl<'transaction> RankingRule<'transaction, QueryGraph> for Words { _index: &Index, _txn: &'transaction RoTxn, _db_cache: &mut DatabaseCache<'transaction>, - logger: &mut dyn SearchLogger, - parent_candidates: &RoaringBitmap, + _logger: &mut dyn SearchLogger, + _parent_candidates: &RoaringBitmap, parent_query_graph: &QueryGraph, ) -> Result<()> { // println!("Words: start iteration");