mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 14:54:27 +01:00
Add a search logger
This commit is contained in:
parent
dd12d44134
commit
6ba4d5e987
@ -2,6 +2,7 @@ use heed::RoTxn;
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::db_cache::DatabaseCache;
|
use super::db_cache::DatabaseCache;
|
||||||
|
use super::logger::SearchLogger;
|
||||||
use super::ranking_rule_graph::cheapest_paths::KCheapestPathsState;
|
use super::ranking_rule_graph::cheapest_paths::KCheapestPathsState;
|
||||||
use super::ranking_rule_graph::edge_docids_cache::EdgeDocidsCache;
|
use super::ranking_rule_graph::edge_docids_cache::EdgeDocidsCache;
|
||||||
use super::ranking_rule_graph::empty_paths_cache::EmptyPathsCache;
|
use super::ranking_rule_graph::empty_paths_cache::EmptyPathsCache;
|
||||||
@ -12,11 +13,12 @@ use crate::new::ranking_rule_graph::cheapest_paths::{self, Path};
|
|||||||
use crate::{Index, Result};
|
use crate::{Index, Result};
|
||||||
|
|
||||||
pub struct GraphBasedRankingRule<G: RankingRuleGraphTrait> {
|
pub struct GraphBasedRankingRule<G: RankingRuleGraphTrait> {
|
||||||
|
id: String,
|
||||||
state: Option<GraphBasedRankingRuleState<G>>,
|
state: Option<GraphBasedRankingRuleState<G>>,
|
||||||
}
|
}
|
||||||
impl<G: RankingRuleGraphTrait> Default for GraphBasedRankingRule<G> {
|
impl<G: RankingRuleGraphTrait> GraphBasedRankingRule<G> {
|
||||||
fn default() -> Self {
|
pub fn new(id: String) -> Self {
|
||||||
Self { state: None }
|
Self { id, state: None }
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -30,11 +32,15 @@ pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
|
|||||||
impl<'transaction, G: RankingRuleGraphTrait> RankingRule<'transaction, QueryGraph>
|
impl<'transaction, G: RankingRuleGraphTrait> RankingRule<'transaction, QueryGraph>
|
||||||
for GraphBasedRankingRule<G>
|
for GraphBasedRankingRule<G>
|
||||||
{
|
{
|
||||||
|
fn id(&self) -> String {
|
||||||
|
self.id.clone()
|
||||||
|
}
|
||||||
fn start_iteration(
|
fn start_iteration(
|
||||||
&mut self,
|
&mut self,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
txn: &'transaction RoTxn,
|
txn: &'transaction RoTxn,
|
||||||
db_cache: &mut DatabaseCache<'transaction>,
|
db_cache: &mut DatabaseCache<'transaction>,
|
||||||
|
logger: &mut dyn SearchLogger<QueryGraph>,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
query_graph: &QueryGraph,
|
query_graph: &QueryGraph,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
@ -59,6 +65,7 @@ impl<'transaction, G: RankingRuleGraphTrait> RankingRule<'transaction, QueryGrap
|
|||||||
index: &Index,
|
index: &Index,
|
||||||
txn: &'transaction RoTxn,
|
txn: &'transaction RoTxn,
|
||||||
db_cache: &mut DatabaseCache<'transaction>,
|
db_cache: &mut DatabaseCache<'transaction>,
|
||||||
|
logger: &mut dyn SearchLogger<QueryGraph>,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
|
) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
|
||||||
assert!(universe.len() > 1);
|
assert!(universe.len() > 1);
|
||||||
@ -109,6 +116,7 @@ impl<'transaction, G: RankingRuleGraphTrait> RankingRule<'transaction, QueryGrap
|
|||||||
_index: &Index,
|
_index: &Index,
|
||||||
_txn: &'transaction RoTxn,
|
_txn: &'transaction RoTxn,
|
||||||
_db_cache: &mut DatabaseCache<'transaction>,
|
_db_cache: &mut DatabaseCache<'transaction>,
|
||||||
|
logger: &mut dyn SearchLogger<QueryGraph>,
|
||||||
) {
|
) {
|
||||||
self.state = None;
|
self.state = None;
|
||||||
}
|
}
|
||||||
|
238
milli/src/search/new/logger/detailed.rs
Normal file
238
milli/src/search/new/logger/detailed.rs
Normal file
@ -0,0 +1,238 @@
|
|||||||
|
use rand::random;
|
||||||
|
use roaring::RoaringBitmap;
|
||||||
|
use std::fs::File;
|
||||||
|
use std::path::Path;
|
||||||
|
use std::{io::Write, path::PathBuf};
|
||||||
|
|
||||||
|
use crate::new::QueryNode;
|
||||||
|
use crate::new::ranking_rule_graph::{
|
||||||
|
paths_map::PathsMap, proximity::ProximityGraph, RankingRuleGraph,
|
||||||
|
};
|
||||||
|
|
||||||
|
use super::{QueryGraph, RankingRule, RankingRuleQueryTrait, SearchLogger};
|
||||||
|
|
||||||
|
pub enum SearchEvents {
|
||||||
|
RankingRuleStartIteration {
|
||||||
|
ranking_rule_idx: usize,
|
||||||
|
query: QueryGraph,
|
||||||
|
universe: RoaringBitmap,
|
||||||
|
},
|
||||||
|
RankingRuleNextBucket {
|
||||||
|
ranking_rule_idx: usize,
|
||||||
|
universe: RoaringBitmap,
|
||||||
|
},
|
||||||
|
RankingRuleEndIteration {
|
||||||
|
ranking_rule_idx: usize,
|
||||||
|
universe: RoaringBitmap,
|
||||||
|
},
|
||||||
|
ExtendResults {
|
||||||
|
new: RoaringBitmap,
|
||||||
|
},
|
||||||
|
WordsState {
|
||||||
|
query_graph: QueryGraph,
|
||||||
|
},
|
||||||
|
ProximityState {
|
||||||
|
graph: RankingRuleGraph<ProximityGraph>,
|
||||||
|
paths: PathsMap<u64>,
|
||||||
|
},
|
||||||
|
}
|
||||||
|
|
||||||
|
pub struct DetailedSearchLogger {
|
||||||
|
folder_path: PathBuf,
|
||||||
|
initial_query: Option<QueryGraph>,
|
||||||
|
initial_universe: Option<RoaringBitmap>,
|
||||||
|
ranking_rules_ids: Option<Vec<String>>,
|
||||||
|
events: Vec<SearchEvents>,
|
||||||
|
}
|
||||||
|
impl DetailedSearchLogger {
|
||||||
|
pub fn new(folder_path: &str) -> Self {
|
||||||
|
Self {
|
||||||
|
folder_path: PathBuf::new().join(folder_path),
|
||||||
|
initial_query: <_>::default(),
|
||||||
|
initial_universe: <_>::default(),
|
||||||
|
ranking_rules_ids: <_>::default(),
|
||||||
|
events: <_>::default(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl SearchLogger<QueryGraph> for DetailedSearchLogger {
|
||||||
|
fn initial_query(&mut self, query: &QueryGraph) {
|
||||||
|
self.initial_query = Some(query.clone());
|
||||||
|
}
|
||||||
|
|
||||||
|
fn initial_universe(&mut self, universe: &RoaringBitmap) {
|
||||||
|
self.initial_universe = Some(universe.clone());
|
||||||
|
}
|
||||||
|
fn ranking_rules(&mut self, rr: &[Box<dyn RankingRule<QueryGraph>>]) {
|
||||||
|
self.ranking_rules_ids = Some(rr.iter().map(|rr| rr.id()).collect());
|
||||||
|
}
|
||||||
|
|
||||||
|
fn start_iteration_ranking_rule<'transaction>(
|
||||||
|
&mut self,
|
||||||
|
ranking_rule_idx: usize,
|
||||||
|
ranking_rule: &dyn RankingRule<'transaction, QueryGraph>,
|
||||||
|
query: &QueryGraph,
|
||||||
|
universe: &RoaringBitmap,
|
||||||
|
) {
|
||||||
|
self.events.push(SearchEvents::RankingRuleStartIteration {
|
||||||
|
ranking_rule_idx,
|
||||||
|
query: query.clone(),
|
||||||
|
universe: universe.clone(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn next_bucket_ranking_rule<'transaction>(
|
||||||
|
&mut self,
|
||||||
|
ranking_rule_idx: usize,
|
||||||
|
ranking_rule: &dyn RankingRule<'transaction, QueryGraph>,
|
||||||
|
universe: &RoaringBitmap,
|
||||||
|
) {
|
||||||
|
self.events.push(SearchEvents::RankingRuleNextBucket {
|
||||||
|
ranking_rule_idx,
|
||||||
|
universe: universe.clone(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
fn end_iteration_ranking_rule<'transaction>(
|
||||||
|
&mut self,
|
||||||
|
ranking_rule_idx: usize,
|
||||||
|
ranking_rule: &dyn RankingRule<'transaction, QueryGraph>,
|
||||||
|
universe: &RoaringBitmap,
|
||||||
|
) {
|
||||||
|
self.events.push(SearchEvents::RankingRuleEndIteration {
|
||||||
|
ranking_rule_idx,
|
||||||
|
universe: universe.clone(),
|
||||||
|
})
|
||||||
|
}
|
||||||
|
fn add_to_results(&mut self, docids: &RoaringBitmap) {
|
||||||
|
self.events.push(SearchEvents::ExtendResults { new: docids.clone() });
|
||||||
|
}
|
||||||
|
|
||||||
|
fn log_words_state(&mut self, query_graph: &QueryGraph) {
|
||||||
|
self.events.push(SearchEvents::WordsState { query_graph: query_graph.clone() });
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
impl DetailedSearchLogger {
|
||||||
|
pub fn write_d2_description(&self) {
|
||||||
|
let mut timestamp_idx = 0;
|
||||||
|
let mut timestamp = vec![];
|
||||||
|
fn activated_id(timestamp: &[usize]) -> String {
|
||||||
|
let mut s = String::new();
|
||||||
|
s.push('0');
|
||||||
|
for t in timestamp.iter() {
|
||||||
|
s.push_str(&format!("{t}"));
|
||||||
|
}
|
||||||
|
s
|
||||||
|
}
|
||||||
|
|
||||||
|
let index_path = self.folder_path.join("index.d2");
|
||||||
|
let mut file = std::fs::File::create(&index_path).unwrap();
|
||||||
|
writeln!(&mut file, "Control Flow Between Ranking Rules: {{").unwrap();
|
||||||
|
writeln!(&mut file, "shape: sequence_diagram");
|
||||||
|
for (idx, rr_id) in self.ranking_rules_ids.as_ref().unwrap().iter().enumerate() {
|
||||||
|
writeln!(&mut file, "{idx}: {rr_id}").unwrap();
|
||||||
|
}
|
||||||
|
writeln!(&mut file, "results");
|
||||||
|
for event in self.events.iter() {
|
||||||
|
match event {
|
||||||
|
SearchEvents::RankingRuleStartIteration { query, universe, ranking_rule_idx } => {
|
||||||
|
|
||||||
|
let parent_activated_id = activated_id(×tamp);
|
||||||
|
timestamp.push(0);
|
||||||
|
let self_activated_id = activated_id(×tamp);
|
||||||
|
if *ranking_rule_idx != 0 {
|
||||||
|
let parent_ranking_rule_idx = ranking_rule_idx - 1;
|
||||||
|
writeln!(
|
||||||
|
&mut file,
|
||||||
|
"{parent_ranking_rule_idx}.{parent_activated_id} -> {ranking_rule_idx}.{self_activated_id} : start iteration",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
writeln!(&mut file,
|
||||||
|
"{ranking_rule_idx}.{self_activated_id} {{
|
||||||
|
style {{
|
||||||
|
fill: \"#D8A7B1\"
|
||||||
|
}}
|
||||||
|
}}").unwrap();
|
||||||
|
}
|
||||||
|
SearchEvents::RankingRuleNextBucket { universe, ranking_rule_idx } => {
|
||||||
|
let old_activated_id = activated_id(×tamp);
|
||||||
|
*timestamp.last_mut().unwrap() += 1;
|
||||||
|
let next_activated_id = activated_id(×tamp);
|
||||||
|
writeln!(&mut file,
|
||||||
|
"{ranking_rule_idx}.{old_activated_id} -> {ranking_rule_idx}.{next_activated_id} : next bucket",)
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
SearchEvents::RankingRuleEndIteration { universe, ranking_rule_idx } => {
|
||||||
|
let cur_activated_id = activated_id(×tamp);
|
||||||
|
timestamp.pop();
|
||||||
|
let parent_activated_id = activated_id(×tamp);
|
||||||
|
let parent_ranking_rule = if *ranking_rule_idx == 0 {
|
||||||
|
"start".to_owned()
|
||||||
|
} else {
|
||||||
|
format!("{}.{parent_activated_id}", ranking_rule_idx - 1)
|
||||||
|
};
|
||||||
|
writeln!(
|
||||||
|
&mut file,
|
||||||
|
"{ranking_rule_idx}.{cur_activated_id} -> {parent_ranking_rule} : end iteration",
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
SearchEvents::ExtendResults { new } => {
|
||||||
|
if new.is_empty() {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
let cur_ranking_rule = timestamp.len() - 1;
|
||||||
|
let cur_activated_id = activated_id(×tamp);
|
||||||
|
let docids = new.iter().collect::<Vec<_>>();
|
||||||
|
let len = new.len();
|
||||||
|
let random = random::<u64>();
|
||||||
|
|
||||||
|
writeln!(
|
||||||
|
&mut file,
|
||||||
|
"{cur_ranking_rule}.{cur_activated_id} -> results.{random} : \"add {len}\"
|
||||||
|
results.{random} {{
|
||||||
|
tooltip: \"{docids:?}\"
|
||||||
|
style {{
|
||||||
|
fill: \"#B6E2D3\"
|
||||||
|
}}
|
||||||
|
}}
|
||||||
|
"
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
},
|
||||||
|
SearchEvents::WordsState { query_graph } => {
|
||||||
|
let cur_ranking_rule = timestamp.len() - 1;
|
||||||
|
let cur_activated_id = activated_id(×tamp);
|
||||||
|
let id = format!("{cur_ranking_rule}.{cur_activated_id}");
|
||||||
|
let mut new_file_path = self.folder_path.join(format!("{id}.d2"));
|
||||||
|
let mut new_file = std::fs::File::create(new_file_path).unwrap();
|
||||||
|
Self::query_graph_d2_description(&query_graph, &mut new_file);
|
||||||
|
writeln!(
|
||||||
|
&mut file,
|
||||||
|
"{id} {{
|
||||||
|
link: \"{id}.d2.svg\"
|
||||||
|
}}").unwrap();
|
||||||
|
},
|
||||||
|
SearchEvents::ProximityState { graph, paths } => todo!(),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
writeln!(&mut file, "}}");
|
||||||
|
}
|
||||||
|
fn query_graph_d2_description(query_graph: &QueryGraph, file: &mut File) {
|
||||||
|
writeln!(file,"direction: right");
|
||||||
|
for node in 0..query_graph.nodes.len() {
|
||||||
|
if matches!(query_graph.nodes[node], QueryNode::Deleted) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
writeln!(file,"{node}");
|
||||||
|
|
||||||
|
for edge in query_graph.edges[node].successors.iter() {
|
||||||
|
writeln!(file, "{node} -> {edge};\n").unwrap();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
72
milli/src/search/new/logger/mod.rs
Normal file
72
milli/src/search/new/logger/mod.rs
Normal file
@ -0,0 +1,72 @@
|
|||||||
|
pub mod detailed;
|
||||||
|
|
||||||
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
|
use super::{query_graph, QueryGraph, RankingRule, RankingRuleQueryTrait};
|
||||||
|
|
||||||
|
pub struct DefaultSearchLogger;
|
||||||
|
impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
|
||||||
|
fn initial_query(&mut self, query: &Q) {}
|
||||||
|
|
||||||
|
fn initial_universe(&mut self, universe: &RoaringBitmap) {}
|
||||||
|
|
||||||
|
fn ranking_rules(&mut self, rr: &[Box<dyn RankingRule<Q>>]) {}
|
||||||
|
fn start_iteration_ranking_rule<'transaction>(
|
||||||
|
&mut self,
|
||||||
|
ranking_rule_idx: usize,
|
||||||
|
ranking_rule: &dyn RankingRule<'transaction, Q>,
|
||||||
|
query: &Q,
|
||||||
|
universe: &RoaringBitmap,
|
||||||
|
) {
|
||||||
|
}
|
||||||
|
|
||||||
|
fn next_bucket_ranking_rule<'transaction>(
|
||||||
|
&mut self,
|
||||||
|
ranking_rule_idx: usize,
|
||||||
|
ranking_rule: &dyn RankingRule<'transaction, Q>,
|
||||||
|
universe: &RoaringBitmap,
|
||||||
|
) {
|
||||||
|
}
|
||||||
|
|
||||||
|
fn end_iteration_ranking_rule<'transaction>(
|
||||||
|
&mut self,
|
||||||
|
ranking_rule_idx: usize,
|
||||||
|
ranking_rule: &dyn RankingRule<'transaction, Q>,
|
||||||
|
universe: &RoaringBitmap,
|
||||||
|
) {
|
||||||
|
}
|
||||||
|
|
||||||
|
fn add_to_results(&mut self, docids: &RoaringBitmap) {}
|
||||||
|
|
||||||
|
fn log_words_state(&mut self, query_graph: &Q) {}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub trait SearchLogger<Q: RankingRuleQueryTrait> {
|
||||||
|
fn initial_query(&mut self, query: &Q);
|
||||||
|
fn initial_universe(&mut self, universe: &RoaringBitmap);
|
||||||
|
|
||||||
|
fn ranking_rules(&mut self, rr: &[Box<dyn RankingRule<Q>>]);
|
||||||
|
|
||||||
|
fn start_iteration_ranking_rule<'transaction>(
|
||||||
|
&mut self,
|
||||||
|
ranking_rule_idx: usize,
|
||||||
|
ranking_rule: &dyn RankingRule<'transaction, Q>,
|
||||||
|
query: &Q,
|
||||||
|
universe: &RoaringBitmap,
|
||||||
|
);
|
||||||
|
fn next_bucket_ranking_rule<'transaction>(
|
||||||
|
&mut self,
|
||||||
|
ranking_rule_idx: usize,
|
||||||
|
ranking_rule: &dyn RankingRule<'transaction, Q>,
|
||||||
|
universe: &RoaringBitmap,
|
||||||
|
);
|
||||||
|
fn end_iteration_ranking_rule<'transaction>(
|
||||||
|
&mut self,
|
||||||
|
ranking_rule_idx: usize,
|
||||||
|
ranking_rule: &dyn RankingRule<'transaction, Q>,
|
||||||
|
universe: &RoaringBitmap,
|
||||||
|
);
|
||||||
|
fn add_to_results(&mut self, docids: &RoaringBitmap);
|
||||||
|
|
||||||
|
fn log_words_state(&mut self, query_graph: &Q);
|
||||||
|
}
|
@ -1,5 +1,6 @@
|
|||||||
pub mod db_cache;
|
pub mod db_cache;
|
||||||
pub mod graph_based_ranking_rule;
|
pub mod graph_based_ranking_rule;
|
||||||
|
pub mod logger;
|
||||||
pub mod query_graph;
|
pub mod query_graph;
|
||||||
pub mod query_term;
|
pub mod query_term;
|
||||||
pub mod ranking_rule_graph;
|
pub mod ranking_rule_graph;
|
||||||
|
@ -1,7 +1,10 @@
|
|||||||
|
use std::fmt::Display;
|
||||||
|
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::db_cache::DatabaseCache;
|
use super::db_cache::DatabaseCache;
|
||||||
|
use super::logger::SearchLogger;
|
||||||
use super::resolve_query_graph::resolve_query_graph;
|
use super::resolve_query_graph::resolve_query_graph;
|
||||||
use super::QueryGraph;
|
use super::QueryGraph;
|
||||||
use crate::new::graph_based_ranking_rule::GraphBasedRankingRule;
|
use crate::new::graph_based_ranking_rule::GraphBasedRankingRule;
|
||||||
@ -43,6 +46,8 @@ impl RankingRuleQueryTrait for PlaceholderQuery {}
|
|||||||
impl RankingRuleQueryTrait for QueryGraph {}
|
impl RankingRuleQueryTrait for QueryGraph {}
|
||||||
|
|
||||||
pub trait RankingRule<'transaction, Query: RankingRuleQueryTrait> {
|
pub trait RankingRule<'transaction, Query: RankingRuleQueryTrait> {
|
||||||
|
fn id(&self) -> String;
|
||||||
|
|
||||||
/// Prepare the ranking rule such that it can start iterating over its
|
/// Prepare the ranking rule such that it can start iterating over its
|
||||||
/// buckets using [`next_bucket`](RankingRule::next_bucket).
|
/// buckets using [`next_bucket`](RankingRule::next_bucket).
|
||||||
///
|
///
|
||||||
@ -52,6 +57,7 @@ pub trait RankingRule<'transaction, Query: RankingRuleQueryTrait> {
|
|||||||
index: &Index,
|
index: &Index,
|
||||||
txn: &'transaction RoTxn,
|
txn: &'transaction RoTxn,
|
||||||
db_cache: &mut DatabaseCache<'transaction>,
|
db_cache: &mut DatabaseCache<'transaction>,
|
||||||
|
logger: &mut dyn SearchLogger<Query>,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
query: &Query,
|
query: &Query,
|
||||||
) -> Result<()>;
|
) -> Result<()>;
|
||||||
@ -68,6 +74,7 @@ pub trait RankingRule<'transaction, Query: RankingRuleQueryTrait> {
|
|||||||
index: &Index,
|
index: &Index,
|
||||||
txn: &'transaction RoTxn,
|
txn: &'transaction RoTxn,
|
||||||
db_cache: &mut DatabaseCache<'transaction>,
|
db_cache: &mut DatabaseCache<'transaction>,
|
||||||
|
logger: &mut dyn SearchLogger<Query>,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
) -> Result<Option<RankingRuleOutput<Query>>>;
|
) -> Result<Option<RankingRuleOutput<Query>>>;
|
||||||
|
|
||||||
@ -78,6 +85,7 @@ pub trait RankingRule<'transaction, Query: RankingRuleQueryTrait> {
|
|||||||
index: &Index,
|
index: &Index,
|
||||||
txn: &'transaction RoTxn,
|
txn: &'transaction RoTxn,
|
||||||
db_cache: &mut DatabaseCache<'transaction>,
|
db_cache: &mut DatabaseCache<'transaction>,
|
||||||
|
logger: &mut dyn SearchLogger<Query>,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -110,28 +118,36 @@ pub fn execute_search<'transaction>(
|
|||||||
db_cache: &mut DatabaseCache<'transaction>,
|
db_cache: &mut DatabaseCache<'transaction>,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
query_graph: &QueryGraph,
|
query_graph: &QueryGraph,
|
||||||
|
logger: &mut dyn SearchLogger<QueryGraph>,
|
||||||
// _from: usize,
|
// _from: usize,
|
||||||
// _length: usize,
|
// _length: usize,
|
||||||
) -> Result<Vec<u32>> {
|
) -> Result<Vec<u32>> {
|
||||||
let words = Words::new(TermsMatchingStrategy::Last);
|
let words = Words::new(TermsMatchingStrategy::Last);
|
||||||
// let sort = Sort::new(index, txn, "sort1".to_owned(), true)?;
|
// let sort = Sort::new(index, txn, "sort1".to_owned(), true)?;
|
||||||
let proximity = GraphBasedRankingRule::<ProximityGraph>::default();
|
let proximity = GraphBasedRankingRule::<ProximityGraph>::new("proximity".to_owned());
|
||||||
// TODO: ranking rules given as argument
|
// TODO: ranking rules given as argument
|
||||||
let mut ranking_rules: Vec<Box<dyn RankingRule<'transaction, QueryGraph>>> =
|
let mut ranking_rules: Vec<Box<dyn RankingRule<'transaction, QueryGraph>>> =
|
||||||
vec![Box::new(words), Box::new(proximity) /* Box::new(sort) */];
|
vec![Box::new(words), Box::new(proximity) /* Box::new(sort) */];
|
||||||
|
|
||||||
|
logger.ranking_rules(&ranking_rules);
|
||||||
|
|
||||||
let ranking_rules_len = ranking_rules.len();
|
let ranking_rules_len = ranking_rules.len();
|
||||||
ranking_rules[0].start_iteration(index, txn, db_cache, universe, query_graph)?;
|
logger.start_iteration_ranking_rule(0, ranking_rules[0].as_ref(), query_graph, universe);
|
||||||
|
ranking_rules[0].start_iteration(index, txn, db_cache, logger, universe, query_graph)?;
|
||||||
|
|
||||||
let mut candidates = vec![RoaringBitmap::default(); ranking_rules_len];
|
let mut candidates = vec![RoaringBitmap::default(); ranking_rules_len];
|
||||||
candidates[0] = universe.clone();
|
candidates[0] = universe.clone();
|
||||||
|
|
||||||
let mut cur_ranking_rule_index = 0;
|
let mut cur_ranking_rule_index = 0;
|
||||||
|
|
||||||
macro_rules! back {
|
macro_rules! back {
|
||||||
() => {
|
() => {
|
||||||
|
logger.end_iteration_ranking_rule(
|
||||||
|
cur_ranking_rule_index,
|
||||||
|
ranking_rules[cur_ranking_rule_index].as_ref(),
|
||||||
|
&candidates[cur_ranking_rule_index],
|
||||||
|
);
|
||||||
candidates[cur_ranking_rule_index].clear();
|
candidates[cur_ranking_rule_index].clear();
|
||||||
ranking_rules[cur_ranking_rule_index].end_iteration(index, txn, db_cache);
|
ranking_rules[cur_ranking_rule_index].end_iteration(index, txn, db_cache, logger);
|
||||||
if cur_ranking_rule_index == 0 {
|
if cur_ranking_rule_index == 0 {
|
||||||
break;
|
break;
|
||||||
} else {
|
} else {
|
||||||
@ -146,11 +162,19 @@ pub fn execute_search<'transaction>(
|
|||||||
// The universe for this bucket is zero or one element, so we don't need to sort
|
// The universe for this bucket is zero or one element, so we don't need to sort
|
||||||
// anything, just extend the results and go back to the parent ranking rule.
|
// anything, just extend the results and go back to the parent ranking rule.
|
||||||
if candidates[cur_ranking_rule_index].len() <= 1 {
|
if candidates[cur_ranking_rule_index].len() <= 1 {
|
||||||
|
logger.add_to_results(&candidates[cur_ranking_rule_index]);
|
||||||
results.extend(&candidates[cur_ranking_rule_index]);
|
results.extend(&candidates[cur_ranking_rule_index]);
|
||||||
back!();
|
back!();
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket(index, txn, db_cache, &candidates[cur_ranking_rule_index])? else {
|
|
||||||
|
logger.next_bucket_ranking_rule(
|
||||||
|
cur_ranking_rule_index,
|
||||||
|
ranking_rules[cur_ranking_rule_index].as_ref(),
|
||||||
|
&candidates[cur_ranking_rule_index],
|
||||||
|
);
|
||||||
|
|
||||||
|
let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket(index, txn, db_cache, logger, &candidates[cur_ranking_rule_index])? else {
|
||||||
back!();
|
back!();
|
||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
@ -159,20 +183,29 @@ pub fn execute_search<'transaction>(
|
|||||||
|
|
||||||
if next_bucket.candidates.len() <= 1 {
|
if next_bucket.candidates.len() <= 1 {
|
||||||
// Only zero or one candidate, no need to sort through the child ranking rule.
|
// Only zero or one candidate, no need to sort through the child ranking rule.
|
||||||
|
logger.add_to_results(&next_bucket.candidates);
|
||||||
results.extend(next_bucket.candidates);
|
results.extend(next_bucket.candidates);
|
||||||
continue;
|
continue;
|
||||||
} else {
|
} else {
|
||||||
// many candidates, give to next ranking rule, if any
|
// many candidates, give to next ranking rule, if any
|
||||||
if cur_ranking_rule_index == ranking_rules_len - 1 {
|
if cur_ranking_rule_index == ranking_rules_len - 1 {
|
||||||
// TODO: don't extend too much, up to the limit only
|
// TODO: don't extend too much, up to the limit only
|
||||||
|
logger.add_to_results(&next_bucket.candidates);
|
||||||
results.extend(next_bucket.candidates);
|
results.extend(next_bucket.candidates);
|
||||||
} else {
|
} else {
|
||||||
cur_ranking_rule_index += 1;
|
cur_ranking_rule_index += 1;
|
||||||
candidates[cur_ranking_rule_index] = next_bucket.candidates.clone();
|
candidates[cur_ranking_rule_index] = next_bucket.candidates.clone();
|
||||||
|
logger.start_iteration_ranking_rule(
|
||||||
|
cur_ranking_rule_index,
|
||||||
|
ranking_rules[cur_ranking_rule_index].as_ref(),
|
||||||
|
&next_bucket.query,
|
||||||
|
&candidates[cur_ranking_rule_index],
|
||||||
|
);
|
||||||
ranking_rules[cur_ranking_rule_index].start_iteration(
|
ranking_rules[cur_ranking_rule_index].start_iteration(
|
||||||
index,
|
index,
|
||||||
txn,
|
txn,
|
||||||
db_cache,
|
db_cache,
|
||||||
|
logger,
|
||||||
&next_bucket.candidates,
|
&next_bucket.candidates,
|
||||||
&next_bucket.query,
|
&next_bucket.query,
|
||||||
)?;
|
)?;
|
||||||
@ -195,6 +228,8 @@ mod tests {
|
|||||||
use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
||||||
use crate::index::tests::TempIndex;
|
use crate::index::tests::TempIndex;
|
||||||
use crate::new::db_cache::DatabaseCache;
|
use crate::new::db_cache::DatabaseCache;
|
||||||
|
use crate::new::logger::detailed::DetailedSearchLogger;
|
||||||
|
use crate::new::logger::{DefaultSearchLogger, SearchLogger};
|
||||||
use crate::new::make_query_graph;
|
use crate::new::make_query_graph;
|
||||||
use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
||||||
use crate::{Criterion, Index, Object, Search, TermsMatchingStrategy};
|
use crate::{Criterion, Index, Object, Search, TermsMatchingStrategy};
|
||||||
@ -231,13 +266,14 @@ mod tests {
|
|||||||
]))
|
]))
|
||||||
.unwrap();
|
.unwrap();
|
||||||
let txn = index.read_txn().unwrap();
|
let txn = index.read_txn().unwrap();
|
||||||
|
let mut logger = DefaultSearchLogger;
|
||||||
let mut db_cache = DatabaseCache::default();
|
let mut db_cache = DatabaseCache::default();
|
||||||
|
|
||||||
let query_graph =
|
let query_graph =
|
||||||
make_query_graph(&index, &txn, &mut db_cache, "the quick brown fox jumps over")
|
make_query_graph(&index, &txn, &mut db_cache, "the quick brown fox jumps over")
|
||||||
.unwrap();
|
.unwrap();
|
||||||
println!("{}", query_graph.graphviz());
|
println!("{}", query_graph.graphviz());
|
||||||
|
logger.initial_query(&query_graph);
|
||||||
|
|
||||||
// TODO: filters + maybe distinct attributes?
|
// TODO: filters + maybe distinct attributes?
|
||||||
let universe = get_start_universe(
|
let universe = get_start_universe(
|
||||||
@ -250,9 +286,15 @@ mod tests {
|
|||||||
.unwrap();
|
.unwrap();
|
||||||
println!("universe: {universe:?}");
|
println!("universe: {universe:?}");
|
||||||
|
|
||||||
let results =
|
let results = execute_search(
|
||||||
execute_search(&index, &txn, &mut db_cache, &universe, &query_graph /* 0, 20 */)
|
&index,
|
||||||
.unwrap();
|
&txn,
|
||||||
|
&mut db_cache,
|
||||||
|
&universe,
|
||||||
|
&query_graph,
|
||||||
|
&mut logger, /* 0, 20 */
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
println!("{results:?}")
|
println!("{results:?}")
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -285,9 +327,19 @@ mod tests {
|
|||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
let results =
|
let mut logger = DetailedSearchLogger::new("log");
|
||||||
execute_search(&index, &txn, &mut db_cache, &universe, &query_graph /* 0, 20 */)
|
|
||||||
.unwrap();
|
let results = execute_search(
|
||||||
|
&index,
|
||||||
|
&txn,
|
||||||
|
&mut db_cache,
|
||||||
|
&universe,
|
||||||
|
&query_graph,
|
||||||
|
&mut logger, /* 0, 20 */
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
|
logger.write_d2_description();
|
||||||
|
|
||||||
let elapsed = start.elapsed();
|
let elapsed = start.elapsed();
|
||||||
|
|
||||||
|
@ -2,6 +2,7 @@ use heed::RoTxn;
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::db_cache::DatabaseCache;
|
use super::db_cache::DatabaseCache;
|
||||||
|
use super::logger::SearchLogger;
|
||||||
use super::{
|
use super::{
|
||||||
RankingRule, RankingRuleOutput, RankingRuleOutputIter, RankingRuleOutputIterWrapper,
|
RankingRule, RankingRuleOutput, RankingRuleOutputIter, RankingRuleOutputIterWrapper,
|
||||||
RankingRuleQueryTrait,
|
RankingRuleQueryTrait,
|
||||||
@ -20,6 +21,7 @@ use crate::{
|
|||||||
// (2) at the end, it should return all the remaining documents (this could be ensured at the trait level?)
|
// (2) at the end, it should return all the remaining documents (this could be ensured at the trait level?)
|
||||||
|
|
||||||
pub struct Sort<'transaction, Query> {
|
pub struct Sort<'transaction, Query> {
|
||||||
|
field_name: String,
|
||||||
field_id: Option<FieldId>,
|
field_id: Option<FieldId>,
|
||||||
is_ascending: bool,
|
is_ascending: bool,
|
||||||
iter: Option<RankingRuleOutputIterWrapper<'transaction, Query>>,
|
iter: Option<RankingRuleOutputIterWrapper<'transaction, Query>>,
|
||||||
@ -34,18 +36,23 @@ impl<'transaction, Query> Sort<'transaction, Query> {
|
|||||||
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
let fields_ids_map = index.fields_ids_map(rtxn)?;
|
||||||
let field_id = fields_ids_map.id(&field_name);
|
let field_id = fields_ids_map.id(&field_name);
|
||||||
|
|
||||||
Ok(Self { field_id, is_ascending, iter: None })
|
Ok(Self { field_name, field_id, is_ascending, iter: None })
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'transaction, Query: RankingRuleQueryTrait> RankingRule<'transaction, Query>
|
impl<'transaction, Query: RankingRuleQueryTrait> RankingRule<'transaction, Query>
|
||||||
for Sort<'transaction, Query>
|
for Sort<'transaction, Query>
|
||||||
{
|
{
|
||||||
|
fn id(&self) -> String {
|
||||||
|
let Self { field_name, is_ascending, .. } = self;
|
||||||
|
format!("{field_name}:{}", if *is_ascending { "asc" } else { "desc " })
|
||||||
|
}
|
||||||
fn start_iteration(
|
fn start_iteration(
|
||||||
&mut self,
|
&mut self,
|
||||||
index: &Index,
|
index: &Index,
|
||||||
txn: &'transaction RoTxn,
|
txn: &'transaction RoTxn,
|
||||||
_db_cache: &mut DatabaseCache<'transaction>,
|
_db_cache: &mut DatabaseCache<'transaction>,
|
||||||
|
_logger: &mut dyn SearchLogger<Query>,
|
||||||
parent_candidates: &RoaringBitmap,
|
parent_candidates: &RoaringBitmap,
|
||||||
parent_query_graph: &Query,
|
parent_query_graph: &Query,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
@ -89,6 +96,7 @@ impl<'transaction, Query: RankingRuleQueryTrait> RankingRule<'transaction, Query
|
|||||||
_index: &Index,
|
_index: &Index,
|
||||||
_txn: &'transaction RoTxn,
|
_txn: &'transaction RoTxn,
|
||||||
_db_cache: &mut DatabaseCache<'transaction>,
|
_db_cache: &mut DatabaseCache<'transaction>,
|
||||||
|
_logger: &mut dyn SearchLogger<Query>,
|
||||||
_universe: &RoaringBitmap,
|
_universe: &RoaringBitmap,
|
||||||
) -> Result<Option<RankingRuleOutput<Query>>> {
|
) -> Result<Option<RankingRuleOutput<Query>>> {
|
||||||
let iter = self.iter.as_mut().unwrap();
|
let iter = self.iter.as_mut().unwrap();
|
||||||
@ -101,6 +109,7 @@ impl<'transaction, Query: RankingRuleQueryTrait> RankingRule<'transaction, Query
|
|||||||
_index: &Index,
|
_index: &Index,
|
||||||
_txn: &'transaction RoTxn,
|
_txn: &'transaction RoTxn,
|
||||||
_db_cache: &mut DatabaseCache<'transaction>,
|
_db_cache: &mut DatabaseCache<'transaction>,
|
||||||
|
_logger: &mut dyn SearchLogger<Query>,
|
||||||
) {
|
) {
|
||||||
self.iter = None;
|
self.iter = None;
|
||||||
}
|
}
|
||||||
|
@ -4,6 +4,7 @@ use heed::RoTxn;
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::db_cache::DatabaseCache;
|
use super::db_cache::DatabaseCache;
|
||||||
|
use super::logger::SearchLogger;
|
||||||
use super::resolve_query_graph::{resolve_query_graph, NodeDocIdsCache};
|
use super::resolve_query_graph::{resolve_query_graph, NodeDocIdsCache};
|
||||||
use super::{QueryGraph, QueryNode, RankingRule, RankingRuleOutput};
|
use super::{QueryGraph, QueryNode, RankingRule, RankingRuleOutput};
|
||||||
use crate::{Index, Result, TermsMatchingStrategy};
|
use crate::{Index, Result, TermsMatchingStrategy};
|
||||||
@ -30,11 +31,15 @@ impl Words {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl<'transaction> RankingRule<'transaction, QueryGraph> for Words {
|
impl<'transaction> RankingRule<'transaction, QueryGraph> for Words {
|
||||||
|
fn id(&self) -> String {
|
||||||
|
"words".to_owned()
|
||||||
|
}
|
||||||
fn start_iteration(
|
fn start_iteration(
|
||||||
&mut self,
|
&mut self,
|
||||||
_index: &Index,
|
_index: &Index,
|
||||||
_txn: &'transaction RoTxn,
|
_txn: &'transaction RoTxn,
|
||||||
_db_cache: &mut DatabaseCache<'transaction>,
|
_db_cache: &mut DatabaseCache<'transaction>,
|
||||||
|
logger: &mut dyn SearchLogger<QueryGraph>,
|
||||||
parent_candidates: &RoaringBitmap,
|
parent_candidates: &RoaringBitmap,
|
||||||
parent_query_graph: &QueryGraph,
|
parent_query_graph: &QueryGraph,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
@ -42,6 +47,8 @@ impl<'transaction> RankingRule<'transaction, QueryGraph> for Words {
|
|||||||
self.exhausted = false;
|
self.exhausted = false;
|
||||||
self.query_graph = Some(parent_query_graph.clone());
|
self.query_graph = Some(parent_query_graph.clone());
|
||||||
|
|
||||||
|
logger.log_words_state(parent_query_graph);
|
||||||
|
|
||||||
// TODO: a phrase can contain many positions, but represents a single node.
|
// TODO: a phrase can contain many positions, but represents a single node.
|
||||||
// That's a problem.
|
// That's a problem.
|
||||||
let positions_to_remove = match self.terms_matching_strategy {
|
let positions_to_remove = match self.terms_matching_strategy {
|
||||||
@ -70,6 +77,7 @@ impl<'transaction> RankingRule<'transaction, QueryGraph> for Words {
|
|||||||
index: &Index,
|
index: &Index,
|
||||||
txn: &'transaction RoTxn,
|
txn: &'transaction RoTxn,
|
||||||
db_cache: &mut DatabaseCache<'transaction>,
|
db_cache: &mut DatabaseCache<'transaction>,
|
||||||
|
logger: &mut dyn SearchLogger<QueryGraph>,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
|
) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
|
||||||
// println!("Words: next bucket");
|
// println!("Words: next bucket");
|
||||||
@ -99,6 +107,7 @@ impl<'transaction> RankingRule<'transaction, QueryGraph> for Words {
|
|||||||
let position_to_remove = self.positions_to_remove.pop().unwrap();
|
let position_to_remove = self.positions_to_remove.pop().unwrap();
|
||||||
query_graph.remove_words_at_position(position_to_remove);
|
query_graph.remove_words_at_position(position_to_remove);
|
||||||
}
|
}
|
||||||
|
logger.log_words_state(query_graph);
|
||||||
|
|
||||||
Ok(Some(RankingRuleOutput { query: child_query_graph, candidates: this_bucket }))
|
Ok(Some(RankingRuleOutput { query: child_query_graph, candidates: this_bucket }))
|
||||||
}
|
}
|
||||||
@ -108,6 +117,7 @@ impl<'transaction> RankingRule<'transaction, QueryGraph> for Words {
|
|||||||
_index: &Index,
|
_index: &Index,
|
||||||
_txn: &'transaction RoTxn,
|
_txn: &'transaction RoTxn,
|
||||||
_db_cache: &mut DatabaseCache<'transaction>,
|
_db_cache: &mut DatabaseCache<'transaction>,
|
||||||
|
_logger: &mut dyn SearchLogger<QueryGraph>,
|
||||||
) {
|
) {
|
||||||
// println!("Words: end iteration");
|
// println!("Words: end iteration");
|
||||||
self.iterating = false;
|
self.iterating = false;
|
||||||
|
Loading…
Reference in New Issue
Block a user