mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 21:04:27 +01:00
Fix more bugs + visual empty path cache logging
This commit is contained in:
parent
0e1fbbf7c6
commit
6c85c0d95e
@ -71,30 +71,29 @@ impl<'transaction, G: RankingRuleGraphTrait> RankingRule<'transaction, QueryGrap
|
||||
assert!(universe.len() > 1);
|
||||
let mut state = self.state.take().unwrap();
|
||||
|
||||
let Some(cheapest_paths_state) = state.cheapest_paths_state.take() else {
|
||||
let Some(mut cheapest_paths_state) = state.cheapest_paths_state.take() else {
|
||||
return Ok(None);
|
||||
};
|
||||
|
||||
let mut paths = PathsMap::default();
|
||||
|
||||
if let Some(next_cheapest_paths_state) = cheapest_paths_state
|
||||
.compute_paths_of_next_lowest_cost(
|
||||
&mut state.graph,
|
||||
&state.empty_paths_cache,
|
||||
&mut paths,
|
||||
)
|
||||
{
|
||||
state.cheapest_paths_state = Some(next_cheapest_paths_state);
|
||||
} else {
|
||||
state.cheapest_paths_state = None;
|
||||
while paths.is_empty() {
|
||||
if let Some(next_cheapest_paths_state) = cheapest_paths_state
|
||||
.compute_paths_of_next_lowest_cost(
|
||||
&mut state.graph,
|
||||
&state.empty_paths_cache,
|
||||
&mut paths,
|
||||
)
|
||||
{
|
||||
cheapest_paths_state = next_cheapest_paths_state;
|
||||
} else {
|
||||
self.state = None;
|
||||
return Ok(None);
|
||||
}
|
||||
}
|
||||
state.cheapest_paths_state = Some(cheapest_paths_state);
|
||||
|
||||
if paths.is_empty() {
|
||||
self.state = None;
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
G::log_state(&state.graph, &paths, logger);
|
||||
G::log_state(&state.graph, &paths, &state.empty_paths_cache, logger);
|
||||
|
||||
let bucket = state.graph.resolve_paths(
|
||||
index,
|
||||
|
@ -6,6 +6,7 @@ use std::{io::Write, path::PathBuf};
|
||||
|
||||
use crate::new::QueryNode;
|
||||
use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
|
||||
use crate::new::ranking_rule_graph::empty_paths_cache::EmptyPathsCache;
|
||||
use crate::new::ranking_rule_graph::{Edge, EdgeDetails, RankingRuleGraphTrait};
|
||||
use crate::new::ranking_rule_graph::{
|
||||
paths_map::PathsMap, proximity::ProximityGraph, RankingRuleGraph,
|
||||
@ -36,6 +37,7 @@ pub enum SearchEvents {
|
||||
ProximityState {
|
||||
graph: RankingRuleGraph<ProximityGraph>,
|
||||
paths: PathsMap<u64>,
|
||||
empty_paths_cache: EmptyPathsCache,
|
||||
},
|
||||
}
|
||||
|
||||
@ -107,16 +109,16 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
|
||||
universe: universe.clone(),
|
||||
})
|
||||
}
|
||||
fn add_to_results(&mut self, docids: &RoaringBitmap) {
|
||||
self.events.push(SearchEvents::ExtendResults { new: docids.clone() });
|
||||
fn add_to_results(&mut self, docids: &mut dyn Iterator<Item = u32>) {
|
||||
self.events.push(SearchEvents::ExtendResults { new: docids.collect() });
|
||||
}
|
||||
|
||||
fn log_words_state(&mut self, query_graph: &QueryGraph) {
|
||||
self.events.push(SearchEvents::WordsState { query_graph: query_graph.clone() });
|
||||
}
|
||||
|
||||
fn log_proximity_state(&mut self, query_graph: &RankingRuleGraph<ProximityGraph>, paths_map: &PathsMap<u64>,) {
|
||||
self.events.push(SearchEvents::ProximityState { graph: query_graph.clone(), paths: paths_map.clone() })
|
||||
fn log_proximity_state(&mut self, query_graph: &RankingRuleGraph<ProximityGraph>, paths_map: &PathsMap<u64>, empty_paths_cache: &EmptyPathsCache) {
|
||||
self.events.push(SearchEvents::ProximityState { graph: query_graph.clone(), paths: paths_map.clone(), empty_paths_cache: empty_paths_cache.clone() })
|
||||
}
|
||||
|
||||
|
||||
@ -224,13 +226,13 @@ results.{random} {{
|
||||
link: \"{id}.d2.svg\"
|
||||
}}").unwrap();
|
||||
},
|
||||
SearchEvents::ProximityState { graph, paths } => {
|
||||
SearchEvents::ProximityState { graph, paths, empty_paths_cache } => {
|
||||
let cur_ranking_rule = timestamp.len() - 1;
|
||||
let cur_activated_id = activated_id(×tamp);
|
||||
let id = format!("{cur_ranking_rule}.{cur_activated_id}");
|
||||
let mut new_file_path = self.folder_path.join(format!("{id}.d2"));
|
||||
let mut new_file = std::fs::File::create(new_file_path).unwrap();
|
||||
Self::proximity_graph_d2_description(graph, paths, &mut new_file);
|
||||
Self::proximity_graph_d2_description(graph, paths, empty_paths_cache, &mut new_file);
|
||||
writeln!(
|
||||
&mut file,
|
||||
"{id} {{
|
||||
@ -288,7 +290,7 @@ shape: class").unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
fn proximity_graph_d2_description(graph: &RankingRuleGraph<ProximityGraph>, paths: &PathsMap<u64>, file: &mut File) {
|
||||
fn proximity_graph_d2_description(graph: &RankingRuleGraph<ProximityGraph>, paths: &PathsMap<u64>, empty_paths_cache: &EmptyPathsCache, file: &mut File) {
|
||||
writeln!(file,"direction: right").unwrap();
|
||||
|
||||
writeln!(file, "Proximity Graph {{").unwrap();
|
||||
@ -322,35 +324,48 @@ shape: class").unwrap();
|
||||
writeln!(file, "Shortest Paths {{").unwrap();
|
||||
Self::paths_d2_description(graph, "", paths, file);
|
||||
writeln!(file, "}}").unwrap();
|
||||
}
|
||||
fn paths_d2_description(graph: &RankingRuleGraph<ProximityGraph>, paths_idx: &str, paths: &PathsMap<u64>, file: &mut File) {
|
||||
|
||||
writeln!(file, "Empty Path Prefixes {{").unwrap();
|
||||
Self::paths_d2_description(graph, "", &empty_paths_cache.empty_prefixes, file);
|
||||
writeln!(file, "}}").unwrap();
|
||||
|
||||
writeln!(file, "Removed Edges {{").unwrap();
|
||||
for edge_idx in empty_paths_cache.empty_edges.iter() {
|
||||
writeln!(file, "{edge_idx}").unwrap();
|
||||
}
|
||||
writeln!(file, "}}").unwrap();
|
||||
}
|
||||
fn edge_d2_description(graph: &RankingRuleGraph<ProximityGraph>, paths_idx: &str, edge_idx: u32, file: &mut File) -> String {
|
||||
let Edge { from_node, to_node, cost, .. } = graph.all_edges[edge_idx as usize].as_ref().unwrap() ;
|
||||
let from_node = &graph.query_graph.nodes[*from_node as usize];
|
||||
let from_node_desc = match from_node {
|
||||
QueryNode::Term(term) => match &term.value {
|
||||
QueryTerm::Phrase(_) => todo!(),
|
||||
QueryTerm::Word { derivations } => derivations.original.clone(),
|
||||
},
|
||||
QueryNode::Deleted => panic!(),
|
||||
QueryNode::Start => "START".to_owned(),
|
||||
QueryNode::End => "END".to_owned(),
|
||||
};
|
||||
let to_node = &graph.query_graph.nodes[*to_node as usize];
|
||||
let to_node_desc = match to_node {
|
||||
QueryNode::Term(term) => match &term.value {
|
||||
QueryTerm::Phrase(_) => todo!(),
|
||||
QueryTerm::Word { derivations } => derivations.original.clone(),
|
||||
},
|
||||
QueryNode::Deleted => panic!(),
|
||||
QueryNode::Start => "START".to_owned(),
|
||||
QueryNode::End => "END".to_owned(),
|
||||
};
|
||||
let edge_id = format!("{paths_idx}{edge_idx}");
|
||||
writeln!(file, "{edge_id}: \"{from_node_desc}->{to_node_desc} [{cost}]\" {{
|
||||
shape: class
|
||||
}}").unwrap();
|
||||
edge_id
|
||||
}
|
||||
fn paths_d2_description<T>(graph: &RankingRuleGraph<ProximityGraph>, paths_idx: &str, paths: &PathsMap<T>, file: &mut File) {
|
||||
for (edge_idx, rest) in paths.nodes.iter() {
|
||||
let Edge { from_node, to_node, cost, .. } = graph.all_edges[*edge_idx as usize].as_ref().unwrap() ;
|
||||
let from_node = &graph.query_graph.nodes[*from_node as usize];
|
||||
let from_node_desc = match from_node {
|
||||
QueryNode::Term(term) => match &term.value {
|
||||
QueryTerm::Phrase(_) => todo!(),
|
||||
QueryTerm::Word { derivations } => derivations.original.clone(),
|
||||
},
|
||||
QueryNode::Deleted => panic!(),
|
||||
QueryNode::Start => "START".to_owned(),
|
||||
QueryNode::End => "END".to_owned(),
|
||||
};
|
||||
let to_node = &graph.query_graph.nodes[*to_node as usize];
|
||||
let to_node_desc = match to_node {
|
||||
QueryNode::Term(term) => match &term.value {
|
||||
QueryTerm::Phrase(_) => todo!(),
|
||||
QueryTerm::Word { derivations } => derivations.original.clone(),
|
||||
},
|
||||
QueryNode::Deleted => panic!(),
|
||||
QueryNode::Start => "START".to_owned(),
|
||||
QueryNode::End => "END".to_owned(),
|
||||
};
|
||||
let edge_id = format!("{paths_idx}{edge_idx}");
|
||||
writeln!(file, "{edge_id}: \"{from_node_desc}->{to_node_desc} [{cost}]\" {{
|
||||
shape: class
|
||||
}}").unwrap();
|
||||
let edge_id = Self::edge_d2_description(graph, paths_idx, *edge_idx, file);
|
||||
for (dest_edge_idx, _) in rest.nodes.iter() {
|
||||
let dest_edge_id = format!("{paths_idx}{edge_idx}{dest_edge_idx}");
|
||||
writeln!(file, "{edge_id} -> {dest_edge_id}").unwrap();
|
||||
|
@ -5,7 +5,10 @@ use roaring::RoaringBitmap;
|
||||
|
||||
use super::{
|
||||
query_graph,
|
||||
ranking_rule_graph::{paths_map::PathsMap, proximity::ProximityGraph, RankingRuleGraph},
|
||||
ranking_rule_graph::{
|
||||
empty_paths_cache::EmptyPathsCache, paths_map::PathsMap, proximity::ProximityGraph,
|
||||
RankingRuleGraph,
|
||||
},
|
||||
QueryGraph, RankingRule, RankingRuleQueryTrait,
|
||||
};
|
||||
|
||||
@ -41,7 +44,7 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
|
||||
) {
|
||||
}
|
||||
|
||||
fn add_to_results(&mut self, docids: &RoaringBitmap) {}
|
||||
fn add_to_results(&mut self, docids: &mut dyn Iterator<Item = u32>) {}
|
||||
|
||||
fn log_words_state(&mut self, query_graph: &Q) {}
|
||||
|
||||
@ -49,6 +52,7 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
|
||||
&mut self,
|
||||
query_graph: &RankingRuleGraph<ProximityGraph>,
|
||||
paths_map: &PathsMap<u64>,
|
||||
empty_paths_cache: &EmptyPathsCache,
|
||||
) {
|
||||
}
|
||||
}
|
||||
@ -78,7 +82,7 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
|
||||
ranking_rule: &dyn RankingRule<'transaction, Q>,
|
||||
universe: &RoaringBitmap,
|
||||
);
|
||||
fn add_to_results(&mut self, docids: &RoaringBitmap);
|
||||
fn add_to_results(&mut self, docids: &mut dyn Iterator<Item = u32>);
|
||||
|
||||
fn log_words_state(&mut self, query_graph: &Q);
|
||||
|
||||
@ -86,5 +90,6 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
|
||||
&mut self,
|
||||
query_graph: &RankingRuleGraph<ProximityGraph>,
|
||||
paths: &PathsMap<u64>,
|
||||
empty_paths_cache: &EmptyPathsCache,
|
||||
);
|
||||
}
|
||||
|
@ -81,7 +81,9 @@ impl KCheapestPathsState {
|
||||
while self.kth_cheapest_path.cost <= cur_cost {
|
||||
if let Some(next_self) = self.compute_next_cheapest_paths(graph, empty_paths_cache) {
|
||||
self = next_self;
|
||||
if self.kth_cheapest_path.cost == cur_cost {
|
||||
if self.kth_cheapest_path.cost == cur_cost
|
||||
&& !empty_paths_cache.path_is_empty(&self.kth_cheapest_path.edges)
|
||||
{
|
||||
into_map.add_path(&self.kth_cheapest_path);
|
||||
} else {
|
||||
break;
|
||||
@ -148,7 +150,13 @@ impl KCheapestPathsState {
|
||||
|
||||
while let Some((next_cheapest_path, cost2)) = next_cheapest_paths.remove_first() {
|
||||
assert_eq!(cost, cost2);
|
||||
if empty_paths_cache.path_is_empty(&next_cheapest_path) {
|
||||
// NOTE: it is important not to discard the paths that are forbidden due to a
|
||||
// forbidden prefix, because the cheapest path algorithm (Dijkstra) cannot take
|
||||
// this property into account.
|
||||
if next_cheapest_path
|
||||
.iter()
|
||||
.any(|edge_index| graph.all_edges[*edge_index as usize].is_none())
|
||||
{
|
||||
continue;
|
||||
} else {
|
||||
self.cheapest_paths.insert(next_cheapest_path.iter().copied(), cost);
|
||||
|
@ -4,12 +4,16 @@ use roaring::RoaringBitmap;
|
||||
|
||||
use super::paths_map::PathsMap;
|
||||
|
||||
#[derive(Default)]
|
||||
#[derive(Default, Clone)]
|
||||
pub struct EmptyPathsCache {
|
||||
pub empty_edges: RoaringBitmap,
|
||||
pub empty_prefixes: PathsMap<()>,
|
||||
}
|
||||
impl EmptyPathsCache {
|
||||
pub fn forbid_edge(&mut self, edge_idx: u32) {
|
||||
self.empty_edges.insert(edge_idx);
|
||||
self.empty_prefixes.remove_edge(&edge_idx);
|
||||
}
|
||||
pub fn path_is_empty(&self, path: &[u32]) -> bool {
|
||||
for edge in path {
|
||||
if self.empty_edges.contains(*edge) {
|
||||
|
@ -11,6 +11,7 @@ use std::ops::ControlFlow;
|
||||
use heed::RoTxn;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use self::empty_paths_cache::EmptyPathsCache;
|
||||
use self::paths_map::PathsMap;
|
||||
|
||||
use super::db_cache::DatabaseCache;
|
||||
@ -82,6 +83,7 @@ pub trait RankingRuleGraphTrait: Sized {
|
||||
fn log_state(
|
||||
graph: &RankingRuleGraph<Self>,
|
||||
paths: &PathsMap<u64>,
|
||||
empty_paths_cache: &EmptyPathsCache,
|
||||
logger: &mut dyn SearchLogger<QueryGraph>,
|
||||
);
|
||||
}
|
||||
|
@ -3,6 +3,7 @@ pub mod compute_docids;
|
||||
|
||||
use heed::RoTxn;
|
||||
|
||||
use super::empty_paths_cache::EmptyPathsCache;
|
||||
use super::paths_map::PathsMap;
|
||||
use super::{Edge, EdgeDetails, RankingRuleGraphTrait};
|
||||
use crate::new::db_cache::DatabaseCache;
|
||||
@ -67,8 +68,9 @@ impl RankingRuleGraphTrait for ProximityGraph {
|
||||
fn log_state(
|
||||
graph: &super::RankingRuleGraph<Self>,
|
||||
paths: &PathsMap<u64>,
|
||||
empty_paths_cache: &EmptyPathsCache,
|
||||
logger: &mut dyn crate::new::logger::SearchLogger<crate::new::QueryGraph>,
|
||||
) {
|
||||
logger.log_proximity_state(graph, paths);
|
||||
logger.log_proximity_state(graph, paths, empty_paths_cache);
|
||||
}
|
||||
}
|
||||
|
@ -40,7 +40,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
BitmapOrAllRef::Bitmap(edge_docids) => {
|
||||
if edge_docids.is_disjoint(universe) {
|
||||
// 1. Store in the cache that this edge is empty for this universe
|
||||
empty_paths_cache.empty_edges.insert(edge_index);
|
||||
empty_paths_cache.forbid_edge(edge_index);
|
||||
// 2. remove all the paths that contain this edge for this universe
|
||||
paths.remove_edge(&edge_index);
|
||||
// 3. remove this edge from the proximity graph
|
||||
|
@ -139,6 +139,7 @@ pub fn execute_search<'transaction>(
|
||||
candidates[0] = universe.clone();
|
||||
|
||||
let mut cur_ranking_rule_index = 0;
|
||||
|
||||
macro_rules! back {
|
||||
() => {
|
||||
logger.end_iteration_ranking_rule(
|
||||
@ -157,13 +158,20 @@ pub fn execute_search<'transaction>(
|
||||
}
|
||||
|
||||
let mut results = vec![];
|
||||
macro_rules! add_to_results {
|
||||
($candidates:expr) => {
|
||||
logger.add_to_results(&mut $candidates.iter().take(20 - results.len()));
|
||||
let iter = $candidates.iter().take(20 - results.len());
|
||||
results.extend(iter);
|
||||
};
|
||||
}
|
||||
|
||||
// TODO: skip buckets when we want to start from an offset
|
||||
while results.len() < 20 {
|
||||
// The universe for this bucket is zero or one element, so we don't need to sort
|
||||
// anything, just extend the results and go back to the parent ranking rule.
|
||||
if candidates[cur_ranking_rule_index].len() <= 1 {
|
||||
logger.add_to_results(&candidates[cur_ranking_rule_index]);
|
||||
results.extend(&candidates[cur_ranking_rule_index]);
|
||||
add_to_results!(candidates[cur_ranking_rule_index]);
|
||||
back!();
|
||||
continue;
|
||||
}
|
||||
@ -183,15 +191,12 @@ pub fn execute_search<'transaction>(
|
||||
|
||||
if next_bucket.candidates.len() <= 1 {
|
||||
// Only zero or one candidate, no need to sort through the child ranking rule.
|
||||
logger.add_to_results(&next_bucket.candidates);
|
||||
results.extend(next_bucket.candidates);
|
||||
add_to_results!(next_bucket.candidates);
|
||||
continue;
|
||||
} else {
|
||||
// many candidates, give to next ranking rule, if any
|
||||
if cur_ranking_rule_index == ranking_rules_len - 1 {
|
||||
// TODO: don't extend too much, up to the limit only
|
||||
logger.add_to_results(&next_bucket.candidates);
|
||||
results.extend(next_bucket.candidates);
|
||||
add_to_results!(next_bucket.candidates);
|
||||
} else {
|
||||
cur_ranking_rule_index += 1;
|
||||
candidates[cur_ranking_rule_index] = next_bucket.candidates.clone();
|
||||
@ -313,8 +318,7 @@ mod tests {
|
||||
let mut db_cache = DatabaseCache::default();
|
||||
|
||||
let query_graph =
|
||||
make_query_graph(&index, &txn, &mut db_cache, "the sun flower is facing the su")
|
||||
.unwrap();
|
||||
make_query_graph(&index, &txn, &mut db_cache, "a a a a a a a a a a").unwrap();
|
||||
|
||||
// TODO: filters + maybe distinct attributes?
|
||||
let universe = get_start_universe(
|
||||
|
Loading…
Reference in New Issue
Block a user