Cargo fmt

This commit is contained in:
Loïc Lecrenier 2023-03-08 09:55:53 +01:00
parent 10626dddfc
commit 57fa689131
22 changed files with 348 additions and 213 deletions

View File

@ -54,8 +54,6 @@ pub static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
#[macro_use] #[macro_use]
pub mod documents; pub mod documents;
pub use search::new;
mod asc_desc; mod asc_desc;
mod criterion; mod criterion;
mod error; mod error;

View File

@ -1,8 +1,11 @@
use super::{interner::Interned, SearchContext}; use std::collections::hash_map::Entry;
use crate::Result;
use fxhash::FxHashMap; use fxhash::FxHashMap;
use heed::types::ByteSlice; use heed::types::ByteSlice;
use std::collections::hash_map::Entry;
use super::interner::Interned;
use super::SearchContext;
use crate::Result;
#[derive(Default)] #[derive(Default)]
pub struct DatabaseCache<'search> { pub struct DatabaseCache<'search> {

View File

@ -1,13 +1,13 @@
use super::logger::SearchLogger;
use super::ranking_rule_graph::EdgeDocidsCache;
use super::ranking_rule_graph::EmptyPathsCache;
use super::ranking_rule_graph::{RankingRuleGraph, RankingRuleGraphTrait};
use super::small_bitmap::SmallBitmap;
use super::SearchContext;
use super::{BitmapOrAllRef, QueryGraph, RankingRule, RankingRuleOutput};
use crate::Result;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::logger::SearchLogger;
use super::ranking_rule_graph::{
EdgeDocidsCache, EmptyPathsCache, RankingRuleGraph, RankingRuleGraphTrait,
};
use super::small_bitmap::SmallBitmap;
use super::{BitmapOrAllRef, QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
use crate::Result;
pub struct GraphBasedRankingRule<G: RankingRuleGraphTrait> { pub struct GraphBasedRankingRule<G: RankingRuleGraphTrait> {
id: String, id: String,
state: Option<GraphBasedRankingRuleState<G>>, state: Option<GraphBasedRankingRuleState<G>>,

View File

@ -1,7 +1,8 @@
use fxhash::FxHashMap;
use std::hash::Hash; use std::hash::Hash;
use std::marker::PhantomData; use std::marker::PhantomData;
use fxhash::FxHashMap;
pub struct Interned<T> { pub struct Interned<T> {
idx: u32, idx: u32,
_phantom: PhantomData<T>, _phantom: PhantomData<T>,

View File

@ -1,39 +1,37 @@
use std::fs::File;
use std::io::Write;
use std::path::PathBuf;
use std::time::Instant;
use rand::random; use rand::random;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use std::fs::File;
use std::time::Instant;
use std::{io::Write, path::PathBuf};
use crate::new::ranking_rule_graph::TypoGraph; use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
use crate::new::small_bitmap::SmallBitmap; use crate::search::new::ranking_rule_graph::{
use crate::new::{QueryNode, QueryGraph, SearchContext}; Edge, EdgeDetails, EmptyPathsCache, ProximityGraph, RankingRuleGraph, RankingRuleGraphTrait,
use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations}; TypoGraph,
use crate::new::ranking_rule_graph::EmptyPathsCache;
use crate::new::ranking_rule_graph::{Edge, EdgeDetails, RankingRuleGraphTrait};
use crate::new::ranking_rule_graph::{
ProximityGraph, RankingRuleGraph,
}; };
use crate::search::new::small_bitmap::SmallBitmap;
use super::{RankingRule, SearchLogger}; use crate::search::new::{QueryGraph, QueryNode, SearchContext};
use crate::search::new::{RankingRule, SearchLogger};
pub enum SearchEvents { pub enum SearchEvents {
RankingRuleStartIteration { RankingRuleStartIteration {
ranking_rule_idx: usize, ranking_rule_idx: usize,
query: QueryGraph, query: QueryGraph,
universe: RoaringBitmap, universe: RoaringBitmap,
time: Instant time: Instant,
}, },
RankingRuleNextBucket { RankingRuleNextBucket {
ranking_rule_idx: usize, ranking_rule_idx: usize,
universe: RoaringBitmap, universe: RoaringBitmap,
candidates: RoaringBitmap, candidates: RoaringBitmap,
time: Instant time: Instant,
}, },
RankingRuleEndIteration { RankingRuleEndIteration {
ranking_rule_idx: usize, ranking_rule_idx: usize,
universe: RoaringBitmap, universe: RoaringBitmap,
time: Instant time: Instant,
}, },
ExtendResults { ExtendResults {
new: Vec<u32>, new: Vec<u32>,
@ -57,7 +55,11 @@ pub enum SearchEvents {
distances: Vec<Vec<(u16, SmallBitmap)>>, distances: Vec<Vec<(u16, SmallBitmap)>>,
cost: u16, cost: u16,
}, },
RankingRuleSkipBucket { ranking_rule_idx: usize, candidates: RoaringBitmap, time: Instant }, RankingRuleSkipBucket {
ranking_rule_idx: usize,
candidates: RoaringBitmap,
time: Instant,
},
} }
pub struct DetailedSearchLogger { pub struct DetailedSearchLogger {
@ -106,7 +108,6 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
_ranking_rule: &dyn RankingRule<'transaction, QueryGraph>, _ranking_rule: &dyn RankingRule<'transaction, QueryGraph>,
query: &QueryGraph, query: &QueryGraph,
universe: &RoaringBitmap, universe: &RoaringBitmap,
) { ) {
self.events.push(SearchEvents::RankingRuleStartIteration { self.events.push(SearchEvents::RankingRuleStartIteration {
ranking_rule_idx, ranking_rule_idx,
@ -122,7 +123,6 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
_ranking_rule: &dyn RankingRule<'transaction, QueryGraph>, _ranking_rule: &dyn RankingRule<'transaction, QueryGraph>,
universe: &RoaringBitmap, universe: &RoaringBitmap,
candidates: &RoaringBitmap, candidates: &RoaringBitmap,
) { ) {
self.events.push(SearchEvents::RankingRuleNextBucket { self.events.push(SearchEvents::RankingRuleNextBucket {
ranking_rule_idx, ranking_rule_idx,
@ -136,12 +136,11 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
ranking_rule_idx: usize, ranking_rule_idx: usize,
_ranking_rule: &dyn RankingRule<'transaction, QueryGraph>, _ranking_rule: &dyn RankingRule<'transaction, QueryGraph>,
candidates: &RoaringBitmap, candidates: &RoaringBitmap,
) { ) {
self.events.push(SearchEvents::RankingRuleSkipBucket { self.events.push(SearchEvents::RankingRuleSkipBucket {
ranking_rule_idx, ranking_rule_idx,
candidates: candidates.clone(), candidates: candidates.clone(),
time: Instant::now() time: Instant::now(),
}) })
} }
@ -150,12 +149,11 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
ranking_rule_idx: usize, ranking_rule_idx: usize,
_ranking_rule: &dyn RankingRule<'transaction, QueryGraph>, _ranking_rule: &dyn RankingRule<'transaction, QueryGraph>,
universe: &RoaringBitmap, universe: &RoaringBitmap,
) { ) {
self.events.push(SearchEvents::RankingRuleEndIteration { self.events.push(SearchEvents::RankingRuleEndIteration {
ranking_rule_idx, ranking_rule_idx,
universe: universe.clone(), universe: universe.clone(),
time: Instant::now() time: Instant::now(),
}) })
} }
fn add_to_results(&mut self, docids: &[u32]) { fn add_to_results(&mut self, docids: &[u32]) {
@ -166,18 +164,47 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
self.events.push(SearchEvents::WordsState { query_graph: query_graph.clone() }); self.events.push(SearchEvents::WordsState { query_graph: query_graph.clone() });
} }
fn log_proximity_state(&mut self, query_graph: &RankingRuleGraph<ProximityGraph>, paths_map: &[Vec<u16>], empty_paths_cache: &EmptyPathsCache, universe: &RoaringBitmap, distances: Vec<Vec<(u16, SmallBitmap)>>, cost: u16,) { fn log_proximity_state(
self.events.push(SearchEvents::ProximityState { graph: query_graph.clone(), paths: paths_map.to_vec(), empty_paths_cache: empty_paths_cache.clone(), universe: universe.clone(), distances, cost }) &mut self,
query_graph: &RankingRuleGraph<ProximityGraph>,
paths_map: &[Vec<u16>],
empty_paths_cache: &EmptyPathsCache,
universe: &RoaringBitmap,
distances: Vec<Vec<(u16, SmallBitmap)>>,
cost: u16,
) {
self.events.push(SearchEvents::ProximityState {
graph: query_graph.clone(),
paths: paths_map.to_vec(),
empty_paths_cache: empty_paths_cache.clone(),
universe: universe.clone(),
distances,
cost,
})
} }
fn log_typo_state(&mut self, query_graph: &RankingRuleGraph<TypoGraph>, paths_map: &[Vec<u16>], empty_paths_cache: &EmptyPathsCache, universe: &RoaringBitmap, distances: Vec<Vec<(u16, SmallBitmap)>>, cost: u16,) { fn log_typo_state(
self.events.push(SearchEvents::TypoState { graph: query_graph.clone(), paths: paths_map.to_vec(), empty_paths_cache: empty_paths_cache.clone(), universe: universe.clone(), distances, cost }) &mut self,
query_graph: &RankingRuleGraph<TypoGraph>,
paths_map: &[Vec<u16>],
empty_paths_cache: &EmptyPathsCache,
universe: &RoaringBitmap,
distances: Vec<Vec<(u16, SmallBitmap)>>,
cost: u16,
) {
self.events.push(SearchEvents::TypoState {
graph: query_graph.clone(),
paths: paths_map.to_vec(),
empty_paths_cache: empty_paths_cache.clone(),
universe: universe.clone(),
distances,
cost,
})
} }
} }
impl DetailedSearchLogger { impl DetailedSearchLogger {
pub fn write_d2_description(&self,ctx: &mut SearchContext,) { pub fn write_d2_description(&self, ctx: &mut SearchContext) {
let mut prev_time = self.initial_query_time.unwrap(); let mut prev_time = self.initial_query_time.unwrap();
let mut timestamp = vec![]; let mut timestamp = vec![];
fn activated_id(timestamp: &[usize]) -> String { fn activated_id(timestamp: &[usize]) -> String {
@ -229,14 +256,22 @@ impl DetailedSearchLogger {
) )
.unwrap(); .unwrap();
} }
writeln!(&mut file, writeln!(
"{ranking_rule_idx}.{self_activated_id} {{ &mut file,
"{ranking_rule_idx}.{self_activated_id} {{
style {{ style {{
fill: \"#D8A7B1\" fill: \"#D8A7B1\"
}} }}
}}").unwrap(); }}"
)
.unwrap();
} }
SearchEvents::RankingRuleNextBucket { ranking_rule_idx, time, universe, candidates } => { SearchEvents::RankingRuleNextBucket {
ranking_rule_idx,
time,
universe,
candidates,
} => {
let _elapsed = time.duration_since(prev_time); let _elapsed = time.duration_since(prev_time);
prev_time = *time; prev_time = *time;
let old_activated_id = activated_id(&timestamp); let old_activated_id = activated_id(&timestamp);
@ -280,7 +315,7 @@ impl DetailedSearchLogger {
} }
SearchEvents::ExtendResults { new } => { SearchEvents::ExtendResults { new } => {
if new.is_empty() { if new.is_empty() {
continue continue;
} }
let cur_ranking_rule = timestamp.len() - 1; let cur_ranking_rule = timestamp.len() - 1;
let cur_activated_id = activated_id(&timestamp); let cur_activated_id = activated_id(&timestamp);
@ -300,7 +335,7 @@ results.{random} {{
" "
) )
.unwrap(); .unwrap();
}, }
SearchEvents::WordsState { query_graph } => { SearchEvents::WordsState { query_graph } => {
let cur_ranking_rule = timestamp.len() - 1; let cur_ranking_rule = timestamp.len() - 1;
*timestamp.last_mut().unwrap() += 1; *timestamp.last_mut().unwrap() += 1;
@ -314,9 +349,18 @@ results.{random} {{
&mut file, &mut file,
"{id} {{ "{id} {{
link: \"{id}.d2.svg\" link: \"{id}.d2.svg\"
}}").unwrap(); }}"
}, )
SearchEvents::ProximityState { graph, paths, empty_paths_cache, universe, distances, cost } => { .unwrap();
}
SearchEvents::ProximityState {
graph,
paths,
empty_paths_cache,
universe,
distances,
cost,
} => {
let cur_ranking_rule = timestamp.len() - 1; let cur_ranking_rule = timestamp.len() - 1;
*timestamp.last_mut().unwrap() += 1; *timestamp.last_mut().unwrap() += 1;
let cur_activated_id = activated_id(&timestamp); let cur_activated_id = activated_id(&timestamp);
@ -324,15 +368,32 @@ results.{random} {{
let id = format!("{cur_ranking_rule}.{cur_activated_id}"); let id = format!("{cur_ranking_rule}.{cur_activated_id}");
let new_file_path = self.folder_path.join(format!("{id}.d2")); let new_file_path = self.folder_path.join(format!("{id}.d2"));
let mut new_file = std::fs::File::create(new_file_path).unwrap(); let mut new_file = std::fs::File::create(new_file_path).unwrap();
Self::ranking_rule_graph_d2_description(ctx, graph, paths, empty_paths_cache, distances.clone(), &mut new_file); Self::ranking_rule_graph_d2_description(
ctx,
graph,
paths,
empty_paths_cache,
distances.clone(),
&mut new_file,
);
writeln!( writeln!(
&mut file, &mut file,
"{id} {{ "{id} {{
link: \"{id}.d2.svg\" link: \"{id}.d2.svg\"
tooltip: \"cost {cost}, universe len: {}\" tooltip: \"cost {cost}, universe len: {}\"
}}", universe.len()).unwrap(); }}",
}, universe.len()
SearchEvents::TypoState { graph, paths, empty_paths_cache, universe, distances, cost } => { )
.unwrap();
}
SearchEvents::TypoState {
graph,
paths,
empty_paths_cache,
universe,
distances,
cost,
} => {
let cur_ranking_rule = timestamp.len() - 1; let cur_ranking_rule = timestamp.len() - 1;
*timestamp.last_mut().unwrap() += 1; *timestamp.last_mut().unwrap() += 1;
let cur_activated_id = activated_id(&timestamp); let cur_activated_id = activated_id(&timestamp);
@ -340,76 +401,110 @@ results.{random} {{
let id = format!("{cur_ranking_rule}.{cur_activated_id}"); let id = format!("{cur_ranking_rule}.{cur_activated_id}");
let new_file_path = self.folder_path.join(format!("{id}.d2")); let new_file_path = self.folder_path.join(format!("{id}.d2"));
let mut new_file = std::fs::File::create(new_file_path).unwrap(); let mut new_file = std::fs::File::create(new_file_path).unwrap();
Self::ranking_rule_graph_d2_description(ctx,graph, paths, empty_paths_cache, distances.clone(), &mut new_file); Self::ranking_rule_graph_d2_description(
ctx,
graph,
paths,
empty_paths_cache,
distances.clone(),
&mut new_file,
);
writeln!( writeln!(
&mut file, &mut file,
"{id} {{ "{id} {{
link: \"{id}.d2.svg\" link: \"{id}.d2.svg\"
tooltip: \"cost {cost}, universe len: {}\" tooltip: \"cost {cost}, universe len: {}\"
}}", universe.len()).unwrap(); }}",
}, universe.len()
)
.unwrap();
}
} }
} }
writeln!(&mut file, "}}").unwrap(); writeln!(&mut file, "}}").unwrap();
} }
fn query_node_d2_desc(ctx: &mut SearchContext, node_idx: usize, node: &QueryNode, distances: &[(u16, SmallBitmap)], file: &mut File) { fn query_node_d2_desc(
ctx: &mut SearchContext,
node_idx: usize,
node: &QueryNode,
distances: &[(u16, SmallBitmap)],
file: &mut File,
) {
match &node { match &node {
QueryNode::Term(LocatedQueryTerm { value, .. }) => { QueryNode::Term(LocatedQueryTerm { value, .. }) => match value {
match value { QueryTerm::Phrase { phrase } => {
QueryTerm::Phrase { phrase } => { let phrase = ctx.phrase_interner.get(*phrase);
let phrase = ctx.phrase_interner.get(*phrase); let phrase_str = phrase.description(&ctx.word_interner);
let phrase_str = phrase.description(&ctx.word_interner); writeln!(file, "{node_idx} : \"{phrase_str}\"").unwrap();
writeln!(file,"{node_idx} : \"{phrase_str}\"").unwrap(); }
}, QueryTerm::Word {
QueryTerm::Word { derivations: WordDerivations { original, zero_typo, one_typo, two_typos, use_prefix_db, synonyms, split_words } } => { derivations:
let original = ctx.word_interner.get(*original); WordDerivations {
writeln!(file,"{node_idx} : \"{original}\" {{ original,
shape: class").unwrap(); zero_typo,
for w in zero_typo.iter().copied() { one_typo,
let w = ctx.word_interner.get(w); two_typos,
writeln!(file, "\"{w}\" : 0").unwrap(); use_prefix_db,
} synonyms,
for w in one_typo.iter().copied() { split_words,
let w = ctx.word_interner.get(w); },
writeln!(file, "\"{w}\" : 1").unwrap(); } => {
} let original = ctx.word_interner.get(*original);
for w in two_typos.iter().copied() { writeln!(
let w = ctx.word_interner.get(w); file,
writeln!(file, "\"{w}\" : 2").unwrap(); "{node_idx} : \"{original}\" {{
} shape: class"
if let Some(split_words) = split_words { )
let phrase = ctx.phrase_interner.get(*split_words); .unwrap();
let phrase_str = phrase.description(&ctx.word_interner); for w in zero_typo.iter().copied() {
writeln!(file, "\"{phrase_str}\" : split_words").unwrap(); let w = ctx.word_interner.get(w);
} writeln!(file, "\"{w}\" : 0").unwrap();
for synonym in synonyms.iter().copied() { }
let phrase = ctx.phrase_interner.get(synonym); for w in one_typo.iter().copied() {
let phrase_str = phrase.description(&ctx.word_interner); let w = ctx.word_interner.get(w);
writeln!(file, "\"{phrase_str}\" : synonym").unwrap(); writeln!(file, "\"{w}\" : 1").unwrap();
} }
if *use_prefix_db { for w in two_typos.iter().copied() {
writeln!(file, "use prefix DB : true").unwrap(); let w = ctx.word_interner.get(w);
} writeln!(file, "\"{w}\" : 2").unwrap();
for (d, edges) in distances.iter() { }
writeln!(file, "\"distance {d}\" : {:?}", edges.iter().collect::<Vec<_>>() ).unwrap(); if let Some(split_words) = split_words {
} let phrase = ctx.phrase_interner.get(*split_words);
let phrase_str = phrase.description(&ctx.word_interner);
writeln!(file, "\"{phrase_str}\" : split_words").unwrap();
}
for synonym in synonyms.iter().copied() {
let phrase = ctx.phrase_interner.get(synonym);
let phrase_str = phrase.description(&ctx.word_interner);
writeln!(file, "\"{phrase_str}\" : synonym").unwrap();
}
if *use_prefix_db {
writeln!(file, "use prefix DB : true").unwrap();
}
for (d, edges) in distances.iter() {
writeln!(file, "\"distance {d}\" : {:?}", edges.iter().collect::<Vec<_>>())
.unwrap();
}
writeln!(file, "}}").unwrap(); writeln!(file, "}}").unwrap();
},
} }
}, },
QueryNode::Deleted => panic!(), QueryNode::Deleted => panic!(),
QueryNode::Start => { QueryNode::Start => {
writeln!(file,"{node_idx} : START").unwrap(); writeln!(file, "{node_idx} : START").unwrap();
}, }
QueryNode::End => { QueryNode::End => {
writeln!(file,"{node_idx} : END").unwrap(); writeln!(file, "{node_idx} : END").unwrap();
}, }
} }
} }
fn query_graph_d2_description(ctx: &mut SearchContext, query_graph: &QueryGraph, file: &mut File) { fn query_graph_d2_description(
writeln!(file,"direction: right").unwrap(); ctx: &mut SearchContext,
query_graph: &QueryGraph,
file: &mut File,
) {
writeln!(file, "direction: right").unwrap();
for node in 0..query_graph.nodes.len() { for node in 0..query_graph.nodes.len() {
if matches!(query_graph.nodes[node], QueryNode::Deleted) { if matches!(query_graph.nodes[node], QueryNode::Deleted) {
continue; continue;
@ -421,8 +516,15 @@ shape: class").unwrap();
} }
} }
} }
fn ranking_rule_graph_d2_description<R: RankingRuleGraphTrait>(ctx: &mut SearchContext, graph: &RankingRuleGraph<R>, paths: &[Vec<u16>], _empty_paths_cache: &EmptyPathsCache, distances: Vec<Vec<(u16, SmallBitmap)>>, file: &mut File) { fn ranking_rule_graph_d2_description<R: RankingRuleGraphTrait>(
writeln!(file,"direction: right").unwrap(); ctx: &mut SearchContext,
graph: &RankingRuleGraph<R>,
paths: &[Vec<u16>],
_empty_paths_cache: &EmptyPathsCache,
distances: Vec<Vec<(u16, SmallBitmap)>>,
file: &mut File,
) {
writeln!(file, "direction: right").unwrap();
writeln!(file, "Proximity Graph {{").unwrap(); writeln!(file, "Proximity Graph {{").unwrap();
for (node_idx, node) in graph.query_graph.nodes.iter().enumerate() { for (node_idx, node) in graph.query_graph.nodes.iter().enumerate() {
@ -437,17 +539,21 @@ shape: class").unwrap();
match &details { match &details {
EdgeDetails::Unconditional => { EdgeDetails::Unconditional => {
writeln!(file, writeln!(
file,
"{from_node} -> {to_node} : \"always cost {cost}\"", "{from_node} -> {to_node} : \"always cost {cost}\"",
cost = edge.cost, cost = edge.cost,
).unwrap(); )
.unwrap();
} }
EdgeDetails::Data(details) => { EdgeDetails::Data(details) => {
writeln!(file, writeln!(
file,
"{from_node} -> {to_node} : \"cost {cost} {edge_label}\"", "{from_node} -> {to_node} : \"cost {cost} {edge_label}\"",
cost = edge.cost, cost = edge.cost,
edge_label = R::graphviz_edge_details_label(details) edge_label = R::graphviz_edge_details_label(details)
).unwrap(); )
.unwrap();
} }
} }
} }
@ -457,7 +563,6 @@ shape: class").unwrap();
// Self::paths_d2_description(graph, paths, file); // Self::paths_d2_description(graph, paths, file);
// writeln!(file, "}}").unwrap(); // writeln!(file, "}}").unwrap();
writeln!(file, "Shortest Paths {{").unwrap(); writeln!(file, "Shortest Paths {{").unwrap();
Self::paths_d2_description(ctx, graph, paths, file); Self::paths_d2_description(ctx, graph, paths, file);
writeln!(file, "}}").unwrap(); writeln!(file, "}}").unwrap();
@ -478,18 +583,24 @@ shape: class").unwrap();
// } // }
// writeln!(file, "}}").unwrap(); // writeln!(file, "}}").unwrap();
} }
fn edge_d2_description<R: RankingRuleGraphTrait>(ctx: &mut SearchContext, graph: &RankingRuleGraph<R>, edge_idx: u16, file: &mut File) { fn edge_d2_description<R: RankingRuleGraphTrait>(
let Edge { from_node, to_node, cost, .. } = graph.all_edges[edge_idx as usize].as_ref().unwrap() ; ctx: &mut SearchContext,
graph: &RankingRuleGraph<R>,
edge_idx: u16,
file: &mut File,
) {
let Edge { from_node, to_node, cost, .. } =
graph.all_edges[edge_idx as usize].as_ref().unwrap();
let from_node = &graph.query_graph.nodes[*from_node as usize]; let from_node = &graph.query_graph.nodes[*from_node as usize];
let from_node_desc = match from_node { let from_node_desc = match from_node {
QueryNode::Term(term) => match &term.value { QueryNode::Term(term) => match &term.value {
QueryTerm::Phrase { phrase } => { QueryTerm::Phrase { phrase } => {
let phrase = ctx.phrase_interner.get(*phrase); let phrase = ctx.phrase_interner.get(*phrase);
phrase.description(&ctx.word_interner) phrase.description(&ctx.word_interner)
}, }
QueryTerm::Word { derivations } => { QueryTerm::Word { derivations } => {
ctx.word_interner.get(derivations.original).to_owned() ctx.word_interner.get(derivations.original).to_owned()
}, }
}, },
QueryNode::Deleted => panic!(), QueryNode::Deleted => panic!(),
QueryNode::Start => "START".to_owned(), QueryNode::Start => "START".to_owned(),
@ -501,18 +612,29 @@ shape: class").unwrap();
QueryTerm::Phrase { phrase } => { QueryTerm::Phrase { phrase } => {
let phrase = ctx.phrase_interner.get(*phrase); let phrase = ctx.phrase_interner.get(*phrase);
phrase.description(&ctx.word_interner) phrase.description(&ctx.word_interner)
}, }
QueryTerm::Word { derivations } => ctx.word_interner.get(derivations.original).to_owned(), QueryTerm::Word { derivations } => {
ctx.word_interner.get(derivations.original).to_owned()
}
}, },
QueryNode::Deleted => panic!(), QueryNode::Deleted => panic!(),
QueryNode::Start => "START".to_owned(), QueryNode::Start => "START".to_owned(),
QueryNode::End => "END".to_owned(), QueryNode::End => "END".to_owned(),
}; };
writeln!(file, "{edge_idx}: \"{from_node_desc}->{to_node_desc} [{cost}]\" {{ writeln!(
file,
"{edge_idx}: \"{from_node_desc}->{to_node_desc} [{cost}]\" {{
shape: class shape: class
}}").unwrap(); }}"
)
.unwrap();
} }
fn paths_d2_description<R: RankingRuleGraphTrait>(ctx: &mut SearchContext, graph: &RankingRuleGraph<R>, paths: &[Vec<u16>], file: &mut File) { fn paths_d2_description<R: RankingRuleGraphTrait>(
ctx: &mut SearchContext,
graph: &RankingRuleGraph<R>,
paths: &[Vec<u16>],
file: &mut File,
) {
for (path_idx, edge_indexes) in paths.iter().enumerate() { for (path_idx, edge_indexes) in paths.iter().enumerate() {
writeln!(file, "{path_idx} {{").unwrap(); writeln!(file, "{path_idx} {{").unwrap();
for edge_idx in edge_indexes.iter() { for edge_idx in edge_indexes.iter() {

View File

@ -3,11 +3,9 @@ pub mod detailed;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::{ use super::ranking_rule_graph::{EmptyPathsCache, ProximityGraph, RankingRuleGraph, TypoGraph};
ranking_rule_graph::{EmptyPathsCache, ProximityGraph, RankingRuleGraph, TypoGraph}, use super::small_bitmap::SmallBitmap;
small_bitmap::SmallBitmap, use super::{RankingRule, RankingRuleQueryTrait};
RankingRule, RankingRuleQueryTrait,
};
pub struct DefaultSearchLogger; pub struct DefaultSearchLogger;
impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger { impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {

View File

@ -11,12 +11,8 @@ mod small_bitmap;
mod sort; mod sort;
mod words; mod words;
use self::interner::Interner; use std::collections::BTreeSet;
use self::logger::SearchLogger;
use self::query_term::Phrase;
use self::resolve_query_graph::{resolve_query_graph, NodeDocIdsCache};
use crate::new::query_term::located_query_terms_from_string;
use crate::{Filter, Index, Result, TermsMatchingStrategy};
use charabia::Tokenize; use charabia::Tokenize;
use db_cache::DatabaseCache; use db_cache::DatabaseCache;
use heed::RoTxn; use heed::RoTxn;
@ -26,7 +22,13 @@ pub use ranking_rules::{
RankingRuleOutputIterWrapper, RankingRuleQueryTrait, RankingRuleOutputIterWrapper, RankingRuleQueryTrait,
}; };
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use std::collections::BTreeSet;
use self::interner::Interner;
use self::logger::SearchLogger;
use self::query_term::Phrase;
use self::resolve_query_graph::{resolve_query_graph, NodeDocIdsCache};
use crate::search::new::query_term::located_query_terms_from_string;
use crate::{Filter, Index, Result, TermsMatchingStrategy};
pub enum BitmapOrAllRef<'s> { pub enum BitmapOrAllRef<'s> {
Bitmap(&'s RoaringBitmap), Bitmap(&'s RoaringBitmap),

View File

@ -12,13 +12,12 @@ use heed::types::DecodeIgnore;
use heed::RoTxn; use heed::RoTxn;
use itertools::Itertools; use itertools::Itertools;
use super::interner::{Interned, Interner};
use super::SearchContext;
use crate::search::fst_utils::{Complement, Intersection, StartsWith, Union}; use crate::search::fst_utils::{Complement, Intersection, StartsWith, Union};
use crate::search::{build_dfa, get_first}; use crate::search::{build_dfa, get_first};
use crate::{CboRoaringBitmapLenCodec, Index, Result}; use crate::{CboRoaringBitmapLenCodec, Index, Result};
use super::interner::{Interned, Interner};
use super::SearchContext;
#[derive(Default, Clone, PartialEq, Eq, Hash)] #[derive(Default, Clone, PartialEq, Eq, Hash)]
pub struct Phrase { pub struct Phrase {
pub words: Vec<Option<Interned<String>>>, pub words: Vec<Option<Interned<String>>>,

View File

@ -1,8 +1,8 @@
use std::collections::HashSet; use std::collections::HashSet;
use super::{Edge, RankingRuleGraph, RankingRuleGraphTrait}; use super::{Edge, RankingRuleGraph, RankingRuleGraphTrait};
use crate::new::small_bitmap::SmallBitmap; use crate::search::new::small_bitmap::SmallBitmap;
use crate::new::{QueryGraph, SearchContext}; use crate::search::new::{QueryGraph, SearchContext};
use crate::Result; use crate::Result;
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {

View File

@ -1,11 +1,12 @@
#![allow(clippy::too_many_arguments)] #![allow(clippy::too_many_arguments)]
use std::collections::btree_map::Entry;
use std::collections::{BTreeMap, VecDeque};
use super::empty_paths_cache::EmptyPathsCache; use super::empty_paths_cache::EmptyPathsCache;
use super::{RankingRuleGraph, RankingRuleGraphTrait}; use super::{RankingRuleGraph, RankingRuleGraphTrait};
use crate::new::small_bitmap::SmallBitmap; use crate::search::new::small_bitmap::SmallBitmap;
use crate::Result; use crate::Result;
use std::collections::btree_map::Entry;
use std::collections::{BTreeMap, VecDeque};
#[derive(Debug, Clone, PartialEq, Eq, Hash)] #[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Path { pub struct Path {

View File

@ -1,11 +1,12 @@
use std::marker::PhantomData; use std::marker::PhantomData;
use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait};
use crate::new::{BitmapOrAllRef, SearchContext};
use crate::Result;
use fxhash::FxHashMap; use fxhash::FxHashMap;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait};
use crate::search::new::{BitmapOrAllRef, SearchContext};
use crate::Result;
// TODO: the cache should have a G::EdgeDetails as key // TODO: the cache should have a G::EdgeDetails as key
// but then it means that we should have a quick way of // but then it means that we should have a quick way of
// computing their hash and comparing them // computing their hash and comparing them

View File

@ -1,6 +1,5 @@
use crate::new::small_bitmap::SmallBitmap;
use super::paths_map::PathsMap; use super::paths_map::PathsMap;
use crate::search::new::small_bitmap::SmallBitmap;
#[derive(Clone)] #[derive(Clone)]
pub struct EmptyPathsCache { pub struct EmptyPathsCache {

View File

@ -6,16 +6,17 @@ mod paths_map;
mod proximity; mod proximity;
mod typo; mod typo;
use super::logger::SearchLogger;
use super::small_bitmap::SmallBitmap;
use super::{QueryGraph, QueryNode, SearchContext};
use crate::Result;
pub use edge_docids_cache::EdgeDocidsCache; pub use edge_docids_cache::EdgeDocidsCache;
pub use empty_paths_cache::EmptyPathsCache; pub use empty_paths_cache::EmptyPathsCache;
pub use proximity::ProximityGraph; pub use proximity::ProximityGraph;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
pub use typo::TypoGraph; pub use typo::TypoGraph;
use super::logger::SearchLogger;
use super::small_bitmap::SmallBitmap;
use super::{QueryGraph, QueryNode, SearchContext};
use crate::Result;
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub enum EdgeDetails<E> { pub enum EdgeDetails<E> {
Unconditional, Unconditional,

View File

@ -1,11 +1,10 @@
use crate::new::small_bitmap::SmallBitmap;
use super::cheapest_paths::Path; use super::cheapest_paths::Path;
use crate::search::new::small_bitmap::SmallBitmap;
// What is PathsMap used for? // What is PathsMap used for?
// For the empty_prefixes field in the EmptyPathsCache only :/ // For the empty_prefixes field in the EmptyPathsCache only :/
// but it could be used for more, like efficient computing of a set of paths // but it could be used for more, like efficient computing of a set of paths
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub struct PathsMap<V> { pub struct PathsMap<V> {
pub nodes: Vec<(u16, PathsMap<V>)>, pub nodes: Vec<(u16, PathsMap<V>)>,

View File

@ -1,12 +1,14 @@
use super::ProximityEdge;
use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
use crate::new::ranking_rule_graph::proximity::WordPair;
use crate::new::ranking_rule_graph::EdgeDetails;
use crate::new::{QueryNode, SearchContext};
use crate::Result;
use itertools::Itertools;
use std::collections::BTreeMap; use std::collections::BTreeMap;
use itertools::Itertools;
use super::ProximityEdge;
use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
use crate::search::new::ranking_rule_graph::proximity::WordPair;
use crate::search::new::ranking_rule_graph::EdgeDetails;
use crate::search::new::{QueryNode, SearchContext};
use crate::Result;
pub fn visit_from_node( pub fn visit_from_node(
ctx: &mut SearchContext, ctx: &mut SearchContext,
from_node: &QueryNode, from_node: &QueryNode,

View File

@ -1,8 +1,9 @@
use super::{ProximityEdge, WordPair};
use crate::new::SearchContext;
use crate::{CboRoaringBitmapCodec, Result};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::{ProximityEdge, WordPair};
use crate::search::new::SearchContext;
use crate::{CboRoaringBitmapCodec, Result};
pub fn compute_docids<'search>( pub fn compute_docids<'search>(
ctx: &mut SearchContext<'search>, ctx: &mut SearchContext<'search>,
edge: &ProximityEdge, edge: &ProximityEdge,

View File

@ -1,15 +1,16 @@
pub mod build; pub mod build;
pub mod compute_docids; pub mod compute_docids;
use roaring::RoaringBitmap;
use super::empty_paths_cache::EmptyPathsCache; use super::empty_paths_cache::EmptyPathsCache;
use super::{EdgeDetails, RankingRuleGraphTrait}; use super::{EdgeDetails, RankingRuleGraphTrait};
use crate::new::interner::Interned; use crate::search::new::interner::Interned;
use crate::new::logger::SearchLogger; use crate::search::new::logger::SearchLogger;
use crate::new::query_term::WordDerivations; use crate::search::new::query_term::WordDerivations;
use crate::new::small_bitmap::SmallBitmap; use crate::search::new::small_bitmap::SmallBitmap;
use crate::new::{QueryGraph, QueryNode, SearchContext}; use crate::search::new::{QueryGraph, QueryNode, SearchContext};
use crate::Result; use crate::Result;
use roaring::RoaringBitmap;
// TODO: intern the proximity edges as well? // TODO: intern the proximity edges as well?

View File

@ -1,15 +1,16 @@
use super::empty_paths_cache::EmptyPathsCache;
use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait};
use crate::new::interner::Interned;
use crate::new::logger::SearchLogger;
use crate::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm, WordDerivations};
use crate::new::resolve_query_graph::resolve_phrase;
use crate::new::small_bitmap::SmallBitmap;
use crate::new::{QueryGraph, QueryNode, SearchContext};
use crate::{Result, RoaringBitmapCodec};
use heed::BytesDecode; use heed::BytesDecode;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::empty_paths_cache::EmptyPathsCache;
use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait};
use crate::search::new::interner::Interned;
use crate::search::new::logger::SearchLogger;
use crate::search::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm, WordDerivations};
use crate::search::new::resolve_query_graph::resolve_phrase;
use crate::search::new::small_bitmap::SmallBitmap;
use crate::search::new::{QueryGraph, QueryNode, SearchContext};
use crate::{Result, RoaringBitmapCodec};
#[derive(Clone)] #[derive(Clone)]
pub enum TypoEdge { pub enum TypoEdge {
Phrase { phrase: Interned<Phrase> }, Phrase { phrase: Interned<Phrase> },

View File

@ -1,11 +1,10 @@
use super::logger::SearchLogger;
use super::QueryGraph;
use super::SearchContext;
use crate::new::graph_based_ranking_rule::GraphBasedRankingRule;
use crate::new::ranking_rule_graph::ProximityGraph;
use crate::new::ranking_rule_graph::TypoGraph;
use crate::new::words::Words;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::logger::SearchLogger;
use super::{QueryGraph, SearchContext};
use crate::search::new::graph_based_ranking_rule::GraphBasedRankingRule;
use crate::search::new::ranking_rule_graph::{ProximityGraph, TypoGraph};
use crate::search::new::words::Words;
// use crate::search::new::sort::Sort; // use crate::search::new::sort::Sort;
use crate::{Result, TermsMatchingStrategy}; use crate::{Result, TermsMatchingStrategy};
@ -239,16 +238,18 @@ pub fn apply_ranking_rules<'search>(
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
// use crate::allocator::ALLOC; // use crate::allocator::ALLOC;
use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use crate::new::{execute_search, SearchContext};
use big_s::S;
use heed::EnvOpenOptions;
use maplit::hashset;
use std::fs::File; use std::fs::File;
use std::io::{BufRead, BufReader, Cursor, Seek}; use std::io::{BufRead, BufReader, Cursor, Seek};
use std::time::Instant; use std::time::Instant;
// use crate::new::logger::detailed::DetailedSearchLogger;
use crate::new::logger::DefaultSearchLogger; use big_s::S;
use heed::EnvOpenOptions;
use maplit::hashset;
use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
// use crate::search::new::logger::detailed::DetailedSearchLogger;
use crate::search::new::logger::DefaultSearchLogger;
use crate::search::new::{execute_search, SearchContext};
use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings}; use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use crate::{Criterion, Index, Object, Search, TermsMatchingStrategy}; use crate::{Criterion, Index, Object, Search, TermsMatchingStrategy};
@ -265,7 +266,7 @@ mod tests {
// loop { // loop {
let start = Instant::now(); let start = Instant::now();
// let mut logger = crate::new::logger::detailed::DetailedSearchLogger::new("log"); // let mut logger = crate::search::new::logger::detailed::DetailedSearchLogger::new("log");
let mut ctx = SearchContext::new(&index, &txn); let mut ctx = SearchContext::new(&index, &txn);
let results = execute_search( let results = execute_search(
&mut ctx, &mut ctx,
@ -362,7 +363,7 @@ mod tests {
// loop { // loop {
let start = Instant::now(); let start = Instant::now();
let mut logger = crate::new::logger::detailed::DetailedSearchLogger::new("log"); let mut logger = crate::search::new::logger::detailed::DetailedSearchLogger::new("log");
let mut ctx = SearchContext::new(&index, &txn); let mut ctx = SearchContext::new(&index, &txn);
let results = execute_search( let results = execute_search(
&mut ctx, &mut ctx,

View File

@ -1,12 +1,14 @@
use std::collections::VecDeque;
use fxhash::FxHashMap;
use heed::BytesDecode;
use roaring::{MultiOps, RoaringBitmap};
use super::interner::Interned; use super::interner::Interned;
use super::query_term::{Phrase, QueryTerm, WordDerivations}; use super::query_term::{Phrase, QueryTerm, WordDerivations};
use super::small_bitmap::SmallBitmap; use super::small_bitmap::SmallBitmap;
use super::{QueryGraph, QueryNode, SearchContext}; use super::{QueryGraph, QueryNode, SearchContext};
use crate::{CboRoaringBitmapCodec, Result, RoaringBitmapCodec}; use crate::{CboRoaringBitmapCodec, Result, RoaringBitmapCodec};
use fxhash::FxHashMap;
use heed::BytesDecode;
use roaring::{MultiOps, RoaringBitmap};
use std::collections::VecDeque;
// TODO: manual performance metrics: access to DB, bitmap deserializations/operations, etc. // TODO: manual performance metrics: access to DB, bitmap deserializations/operations, etc.
#[derive(Default)] #[derive(Default)]

View File

@ -1,3 +1,5 @@
use roaring::RoaringBitmap;
use super::logger::SearchLogger; use super::logger::SearchLogger;
use super::{ use super::{
RankingRule, RankingRuleOutput, RankingRuleOutputIter, RankingRuleOutputIterWrapper, RankingRule, RankingRuleOutput, RankingRuleOutputIter, RankingRuleOutputIterWrapper,
@ -11,7 +13,6 @@ use crate::{
Index, Index,
Result, Result,
}; };
use roaring::RoaringBitmap;
pub struct Sort<'search, Query> { pub struct Sort<'search, Query> {
field_name: String, field_name: String,

View File

@ -1,9 +1,11 @@
use std::collections::BTreeSet;
use roaring::RoaringBitmap;
use super::logger::SearchLogger; use super::logger::SearchLogger;
use super::resolve_query_graph::resolve_query_graph; use super::resolve_query_graph::resolve_query_graph;
use super::{QueryGraph, QueryNode, RankingRule, RankingRuleOutput, SearchContext}; use super::{QueryGraph, QueryNode, RankingRule, RankingRuleOutput, SearchContext};
use crate::{Result, TermsMatchingStrategy}; use crate::{Result, TermsMatchingStrategy};
use roaring::RoaringBitmap;
use std::collections::BTreeSet;
pub struct Words { pub struct Words {
exhausted: bool, exhausted: bool,