mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 21:04:27 +01:00
Cargo fmt
This commit is contained in:
parent
10626dddfc
commit
57fa689131
@ -54,8 +54,6 @@ pub static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
||||
#[macro_use]
|
||||
pub mod documents;
|
||||
|
||||
pub use search::new;
|
||||
|
||||
mod asc_desc;
|
||||
mod criterion;
|
||||
mod error;
|
||||
|
@ -1,8 +1,11 @@
|
||||
use super::{interner::Interned, SearchContext};
|
||||
use crate::Result;
|
||||
use std::collections::hash_map::Entry;
|
||||
|
||||
use fxhash::FxHashMap;
|
||||
use heed::types::ByteSlice;
|
||||
use std::collections::hash_map::Entry;
|
||||
|
||||
use super::interner::Interned;
|
||||
use super::SearchContext;
|
||||
use crate::Result;
|
||||
|
||||
#[derive(Default)]
|
||||
pub struct DatabaseCache<'search> {
|
||||
|
@ -1,13 +1,13 @@
|
||||
use super::logger::SearchLogger;
|
||||
use super::ranking_rule_graph::EdgeDocidsCache;
|
||||
use super::ranking_rule_graph::EmptyPathsCache;
|
||||
use super::ranking_rule_graph::{RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use super::small_bitmap::SmallBitmap;
|
||||
use super::SearchContext;
|
||||
use super::{BitmapOrAllRef, QueryGraph, RankingRule, RankingRuleOutput};
|
||||
use crate::Result;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::logger::SearchLogger;
|
||||
use super::ranking_rule_graph::{
|
||||
EdgeDocidsCache, EmptyPathsCache, RankingRuleGraph, RankingRuleGraphTrait,
|
||||
};
|
||||
use super::small_bitmap::SmallBitmap;
|
||||
use super::{BitmapOrAllRef, QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
|
||||
use crate::Result;
|
||||
|
||||
pub struct GraphBasedRankingRule<G: RankingRuleGraphTrait> {
|
||||
id: String,
|
||||
state: Option<GraphBasedRankingRuleState<G>>,
|
||||
|
@ -1,7 +1,8 @@
|
||||
use fxhash::FxHashMap;
|
||||
use std::hash::Hash;
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use fxhash::FxHashMap;
|
||||
|
||||
pub struct Interned<T> {
|
||||
idx: u32,
|
||||
_phantom: PhantomData<T>,
|
||||
|
@ -1,39 +1,37 @@
|
||||
use std::fs::File;
|
||||
use std::io::Write;
|
||||
use std::path::PathBuf;
|
||||
use std::time::Instant;
|
||||
|
||||
use rand::random;
|
||||
use roaring::RoaringBitmap;
|
||||
use std::fs::File;
|
||||
use std::time::Instant;
|
||||
use std::{io::Write, path::PathBuf};
|
||||
|
||||
use crate::new::ranking_rule_graph::TypoGraph;
|
||||
use crate::new::small_bitmap::SmallBitmap;
|
||||
use crate::new::{QueryNode, QueryGraph, SearchContext};
|
||||
use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
|
||||
use crate::new::ranking_rule_graph::EmptyPathsCache;
|
||||
use crate::new::ranking_rule_graph::{Edge, EdgeDetails, RankingRuleGraphTrait};
|
||||
use crate::new::ranking_rule_graph::{
|
||||
ProximityGraph, RankingRuleGraph,
|
||||
use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
|
||||
use crate::search::new::ranking_rule_graph::{
|
||||
Edge, EdgeDetails, EmptyPathsCache, ProximityGraph, RankingRuleGraph, RankingRuleGraphTrait,
|
||||
TypoGraph,
|
||||
};
|
||||
|
||||
use super::{RankingRule, SearchLogger};
|
||||
use crate::search::new::small_bitmap::SmallBitmap;
|
||||
use crate::search::new::{QueryGraph, QueryNode, SearchContext};
|
||||
use crate::search::new::{RankingRule, SearchLogger};
|
||||
|
||||
pub enum SearchEvents {
|
||||
RankingRuleStartIteration {
|
||||
ranking_rule_idx: usize,
|
||||
query: QueryGraph,
|
||||
universe: RoaringBitmap,
|
||||
time: Instant
|
||||
time: Instant,
|
||||
},
|
||||
RankingRuleNextBucket {
|
||||
ranking_rule_idx: usize,
|
||||
universe: RoaringBitmap,
|
||||
candidates: RoaringBitmap,
|
||||
time: Instant
|
||||
time: Instant,
|
||||
},
|
||||
RankingRuleEndIteration {
|
||||
ranking_rule_idx: usize,
|
||||
universe: RoaringBitmap,
|
||||
time: Instant
|
||||
time: Instant,
|
||||
},
|
||||
ExtendResults {
|
||||
new: Vec<u32>,
|
||||
@ -57,7 +55,11 @@ pub enum SearchEvents {
|
||||
distances: Vec<Vec<(u16, SmallBitmap)>>,
|
||||
cost: u16,
|
||||
},
|
||||
RankingRuleSkipBucket { ranking_rule_idx: usize, candidates: RoaringBitmap, time: Instant },
|
||||
RankingRuleSkipBucket {
|
||||
ranking_rule_idx: usize,
|
||||
candidates: RoaringBitmap,
|
||||
time: Instant,
|
||||
},
|
||||
}
|
||||
|
||||
pub struct DetailedSearchLogger {
|
||||
@ -106,7 +108,6 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
|
||||
_ranking_rule: &dyn RankingRule<'transaction, QueryGraph>,
|
||||
query: &QueryGraph,
|
||||
universe: &RoaringBitmap,
|
||||
|
||||
) {
|
||||
self.events.push(SearchEvents::RankingRuleStartIteration {
|
||||
ranking_rule_idx,
|
||||
@ -122,7 +123,6 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
|
||||
_ranking_rule: &dyn RankingRule<'transaction, QueryGraph>,
|
||||
universe: &RoaringBitmap,
|
||||
candidates: &RoaringBitmap,
|
||||
|
||||
) {
|
||||
self.events.push(SearchEvents::RankingRuleNextBucket {
|
||||
ranking_rule_idx,
|
||||
@ -136,12 +136,11 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
|
||||
ranking_rule_idx: usize,
|
||||
_ranking_rule: &dyn RankingRule<'transaction, QueryGraph>,
|
||||
candidates: &RoaringBitmap,
|
||||
|
||||
) {
|
||||
self.events.push(SearchEvents::RankingRuleSkipBucket {
|
||||
ranking_rule_idx,
|
||||
candidates: candidates.clone(),
|
||||
time: Instant::now()
|
||||
time: Instant::now(),
|
||||
})
|
||||
}
|
||||
|
||||
@ -150,12 +149,11 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
|
||||
ranking_rule_idx: usize,
|
||||
_ranking_rule: &dyn RankingRule<'transaction, QueryGraph>,
|
||||
universe: &RoaringBitmap,
|
||||
|
||||
) {
|
||||
self.events.push(SearchEvents::RankingRuleEndIteration {
|
||||
ranking_rule_idx,
|
||||
universe: universe.clone(),
|
||||
time: Instant::now()
|
||||
time: Instant::now(),
|
||||
})
|
||||
}
|
||||
fn add_to_results(&mut self, docids: &[u32]) {
|
||||
@ -166,18 +164,47 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
|
||||
self.events.push(SearchEvents::WordsState { query_graph: query_graph.clone() });
|
||||
}
|
||||
|
||||
fn log_proximity_state(&mut self, query_graph: &RankingRuleGraph<ProximityGraph>, paths_map: &[Vec<u16>], empty_paths_cache: &EmptyPathsCache, universe: &RoaringBitmap, distances: Vec<Vec<(u16, SmallBitmap)>>, cost: u16,) {
|
||||
self.events.push(SearchEvents::ProximityState { graph: query_graph.clone(), paths: paths_map.to_vec(), empty_paths_cache: empty_paths_cache.clone(), universe: universe.clone(), distances, cost })
|
||||
}
|
||||
|
||||
fn log_typo_state(&mut self, query_graph: &RankingRuleGraph<TypoGraph>, paths_map: &[Vec<u16>], empty_paths_cache: &EmptyPathsCache, universe: &RoaringBitmap, distances: Vec<Vec<(u16, SmallBitmap)>>, cost: u16,) {
|
||||
self.events.push(SearchEvents::TypoState { graph: query_graph.clone(), paths: paths_map.to_vec(), empty_paths_cache: empty_paths_cache.clone(), universe: universe.clone(), distances, cost })
|
||||
fn log_proximity_state(
|
||||
&mut self,
|
||||
query_graph: &RankingRuleGraph<ProximityGraph>,
|
||||
paths_map: &[Vec<u16>],
|
||||
empty_paths_cache: &EmptyPathsCache,
|
||||
universe: &RoaringBitmap,
|
||||
distances: Vec<Vec<(u16, SmallBitmap)>>,
|
||||
cost: u16,
|
||||
) {
|
||||
self.events.push(SearchEvents::ProximityState {
|
||||
graph: query_graph.clone(),
|
||||
paths: paths_map.to_vec(),
|
||||
empty_paths_cache: empty_paths_cache.clone(),
|
||||
universe: universe.clone(),
|
||||
distances,
|
||||
cost,
|
||||
})
|
||||
}
|
||||
|
||||
fn log_typo_state(
|
||||
&mut self,
|
||||
query_graph: &RankingRuleGraph<TypoGraph>,
|
||||
paths_map: &[Vec<u16>],
|
||||
empty_paths_cache: &EmptyPathsCache,
|
||||
universe: &RoaringBitmap,
|
||||
distances: Vec<Vec<(u16, SmallBitmap)>>,
|
||||
cost: u16,
|
||||
) {
|
||||
self.events.push(SearchEvents::TypoState {
|
||||
graph: query_graph.clone(),
|
||||
paths: paths_map.to_vec(),
|
||||
empty_paths_cache: empty_paths_cache.clone(),
|
||||
universe: universe.clone(),
|
||||
distances,
|
||||
cost,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
impl DetailedSearchLogger {
|
||||
pub fn write_d2_description(&self,ctx: &mut SearchContext,) {
|
||||
pub fn write_d2_description(&self, ctx: &mut SearchContext) {
|
||||
let mut prev_time = self.initial_query_time.unwrap();
|
||||
let mut timestamp = vec![];
|
||||
fn activated_id(timestamp: &[usize]) -> String {
|
||||
@ -229,21 +256,29 @@ impl DetailedSearchLogger {
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
writeln!(&mut file,
|
||||
"{ranking_rule_idx}.{self_activated_id} {{
|
||||
writeln!(
|
||||
&mut file,
|
||||
"{ranking_rule_idx}.{self_activated_id} {{
|
||||
style {{
|
||||
fill: \"#D8A7B1\"
|
||||
}}
|
||||
}}").unwrap();
|
||||
}}"
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
SearchEvents::RankingRuleNextBucket { ranking_rule_idx, time, universe, candidates } => {
|
||||
SearchEvents::RankingRuleNextBucket {
|
||||
ranking_rule_idx,
|
||||
time,
|
||||
universe,
|
||||
candidates,
|
||||
} => {
|
||||
let _elapsed = time.duration_since(prev_time);
|
||||
prev_time = *time;
|
||||
let old_activated_id = activated_id(&timestamp);
|
||||
// writeln!(&mut file, "time.{old_activated_id}: {:.2}", elapsed.as_micros() as f64 / 1000.0).unwrap();
|
||||
*timestamp.last_mut().unwrap() += 1;
|
||||
let next_activated_id = activated_id(&timestamp);
|
||||
writeln!(&mut file,
|
||||
writeln!(&mut file,
|
||||
"{ranking_rule_idx}.{old_activated_id} -> {ranking_rule_idx}.{next_activated_id} : next bucket {}/{}", candidates.len(), universe.len())
|
||||
.unwrap();
|
||||
}
|
||||
@ -255,7 +290,7 @@ impl DetailedSearchLogger {
|
||||
*timestamp.last_mut().unwrap() += 1;
|
||||
let next_activated_id = activated_id(&timestamp);
|
||||
let len = candidates.len();
|
||||
writeln!(&mut file,
|
||||
writeln!(&mut file,
|
||||
"{ranking_rule_idx}.{old_activated_id} -> {ranking_rule_idx}.{next_activated_id} : skip bucket ({len})",)
|
||||
.unwrap();
|
||||
}
|
||||
@ -280,14 +315,14 @@ impl DetailedSearchLogger {
|
||||
}
|
||||
SearchEvents::ExtendResults { new } => {
|
||||
if new.is_empty() {
|
||||
continue
|
||||
continue;
|
||||
}
|
||||
let cur_ranking_rule = timestamp.len() - 1;
|
||||
let cur_activated_id = activated_id(&timestamp);
|
||||
let docids = new.iter().collect::<Vec<_>>();
|
||||
let len = new.len();
|
||||
let random = random::<u64>();
|
||||
|
||||
|
||||
writeln!(
|
||||
&mut file,
|
||||
"{cur_ranking_rule}.{cur_activated_id} -> results.{random} : \"add {len}\"
|
||||
@ -300,7 +335,7 @@ results.{random} {{
|
||||
"
|
||||
)
|
||||
.unwrap();
|
||||
},
|
||||
}
|
||||
SearchEvents::WordsState { query_graph } => {
|
||||
let cur_ranking_rule = timestamp.len() - 1;
|
||||
*timestamp.last_mut().unwrap() += 1;
|
||||
@ -314,9 +349,18 @@ results.{random} {{
|
||||
&mut file,
|
||||
"{id} {{
|
||||
link: \"{id}.d2.svg\"
|
||||
}}").unwrap();
|
||||
},
|
||||
SearchEvents::ProximityState { graph, paths, empty_paths_cache, universe, distances, cost } => {
|
||||
}}"
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
SearchEvents::ProximityState {
|
||||
graph,
|
||||
paths,
|
||||
empty_paths_cache,
|
||||
universe,
|
||||
distances,
|
||||
cost,
|
||||
} => {
|
||||
let cur_ranking_rule = timestamp.len() - 1;
|
||||
*timestamp.last_mut().unwrap() += 1;
|
||||
let cur_activated_id = activated_id(&timestamp);
|
||||
@ -324,15 +368,32 @@ results.{random} {{
|
||||
let id = format!("{cur_ranking_rule}.{cur_activated_id}");
|
||||
let new_file_path = self.folder_path.join(format!("{id}.d2"));
|
||||
let mut new_file = std::fs::File::create(new_file_path).unwrap();
|
||||
Self::ranking_rule_graph_d2_description(ctx, graph, paths, empty_paths_cache, distances.clone(), &mut new_file);
|
||||
Self::ranking_rule_graph_d2_description(
|
||||
ctx,
|
||||
graph,
|
||||
paths,
|
||||
empty_paths_cache,
|
||||
distances.clone(),
|
||||
&mut new_file,
|
||||
);
|
||||
writeln!(
|
||||
&mut file,
|
||||
"{id} {{
|
||||
link: \"{id}.d2.svg\"
|
||||
tooltip: \"cost {cost}, universe len: {}\"
|
||||
}}", universe.len()).unwrap();
|
||||
},
|
||||
SearchEvents::TypoState { graph, paths, empty_paths_cache, universe, distances, cost } => {
|
||||
}}",
|
||||
universe.len()
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
SearchEvents::TypoState {
|
||||
graph,
|
||||
paths,
|
||||
empty_paths_cache,
|
||||
universe,
|
||||
distances,
|
||||
cost,
|
||||
} => {
|
||||
let cur_ranking_rule = timestamp.len() - 1;
|
||||
*timestamp.last_mut().unwrap() += 1;
|
||||
let cur_activated_id = activated_id(&timestamp);
|
||||
@ -340,89 +401,130 @@ results.{random} {{
|
||||
let id = format!("{cur_ranking_rule}.{cur_activated_id}");
|
||||
let new_file_path = self.folder_path.join(format!("{id}.d2"));
|
||||
let mut new_file = std::fs::File::create(new_file_path).unwrap();
|
||||
Self::ranking_rule_graph_d2_description(ctx,graph, paths, empty_paths_cache, distances.clone(), &mut new_file);
|
||||
Self::ranking_rule_graph_d2_description(
|
||||
ctx,
|
||||
graph,
|
||||
paths,
|
||||
empty_paths_cache,
|
||||
distances.clone(),
|
||||
&mut new_file,
|
||||
);
|
||||
writeln!(
|
||||
&mut file,
|
||||
"{id} {{
|
||||
link: \"{id}.d2.svg\"
|
||||
tooltip: \"cost {cost}, universe len: {}\"
|
||||
}}", universe.len()).unwrap();
|
||||
},
|
||||
}}",
|
||||
universe.len()
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
writeln!(&mut file, "}}").unwrap();
|
||||
}
|
||||
|
||||
fn query_node_d2_desc(ctx: &mut SearchContext, node_idx: usize, node: &QueryNode, distances: &[(u16, SmallBitmap)], file: &mut File) {
|
||||
|
||||
fn query_node_d2_desc(
|
||||
ctx: &mut SearchContext,
|
||||
node_idx: usize,
|
||||
node: &QueryNode,
|
||||
distances: &[(u16, SmallBitmap)],
|
||||
file: &mut File,
|
||||
) {
|
||||
match &node {
|
||||
QueryNode::Term(LocatedQueryTerm { value, .. }) => {
|
||||
match value {
|
||||
QueryTerm::Phrase { phrase } => {
|
||||
let phrase = ctx.phrase_interner.get(*phrase);
|
||||
let phrase_str = phrase.description(&ctx.word_interner);
|
||||
writeln!(file,"{node_idx} : \"{phrase_str}\"").unwrap();
|
||||
},
|
||||
QueryTerm::Word { derivations: WordDerivations { original, zero_typo, one_typo, two_typos, use_prefix_db, synonyms, split_words } } => {
|
||||
let original = ctx.word_interner.get(*original);
|
||||
writeln!(file,"{node_idx} : \"{original}\" {{
|
||||
shape: class").unwrap();
|
||||
for w in zero_typo.iter().copied() {
|
||||
let w = ctx.word_interner.get(w);
|
||||
writeln!(file, "\"{w}\" : 0").unwrap();
|
||||
}
|
||||
for w in one_typo.iter().copied() {
|
||||
let w = ctx.word_interner.get(w);
|
||||
writeln!(file, "\"{w}\" : 1").unwrap();
|
||||
}
|
||||
for w in two_typos.iter().copied() {
|
||||
let w = ctx.word_interner.get(w);
|
||||
writeln!(file, "\"{w}\" : 2").unwrap();
|
||||
}
|
||||
if let Some(split_words) = split_words {
|
||||
let phrase = ctx.phrase_interner.get(*split_words);
|
||||
let phrase_str = phrase.description(&ctx.word_interner);
|
||||
writeln!(file, "\"{phrase_str}\" : split_words").unwrap();
|
||||
}
|
||||
for synonym in synonyms.iter().copied() {
|
||||
let phrase = ctx.phrase_interner.get(synonym);
|
||||
let phrase_str = phrase.description(&ctx.word_interner);
|
||||
writeln!(file, "\"{phrase_str}\" : synonym").unwrap();
|
||||
}
|
||||
if *use_prefix_db {
|
||||
writeln!(file, "use prefix DB : true").unwrap();
|
||||
}
|
||||
for (d, edges) in distances.iter() {
|
||||
writeln!(file, "\"distance {d}\" : {:?}", edges.iter().collect::<Vec<_>>() ).unwrap();
|
||||
}
|
||||
|
||||
writeln!(file, "}}").unwrap();
|
||||
},
|
||||
QueryNode::Term(LocatedQueryTerm { value, .. }) => match value {
|
||||
QueryTerm::Phrase { phrase } => {
|
||||
let phrase = ctx.phrase_interner.get(*phrase);
|
||||
let phrase_str = phrase.description(&ctx.word_interner);
|
||||
writeln!(file, "{node_idx} : \"{phrase_str}\"").unwrap();
|
||||
}
|
||||
QueryTerm::Word {
|
||||
derivations:
|
||||
WordDerivations {
|
||||
original,
|
||||
zero_typo,
|
||||
one_typo,
|
||||
two_typos,
|
||||
use_prefix_db,
|
||||
synonyms,
|
||||
split_words,
|
||||
},
|
||||
} => {
|
||||
let original = ctx.word_interner.get(*original);
|
||||
writeln!(
|
||||
file,
|
||||
"{node_idx} : \"{original}\" {{
|
||||
shape: class"
|
||||
)
|
||||
.unwrap();
|
||||
for w in zero_typo.iter().copied() {
|
||||
let w = ctx.word_interner.get(w);
|
||||
writeln!(file, "\"{w}\" : 0").unwrap();
|
||||
}
|
||||
for w in one_typo.iter().copied() {
|
||||
let w = ctx.word_interner.get(w);
|
||||
writeln!(file, "\"{w}\" : 1").unwrap();
|
||||
}
|
||||
for w in two_typos.iter().copied() {
|
||||
let w = ctx.word_interner.get(w);
|
||||
writeln!(file, "\"{w}\" : 2").unwrap();
|
||||
}
|
||||
if let Some(split_words) = split_words {
|
||||
let phrase = ctx.phrase_interner.get(*split_words);
|
||||
let phrase_str = phrase.description(&ctx.word_interner);
|
||||
writeln!(file, "\"{phrase_str}\" : split_words").unwrap();
|
||||
}
|
||||
for synonym in synonyms.iter().copied() {
|
||||
let phrase = ctx.phrase_interner.get(synonym);
|
||||
let phrase_str = phrase.description(&ctx.word_interner);
|
||||
writeln!(file, "\"{phrase_str}\" : synonym").unwrap();
|
||||
}
|
||||
if *use_prefix_db {
|
||||
writeln!(file, "use prefix DB : true").unwrap();
|
||||
}
|
||||
for (d, edges) in distances.iter() {
|
||||
writeln!(file, "\"distance {d}\" : {:?}", edges.iter().collect::<Vec<_>>())
|
||||
.unwrap();
|
||||
}
|
||||
|
||||
writeln!(file, "}}").unwrap();
|
||||
}
|
||||
},
|
||||
QueryNode::Deleted => panic!(),
|
||||
QueryNode::Start => {
|
||||
writeln!(file,"{node_idx} : START").unwrap();
|
||||
},
|
||||
writeln!(file, "{node_idx} : START").unwrap();
|
||||
}
|
||||
QueryNode::End => {
|
||||
writeln!(file,"{node_idx} : END").unwrap();
|
||||
},
|
||||
writeln!(file, "{node_idx} : END").unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
fn query_graph_d2_description(ctx: &mut SearchContext, query_graph: &QueryGraph, file: &mut File) {
|
||||
writeln!(file,"direction: right").unwrap();
|
||||
fn query_graph_d2_description(
|
||||
ctx: &mut SearchContext,
|
||||
query_graph: &QueryGraph,
|
||||
file: &mut File,
|
||||
) {
|
||||
writeln!(file, "direction: right").unwrap();
|
||||
for node in 0..query_graph.nodes.len() {
|
||||
if matches!(query_graph.nodes[node], QueryNode::Deleted) {
|
||||
continue;
|
||||
}
|
||||
Self::query_node_d2_desc(ctx, node, &query_graph.nodes[node], &[], file);
|
||||
|
||||
|
||||
for edge in query_graph.edges[node].successors.iter() {
|
||||
writeln!(file, "{node} -> {edge};\n").unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
fn ranking_rule_graph_d2_description<R: RankingRuleGraphTrait>(ctx: &mut SearchContext, graph: &RankingRuleGraph<R>, paths: &[Vec<u16>], _empty_paths_cache: &EmptyPathsCache, distances: Vec<Vec<(u16, SmallBitmap)>>, file: &mut File) {
|
||||
writeln!(file,"direction: right").unwrap();
|
||||
fn ranking_rule_graph_d2_description<R: RankingRuleGraphTrait>(
|
||||
ctx: &mut SearchContext,
|
||||
graph: &RankingRuleGraph<R>,
|
||||
paths: &[Vec<u16>],
|
||||
_empty_paths_cache: &EmptyPathsCache,
|
||||
distances: Vec<Vec<(u16, SmallBitmap)>>,
|
||||
file: &mut File,
|
||||
) {
|
||||
writeln!(file, "direction: right").unwrap();
|
||||
|
||||
writeln!(file, "Proximity Graph {{").unwrap();
|
||||
for (node_idx, node) in graph.query_graph.nodes.iter().enumerate() {
|
||||
@ -437,17 +539,21 @@ shape: class").unwrap();
|
||||
|
||||
match &details {
|
||||
EdgeDetails::Unconditional => {
|
||||
writeln!(file,
|
||||
writeln!(
|
||||
file,
|
||||
"{from_node} -> {to_node} : \"always cost {cost}\"",
|
||||
cost = edge.cost,
|
||||
).unwrap();
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
EdgeDetails::Data(details) => {
|
||||
writeln!(file,
|
||||
writeln!(
|
||||
file,
|
||||
"{from_node} -> {to_node} : \"cost {cost} {edge_label}\"",
|
||||
cost = edge.cost,
|
||||
edge_label = R::graphviz_edge_details_label(details)
|
||||
).unwrap();
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
}
|
||||
}
|
||||
@ -457,12 +563,11 @@ shape: class").unwrap();
|
||||
// Self::paths_d2_description(graph, paths, file);
|
||||
// writeln!(file, "}}").unwrap();
|
||||
|
||||
|
||||
writeln!(file, "Shortest Paths {{").unwrap();
|
||||
Self::paths_d2_description(ctx, graph, paths, file);
|
||||
writeln!(file, "}}").unwrap();
|
||||
|
||||
// writeln!(file, "Empty Edge Couples {{").unwrap();
|
||||
// writeln!(file, "Empty Edge Couples {{").unwrap();
|
||||
// for (i, (e1, e2)) in empty_paths_cache.empty_couple_edges.iter().enumerate() {
|
||||
// writeln!(file, "{i} : \"\" {{").unwrap();
|
||||
// Self::edge_d2_description(graph, *e1, file);
|
||||
@ -478,18 +583,24 @@ shape: class").unwrap();
|
||||
// }
|
||||
// writeln!(file, "}}").unwrap();
|
||||
}
|
||||
fn edge_d2_description<R: RankingRuleGraphTrait>(ctx: &mut SearchContext, graph: &RankingRuleGraph<R>, edge_idx: u16, file: &mut File) {
|
||||
let Edge { from_node, to_node, cost, .. } = graph.all_edges[edge_idx as usize].as_ref().unwrap() ;
|
||||
fn edge_d2_description<R: RankingRuleGraphTrait>(
|
||||
ctx: &mut SearchContext,
|
||||
graph: &RankingRuleGraph<R>,
|
||||
edge_idx: u16,
|
||||
file: &mut File,
|
||||
) {
|
||||
let Edge { from_node, to_node, cost, .. } =
|
||||
graph.all_edges[edge_idx as usize].as_ref().unwrap();
|
||||
let from_node = &graph.query_graph.nodes[*from_node as usize];
|
||||
let from_node_desc = match from_node {
|
||||
QueryNode::Term(term) => match &term.value {
|
||||
QueryTerm::Phrase { phrase } => {
|
||||
let phrase = ctx.phrase_interner.get(*phrase);
|
||||
phrase.description(&ctx.word_interner)
|
||||
},
|
||||
}
|
||||
QueryTerm::Word { derivations } => {
|
||||
ctx.word_interner.get(derivations.original).to_owned()
|
||||
},
|
||||
}
|
||||
},
|
||||
QueryNode::Deleted => panic!(),
|
||||
QueryNode::Start => "START".to_owned(),
|
||||
@ -501,18 +612,29 @@ shape: class").unwrap();
|
||||
QueryTerm::Phrase { phrase } => {
|
||||
let phrase = ctx.phrase_interner.get(*phrase);
|
||||
phrase.description(&ctx.word_interner)
|
||||
},
|
||||
QueryTerm::Word { derivations } => ctx.word_interner.get(derivations.original).to_owned(),
|
||||
}
|
||||
QueryTerm::Word { derivations } => {
|
||||
ctx.word_interner.get(derivations.original).to_owned()
|
||||
}
|
||||
},
|
||||
QueryNode::Deleted => panic!(),
|
||||
QueryNode::Start => "START".to_owned(),
|
||||
QueryNode::End => "END".to_owned(),
|
||||
};
|
||||
writeln!(file, "{edge_idx}: \"{from_node_desc}->{to_node_desc} [{cost}]\" {{
|
||||
writeln!(
|
||||
file,
|
||||
"{edge_idx}: \"{from_node_desc}->{to_node_desc} [{cost}]\" {{
|
||||
shape: class
|
||||
}}").unwrap();
|
||||
}}"
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
fn paths_d2_description<R: RankingRuleGraphTrait>(ctx: &mut SearchContext, graph: &RankingRuleGraph<R>, paths: &[Vec<u16>], file: &mut File) {
|
||||
fn paths_d2_description<R: RankingRuleGraphTrait>(
|
||||
ctx: &mut SearchContext,
|
||||
graph: &RankingRuleGraph<R>,
|
||||
paths: &[Vec<u16>],
|
||||
file: &mut File,
|
||||
) {
|
||||
for (path_idx, edge_indexes) in paths.iter().enumerate() {
|
||||
writeln!(file, "{path_idx} {{").unwrap();
|
||||
for edge_idx in edge_indexes.iter() {
|
||||
|
@ -3,11 +3,9 @@ pub mod detailed;
|
||||
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{
|
||||
ranking_rule_graph::{EmptyPathsCache, ProximityGraph, RankingRuleGraph, TypoGraph},
|
||||
small_bitmap::SmallBitmap,
|
||||
RankingRule, RankingRuleQueryTrait,
|
||||
};
|
||||
use super::ranking_rule_graph::{EmptyPathsCache, ProximityGraph, RankingRuleGraph, TypoGraph};
|
||||
use super::small_bitmap::SmallBitmap;
|
||||
use super::{RankingRule, RankingRuleQueryTrait};
|
||||
|
||||
pub struct DefaultSearchLogger;
|
||||
impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
|
||||
|
@ -11,12 +11,8 @@ mod small_bitmap;
|
||||
mod sort;
|
||||
mod words;
|
||||
|
||||
use self::interner::Interner;
|
||||
use self::logger::SearchLogger;
|
||||
use self::query_term::Phrase;
|
||||
use self::resolve_query_graph::{resolve_query_graph, NodeDocIdsCache};
|
||||
use crate::new::query_term::located_query_terms_from_string;
|
||||
use crate::{Filter, Index, Result, TermsMatchingStrategy};
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
use charabia::Tokenize;
|
||||
use db_cache::DatabaseCache;
|
||||
use heed::RoTxn;
|
||||
@ -26,7 +22,13 @@ pub use ranking_rules::{
|
||||
RankingRuleOutputIterWrapper, RankingRuleQueryTrait,
|
||||
};
|
||||
use roaring::RoaringBitmap;
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
use self::interner::Interner;
|
||||
use self::logger::SearchLogger;
|
||||
use self::query_term::Phrase;
|
||||
use self::resolve_query_graph::{resolve_query_graph, NodeDocIdsCache};
|
||||
use crate::search::new::query_term::located_query_terms_from_string;
|
||||
use crate::{Filter, Index, Result, TermsMatchingStrategy};
|
||||
|
||||
pub enum BitmapOrAllRef<'s> {
|
||||
Bitmap(&'s RoaringBitmap),
|
||||
|
@ -12,13 +12,12 @@ use heed::types::DecodeIgnore;
|
||||
use heed::RoTxn;
|
||||
use itertools::Itertools;
|
||||
|
||||
use super::interner::{Interned, Interner};
|
||||
use super::SearchContext;
|
||||
use crate::search::fst_utils::{Complement, Intersection, StartsWith, Union};
|
||||
use crate::search::{build_dfa, get_first};
|
||||
use crate::{CboRoaringBitmapLenCodec, Index, Result};
|
||||
|
||||
use super::interner::{Interned, Interner};
|
||||
use super::SearchContext;
|
||||
|
||||
#[derive(Default, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct Phrase {
|
||||
pub words: Vec<Option<Interned<String>>>,
|
||||
|
@ -1,8 +1,8 @@
|
||||
use std::collections::HashSet;
|
||||
|
||||
use super::{Edge, RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use crate::new::small_bitmap::SmallBitmap;
|
||||
use crate::new::{QueryGraph, SearchContext};
|
||||
use crate::search::new::small_bitmap::SmallBitmap;
|
||||
use crate::search::new::{QueryGraph, SearchContext};
|
||||
use crate::Result;
|
||||
|
||||
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
|
@ -1,11 +1,12 @@
|
||||
#![allow(clippy::too_many_arguments)]
|
||||
|
||||
use std::collections::btree_map::Entry;
|
||||
use std::collections::{BTreeMap, VecDeque};
|
||||
|
||||
use super::empty_paths_cache::EmptyPathsCache;
|
||||
use super::{RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use crate::new::small_bitmap::SmallBitmap;
|
||||
use crate::search::new::small_bitmap::SmallBitmap;
|
||||
use crate::Result;
|
||||
use std::collections::btree_map::Entry;
|
||||
use std::collections::{BTreeMap, VecDeque};
|
||||
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||
pub struct Path {
|
||||
|
@ -1,11 +1,12 @@
|
||||
use std::marker::PhantomData;
|
||||
|
||||
use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use crate::new::{BitmapOrAllRef, SearchContext};
|
||||
use crate::Result;
|
||||
use fxhash::FxHashMap;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use crate::search::new::{BitmapOrAllRef, SearchContext};
|
||||
use crate::Result;
|
||||
|
||||
// TODO: the cache should have a G::EdgeDetails as key
|
||||
// but then it means that we should have a quick way of
|
||||
// computing their hash and comparing them
|
||||
|
@ -1,6 +1,5 @@
|
||||
use crate::new::small_bitmap::SmallBitmap;
|
||||
|
||||
use super::paths_map::PathsMap;
|
||||
use crate::search::new::small_bitmap::SmallBitmap;
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct EmptyPathsCache {
|
||||
|
@ -6,16 +6,17 @@ mod paths_map;
|
||||
mod proximity;
|
||||
mod typo;
|
||||
|
||||
use super::logger::SearchLogger;
|
||||
use super::small_bitmap::SmallBitmap;
|
||||
use super::{QueryGraph, QueryNode, SearchContext};
|
||||
use crate::Result;
|
||||
pub use edge_docids_cache::EdgeDocidsCache;
|
||||
pub use empty_paths_cache::EmptyPathsCache;
|
||||
pub use proximity::ProximityGraph;
|
||||
use roaring::RoaringBitmap;
|
||||
pub use typo::TypoGraph;
|
||||
|
||||
use super::logger::SearchLogger;
|
||||
use super::small_bitmap::SmallBitmap;
|
||||
use super::{QueryGraph, QueryNode, SearchContext};
|
||||
use crate::Result;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum EdgeDetails<E> {
|
||||
Unconditional,
|
||||
|
@ -1,11 +1,10 @@
|
||||
use crate::new::small_bitmap::SmallBitmap;
|
||||
use super::cheapest_paths::Path;
|
||||
use crate::search::new::small_bitmap::SmallBitmap;
|
||||
|
||||
// What is PathsMap used for?
|
||||
// For the empty_prefixes field in the EmptyPathsCache only :/
|
||||
// but it could be used for more, like efficient computing of a set of paths
|
||||
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct PathsMap<V> {
|
||||
pub nodes: Vec<(u16, PathsMap<V>)>,
|
||||
@ -53,10 +52,10 @@ impl<V> PathsMap<V> {
|
||||
}
|
||||
}
|
||||
fn remove_first_rec(&mut self, cur: &mut Vec<u16>) -> (bool, V) {
|
||||
let Some((first_edge, rest)) = self.nodes.first_mut() else {
|
||||
let Some((first_edge, rest)) = self.nodes.first_mut() else {
|
||||
// The PathsMap has to be correct by construction here, otherwise
|
||||
// the unwrap() will crash
|
||||
return (true, self.value.take().unwrap())
|
||||
return (true, self.value.take().unwrap())
|
||||
};
|
||||
cur.push(*first_edge);
|
||||
let (rest_is_empty, value) = rest.remove_first_rec(cur);
|
||||
|
@ -1,12 +1,14 @@
|
||||
use super::ProximityEdge;
|
||||
use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
|
||||
use crate::new::ranking_rule_graph::proximity::WordPair;
|
||||
use crate::new::ranking_rule_graph::EdgeDetails;
|
||||
use crate::new::{QueryNode, SearchContext};
|
||||
use crate::Result;
|
||||
use itertools::Itertools;
|
||||
use std::collections::BTreeMap;
|
||||
|
||||
use itertools::Itertools;
|
||||
|
||||
use super::ProximityEdge;
|
||||
use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
|
||||
use crate::search::new::ranking_rule_graph::proximity::WordPair;
|
||||
use crate::search::new::ranking_rule_graph::EdgeDetails;
|
||||
use crate::search::new::{QueryNode, SearchContext};
|
||||
use crate::Result;
|
||||
|
||||
pub fn visit_from_node(
|
||||
ctx: &mut SearchContext,
|
||||
from_node: &QueryNode,
|
||||
|
@ -1,8 +1,9 @@
|
||||
use super::{ProximityEdge, WordPair};
|
||||
use crate::new::SearchContext;
|
||||
use crate::{CboRoaringBitmapCodec, Result};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{ProximityEdge, WordPair};
|
||||
use crate::search::new::SearchContext;
|
||||
use crate::{CboRoaringBitmapCodec, Result};
|
||||
|
||||
pub fn compute_docids<'search>(
|
||||
ctx: &mut SearchContext<'search>,
|
||||
edge: &ProximityEdge,
|
||||
|
@ -1,15 +1,16 @@
|
||||
pub mod build;
|
||||
pub mod compute_docids;
|
||||
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::empty_paths_cache::EmptyPathsCache;
|
||||
use super::{EdgeDetails, RankingRuleGraphTrait};
|
||||
use crate::new::interner::Interned;
|
||||
use crate::new::logger::SearchLogger;
|
||||
use crate::new::query_term::WordDerivations;
|
||||
use crate::new::small_bitmap::SmallBitmap;
|
||||
use crate::new::{QueryGraph, QueryNode, SearchContext};
|
||||
use crate::search::new::interner::Interned;
|
||||
use crate::search::new::logger::SearchLogger;
|
||||
use crate::search::new::query_term::WordDerivations;
|
||||
use crate::search::new::small_bitmap::SmallBitmap;
|
||||
use crate::search::new::{QueryGraph, QueryNode, SearchContext};
|
||||
use crate::Result;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
// TODO: intern the proximity edges as well?
|
||||
|
||||
|
@ -1,15 +1,16 @@
|
||||
use super::empty_paths_cache::EmptyPathsCache;
|
||||
use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use crate::new::interner::Interned;
|
||||
use crate::new::logger::SearchLogger;
|
||||
use crate::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm, WordDerivations};
|
||||
use crate::new::resolve_query_graph::resolve_phrase;
|
||||
use crate::new::small_bitmap::SmallBitmap;
|
||||
use crate::new::{QueryGraph, QueryNode, SearchContext};
|
||||
use crate::{Result, RoaringBitmapCodec};
|
||||
use heed::BytesDecode;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::empty_paths_cache::EmptyPathsCache;
|
||||
use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use crate::search::new::interner::Interned;
|
||||
use crate::search::new::logger::SearchLogger;
|
||||
use crate::search::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm, WordDerivations};
|
||||
use crate::search::new::resolve_query_graph::resolve_phrase;
|
||||
use crate::search::new::small_bitmap::SmallBitmap;
|
||||
use crate::search::new::{QueryGraph, QueryNode, SearchContext};
|
||||
use crate::{Result, RoaringBitmapCodec};
|
||||
|
||||
#[derive(Clone)]
|
||||
pub enum TypoEdge {
|
||||
Phrase { phrase: Interned<Phrase> },
|
||||
|
@ -1,11 +1,10 @@
|
||||
use super::logger::SearchLogger;
|
||||
use super::QueryGraph;
|
||||
use super::SearchContext;
|
||||
use crate::new::graph_based_ranking_rule::GraphBasedRankingRule;
|
||||
use crate::new::ranking_rule_graph::ProximityGraph;
|
||||
use crate::new::ranking_rule_graph::TypoGraph;
|
||||
use crate::new::words::Words;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::logger::SearchLogger;
|
||||
use super::{QueryGraph, SearchContext};
|
||||
use crate::search::new::graph_based_ranking_rule::GraphBasedRankingRule;
|
||||
use crate::search::new::ranking_rule_graph::{ProximityGraph, TypoGraph};
|
||||
use crate::search::new::words::Words;
|
||||
// use crate::search::new::sort::Sort;
|
||||
use crate::{Result, TermsMatchingStrategy};
|
||||
|
||||
@ -239,16 +238,18 @@ pub fn apply_ranking_rules<'search>(
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
// use crate::allocator::ALLOC;
|
||||
use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
||||
use crate::new::{execute_search, SearchContext};
|
||||
use big_s::S;
|
||||
use heed::EnvOpenOptions;
|
||||
use maplit::hashset;
|
||||
use std::fs::File;
|
||||
use std::io::{BufRead, BufReader, Cursor, Seek};
|
||||
use std::time::Instant;
|
||||
// use crate::new::logger::detailed::DetailedSearchLogger;
|
||||
use crate::new::logger::DefaultSearchLogger;
|
||||
|
||||
use big_s::S;
|
||||
use heed::EnvOpenOptions;
|
||||
use maplit::hashset;
|
||||
|
||||
use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
||||
// use crate::search::new::logger::detailed::DetailedSearchLogger;
|
||||
use crate::search::new::logger::DefaultSearchLogger;
|
||||
use crate::search::new::{execute_search, SearchContext};
|
||||
use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
||||
use crate::{Criterion, Index, Object, Search, TermsMatchingStrategy};
|
||||
|
||||
@ -265,7 +266,7 @@ mod tests {
|
||||
// loop {
|
||||
let start = Instant::now();
|
||||
|
||||
// let mut logger = crate::new::logger::detailed::DetailedSearchLogger::new("log");
|
||||
// let mut logger = crate::search::new::logger::detailed::DetailedSearchLogger::new("log");
|
||||
let mut ctx = SearchContext::new(&index, &txn);
|
||||
let results = execute_search(
|
||||
&mut ctx,
|
||||
@ -362,7 +363,7 @@ mod tests {
|
||||
// loop {
|
||||
let start = Instant::now();
|
||||
|
||||
let mut logger = crate::new::logger::detailed::DetailedSearchLogger::new("log");
|
||||
let mut logger = crate::search::new::logger::detailed::DetailedSearchLogger::new("log");
|
||||
let mut ctx = SearchContext::new(&index, &txn);
|
||||
let results = execute_search(
|
||||
&mut ctx,
|
||||
|
@ -1,12 +1,14 @@
|
||||
use std::collections::VecDeque;
|
||||
|
||||
use fxhash::FxHashMap;
|
||||
use heed::BytesDecode;
|
||||
use roaring::{MultiOps, RoaringBitmap};
|
||||
|
||||
use super::interner::Interned;
|
||||
use super::query_term::{Phrase, QueryTerm, WordDerivations};
|
||||
use super::small_bitmap::SmallBitmap;
|
||||
use super::{QueryGraph, QueryNode, SearchContext};
|
||||
use crate::{CboRoaringBitmapCodec, Result, RoaringBitmapCodec};
|
||||
use fxhash::FxHashMap;
|
||||
use heed::BytesDecode;
|
||||
use roaring::{MultiOps, RoaringBitmap};
|
||||
use std::collections::VecDeque;
|
||||
|
||||
// TODO: manual performance metrics: access to DB, bitmap deserializations/operations, etc.
|
||||
#[derive(Default)]
|
||||
|
@ -1,3 +1,5 @@
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::logger::SearchLogger;
|
||||
use super::{
|
||||
RankingRule, RankingRuleOutput, RankingRuleOutputIter, RankingRuleOutputIterWrapper,
|
||||
@ -11,7 +13,6 @@ use crate::{
|
||||
Index,
|
||||
Result,
|
||||
};
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
pub struct Sort<'search, Query> {
|
||||
field_name: String,
|
||||
|
@ -1,9 +1,11 @@
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::logger::SearchLogger;
|
||||
use super::resolve_query_graph::resolve_query_graph;
|
||||
use super::{QueryGraph, QueryNode, RankingRule, RankingRuleOutput, SearchContext};
|
||||
use crate::{Result, TermsMatchingStrategy};
|
||||
use roaring::RoaringBitmap;
|
||||
use std::collections::BTreeSet;
|
||||
|
||||
pub struct Words {
|
||||
exhausted: bool,
|
||||
|
Loading…
Reference in New Issue
Block a user