Cargo fmt

This commit is contained in:
Loïc Lecrenier 2023-03-08 09:55:53 +01:00
parent 10626dddfc
commit 57fa689131
22 changed files with 348 additions and 213 deletions

View File

@ -54,8 +54,6 @@ pub static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
#[macro_use]
pub mod documents;
pub use search::new;
mod asc_desc;
mod criterion;
mod error;

View File

@ -1,8 +1,11 @@
use super::{interner::Interned, SearchContext};
use crate::Result;
use std::collections::hash_map::Entry;
use fxhash::FxHashMap;
use heed::types::ByteSlice;
use std::collections::hash_map::Entry;
use super::interner::Interned;
use super::SearchContext;
use crate::Result;
#[derive(Default)]
pub struct DatabaseCache<'search> {

View File

@ -1,13 +1,13 @@
use super::logger::SearchLogger;
use super::ranking_rule_graph::EdgeDocidsCache;
use super::ranking_rule_graph::EmptyPathsCache;
use super::ranking_rule_graph::{RankingRuleGraph, RankingRuleGraphTrait};
use super::small_bitmap::SmallBitmap;
use super::SearchContext;
use super::{BitmapOrAllRef, QueryGraph, RankingRule, RankingRuleOutput};
use crate::Result;
use roaring::RoaringBitmap;
use super::logger::SearchLogger;
use super::ranking_rule_graph::{
EdgeDocidsCache, EmptyPathsCache, RankingRuleGraph, RankingRuleGraphTrait,
};
use super::small_bitmap::SmallBitmap;
use super::{BitmapOrAllRef, QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
use crate::Result;
pub struct GraphBasedRankingRule<G: RankingRuleGraphTrait> {
id: String,
state: Option<GraphBasedRankingRuleState<G>>,

View File

@ -1,7 +1,8 @@
use fxhash::FxHashMap;
use std::hash::Hash;
use std::marker::PhantomData;
use fxhash::FxHashMap;
pub struct Interned<T> {
idx: u32,
_phantom: PhantomData<T>,

View File

@ -1,39 +1,37 @@
use std::fs::File;
use std::io::Write;
use std::path::PathBuf;
use std::time::Instant;
use rand::random;
use roaring::RoaringBitmap;
use std::fs::File;
use std::time::Instant;
use std::{io::Write, path::PathBuf};
use crate::new::ranking_rule_graph::TypoGraph;
use crate::new::small_bitmap::SmallBitmap;
use crate::new::{QueryNode, QueryGraph, SearchContext};
use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
use crate::new::ranking_rule_graph::EmptyPathsCache;
use crate::new::ranking_rule_graph::{Edge, EdgeDetails, RankingRuleGraphTrait};
use crate::new::ranking_rule_graph::{
ProximityGraph, RankingRuleGraph,
use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
use crate::search::new::ranking_rule_graph::{
Edge, EdgeDetails, EmptyPathsCache, ProximityGraph, RankingRuleGraph, RankingRuleGraphTrait,
TypoGraph,
};
use super::{RankingRule, SearchLogger};
use crate::search::new::small_bitmap::SmallBitmap;
use crate::search::new::{QueryGraph, QueryNode, SearchContext};
use crate::search::new::{RankingRule, SearchLogger};
pub enum SearchEvents {
RankingRuleStartIteration {
ranking_rule_idx: usize,
query: QueryGraph,
universe: RoaringBitmap,
time: Instant
time: Instant,
},
RankingRuleNextBucket {
ranking_rule_idx: usize,
universe: RoaringBitmap,
candidates: RoaringBitmap,
time: Instant
time: Instant,
},
RankingRuleEndIteration {
ranking_rule_idx: usize,
universe: RoaringBitmap,
time: Instant
time: Instant,
},
ExtendResults {
new: Vec<u32>,
@ -57,7 +55,11 @@ pub enum SearchEvents {
distances: Vec<Vec<(u16, SmallBitmap)>>,
cost: u16,
},
RankingRuleSkipBucket { ranking_rule_idx: usize, candidates: RoaringBitmap, time: Instant },
RankingRuleSkipBucket {
ranking_rule_idx: usize,
candidates: RoaringBitmap,
time: Instant,
},
}
pub struct DetailedSearchLogger {
@ -106,7 +108,6 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
_ranking_rule: &dyn RankingRule<'transaction, QueryGraph>,
query: &QueryGraph,
universe: &RoaringBitmap,
) {
self.events.push(SearchEvents::RankingRuleStartIteration {
ranking_rule_idx,
@ -122,7 +123,6 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
_ranking_rule: &dyn RankingRule<'transaction, QueryGraph>,
universe: &RoaringBitmap,
candidates: &RoaringBitmap,
) {
self.events.push(SearchEvents::RankingRuleNextBucket {
ranking_rule_idx,
@ -136,12 +136,11 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
ranking_rule_idx: usize,
_ranking_rule: &dyn RankingRule<'transaction, QueryGraph>,
candidates: &RoaringBitmap,
) {
self.events.push(SearchEvents::RankingRuleSkipBucket {
ranking_rule_idx,
candidates: candidates.clone(),
time: Instant::now()
time: Instant::now(),
})
}
@ -150,12 +149,11 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
ranking_rule_idx: usize,
_ranking_rule: &dyn RankingRule<'transaction, QueryGraph>,
universe: &RoaringBitmap,
) {
self.events.push(SearchEvents::RankingRuleEndIteration {
ranking_rule_idx,
universe: universe.clone(),
time: Instant::now()
time: Instant::now(),
})
}
fn add_to_results(&mut self, docids: &[u32]) {
@ -166,18 +164,47 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
self.events.push(SearchEvents::WordsState { query_graph: query_graph.clone() });
}
fn log_proximity_state(&mut self, query_graph: &RankingRuleGraph<ProximityGraph>, paths_map: &[Vec<u16>], empty_paths_cache: &EmptyPathsCache, universe: &RoaringBitmap, distances: Vec<Vec<(u16, SmallBitmap)>>, cost: u16,) {
self.events.push(SearchEvents::ProximityState { graph: query_graph.clone(), paths: paths_map.to_vec(), empty_paths_cache: empty_paths_cache.clone(), universe: universe.clone(), distances, cost })
}
fn log_typo_state(&mut self, query_graph: &RankingRuleGraph<TypoGraph>, paths_map: &[Vec<u16>], empty_paths_cache: &EmptyPathsCache, universe: &RoaringBitmap, distances: Vec<Vec<(u16, SmallBitmap)>>, cost: u16,) {
self.events.push(SearchEvents::TypoState { graph: query_graph.clone(), paths: paths_map.to_vec(), empty_paths_cache: empty_paths_cache.clone(), universe: universe.clone(), distances, cost })
fn log_proximity_state(
&mut self,
query_graph: &RankingRuleGraph<ProximityGraph>,
paths_map: &[Vec<u16>],
empty_paths_cache: &EmptyPathsCache,
universe: &RoaringBitmap,
distances: Vec<Vec<(u16, SmallBitmap)>>,
cost: u16,
) {
self.events.push(SearchEvents::ProximityState {
graph: query_graph.clone(),
paths: paths_map.to_vec(),
empty_paths_cache: empty_paths_cache.clone(),
universe: universe.clone(),
distances,
cost,
})
}
fn log_typo_state(
&mut self,
query_graph: &RankingRuleGraph<TypoGraph>,
paths_map: &[Vec<u16>],
empty_paths_cache: &EmptyPathsCache,
universe: &RoaringBitmap,
distances: Vec<Vec<(u16, SmallBitmap)>>,
cost: u16,
) {
self.events.push(SearchEvents::TypoState {
graph: query_graph.clone(),
paths: paths_map.to_vec(),
empty_paths_cache: empty_paths_cache.clone(),
universe: universe.clone(),
distances,
cost,
})
}
}
impl DetailedSearchLogger {
pub fn write_d2_description(&self,ctx: &mut SearchContext,) {
pub fn write_d2_description(&self, ctx: &mut SearchContext) {
let mut prev_time = self.initial_query_time.unwrap();
let mut timestamp = vec![];
fn activated_id(timestamp: &[usize]) -> String {
@ -229,21 +256,29 @@ impl DetailedSearchLogger {
)
.unwrap();
}
writeln!(&mut file,
"{ranking_rule_idx}.{self_activated_id} {{
writeln!(
&mut file,
"{ranking_rule_idx}.{self_activated_id} {{
style {{
fill: \"#D8A7B1\"
}}
}}").unwrap();
}}"
)
.unwrap();
}
SearchEvents::RankingRuleNextBucket { ranking_rule_idx, time, universe, candidates } => {
SearchEvents::RankingRuleNextBucket {
ranking_rule_idx,
time,
universe,
candidates,
} => {
let _elapsed = time.duration_since(prev_time);
prev_time = *time;
let old_activated_id = activated_id(&timestamp);
// writeln!(&mut file, "time.{old_activated_id}: {:.2}", elapsed.as_micros() as f64 / 1000.0).unwrap();
*timestamp.last_mut().unwrap() += 1;
let next_activated_id = activated_id(&timestamp);
writeln!(&mut file,
writeln!(&mut file,
"{ranking_rule_idx}.{old_activated_id} -> {ranking_rule_idx}.{next_activated_id} : next bucket {}/{}", candidates.len(), universe.len())
.unwrap();
}
@ -255,7 +290,7 @@ impl DetailedSearchLogger {
*timestamp.last_mut().unwrap() += 1;
let next_activated_id = activated_id(&timestamp);
let len = candidates.len();
writeln!(&mut file,
writeln!(&mut file,
"{ranking_rule_idx}.{old_activated_id} -> {ranking_rule_idx}.{next_activated_id} : skip bucket ({len})",)
.unwrap();
}
@ -280,14 +315,14 @@ impl DetailedSearchLogger {
}
SearchEvents::ExtendResults { new } => {
if new.is_empty() {
continue
continue;
}
let cur_ranking_rule = timestamp.len() - 1;
let cur_activated_id = activated_id(&timestamp);
let docids = new.iter().collect::<Vec<_>>();
let len = new.len();
let random = random::<u64>();
writeln!(
&mut file,
"{cur_ranking_rule}.{cur_activated_id} -> results.{random} : \"add {len}\"
@ -300,7 +335,7 @@ results.{random} {{
"
)
.unwrap();
},
}
SearchEvents::WordsState { query_graph } => {
let cur_ranking_rule = timestamp.len() - 1;
*timestamp.last_mut().unwrap() += 1;
@ -314,9 +349,18 @@ results.{random} {{
&mut file,
"{id} {{
link: \"{id}.d2.svg\"
}}").unwrap();
},
SearchEvents::ProximityState { graph, paths, empty_paths_cache, universe, distances, cost } => {
}}"
)
.unwrap();
}
SearchEvents::ProximityState {
graph,
paths,
empty_paths_cache,
universe,
distances,
cost,
} => {
let cur_ranking_rule = timestamp.len() - 1;
*timestamp.last_mut().unwrap() += 1;
let cur_activated_id = activated_id(&timestamp);
@ -324,15 +368,32 @@ results.{random} {{
let id = format!("{cur_ranking_rule}.{cur_activated_id}");
let new_file_path = self.folder_path.join(format!("{id}.d2"));
let mut new_file = std::fs::File::create(new_file_path).unwrap();
Self::ranking_rule_graph_d2_description(ctx, graph, paths, empty_paths_cache, distances.clone(), &mut new_file);
Self::ranking_rule_graph_d2_description(
ctx,
graph,
paths,
empty_paths_cache,
distances.clone(),
&mut new_file,
);
writeln!(
&mut file,
"{id} {{
link: \"{id}.d2.svg\"
tooltip: \"cost {cost}, universe len: {}\"
}}", universe.len()).unwrap();
},
SearchEvents::TypoState { graph, paths, empty_paths_cache, universe, distances, cost } => {
}}",
universe.len()
)
.unwrap();
}
SearchEvents::TypoState {
graph,
paths,
empty_paths_cache,
universe,
distances,
cost,
} => {
let cur_ranking_rule = timestamp.len() - 1;
*timestamp.last_mut().unwrap() += 1;
let cur_activated_id = activated_id(&timestamp);
@ -340,89 +401,130 @@ results.{random} {{
let id = format!("{cur_ranking_rule}.{cur_activated_id}");
let new_file_path = self.folder_path.join(format!("{id}.d2"));
let mut new_file = std::fs::File::create(new_file_path).unwrap();
Self::ranking_rule_graph_d2_description(ctx,graph, paths, empty_paths_cache, distances.clone(), &mut new_file);
Self::ranking_rule_graph_d2_description(
ctx,
graph,
paths,
empty_paths_cache,
distances.clone(),
&mut new_file,
);
writeln!(
&mut file,
"{id} {{
link: \"{id}.d2.svg\"
tooltip: \"cost {cost}, universe len: {}\"
}}", universe.len()).unwrap();
},
}}",
universe.len()
)
.unwrap();
}
}
}
writeln!(&mut file, "}}").unwrap();
}
fn query_node_d2_desc(ctx: &mut SearchContext, node_idx: usize, node: &QueryNode, distances: &[(u16, SmallBitmap)], file: &mut File) {
fn query_node_d2_desc(
ctx: &mut SearchContext,
node_idx: usize,
node: &QueryNode,
distances: &[(u16, SmallBitmap)],
file: &mut File,
) {
match &node {
QueryNode::Term(LocatedQueryTerm { value, .. }) => {
match value {
QueryTerm::Phrase { phrase } => {
let phrase = ctx.phrase_interner.get(*phrase);
let phrase_str = phrase.description(&ctx.word_interner);
writeln!(file,"{node_idx} : \"{phrase_str}\"").unwrap();
},
QueryTerm::Word { derivations: WordDerivations { original, zero_typo, one_typo, two_typos, use_prefix_db, synonyms, split_words } } => {
let original = ctx.word_interner.get(*original);
writeln!(file,"{node_idx} : \"{original}\" {{
shape: class").unwrap();
for w in zero_typo.iter().copied() {
let w = ctx.word_interner.get(w);
writeln!(file, "\"{w}\" : 0").unwrap();
}
for w in one_typo.iter().copied() {
let w = ctx.word_interner.get(w);
writeln!(file, "\"{w}\" : 1").unwrap();
}
for w in two_typos.iter().copied() {
let w = ctx.word_interner.get(w);
writeln!(file, "\"{w}\" : 2").unwrap();
}
if let Some(split_words) = split_words {
let phrase = ctx.phrase_interner.get(*split_words);
let phrase_str = phrase.description(&ctx.word_interner);
writeln!(file, "\"{phrase_str}\" : split_words").unwrap();
}
for synonym in synonyms.iter().copied() {
let phrase = ctx.phrase_interner.get(synonym);
let phrase_str = phrase.description(&ctx.word_interner);
writeln!(file, "\"{phrase_str}\" : synonym").unwrap();
}
if *use_prefix_db {
writeln!(file, "use prefix DB : true").unwrap();
}
for (d, edges) in distances.iter() {
writeln!(file, "\"distance {d}\" : {:?}", edges.iter().collect::<Vec<_>>() ).unwrap();
}
writeln!(file, "}}").unwrap();
},
QueryNode::Term(LocatedQueryTerm { value, .. }) => match value {
QueryTerm::Phrase { phrase } => {
let phrase = ctx.phrase_interner.get(*phrase);
let phrase_str = phrase.description(&ctx.word_interner);
writeln!(file, "{node_idx} : \"{phrase_str}\"").unwrap();
}
QueryTerm::Word {
derivations:
WordDerivations {
original,
zero_typo,
one_typo,
two_typos,
use_prefix_db,
synonyms,
split_words,
},
} => {
let original = ctx.word_interner.get(*original);
writeln!(
file,
"{node_idx} : \"{original}\" {{
shape: class"
)
.unwrap();
for w in zero_typo.iter().copied() {
let w = ctx.word_interner.get(w);
writeln!(file, "\"{w}\" : 0").unwrap();
}
for w in one_typo.iter().copied() {
let w = ctx.word_interner.get(w);
writeln!(file, "\"{w}\" : 1").unwrap();
}
for w in two_typos.iter().copied() {
let w = ctx.word_interner.get(w);
writeln!(file, "\"{w}\" : 2").unwrap();
}
if let Some(split_words) = split_words {
let phrase = ctx.phrase_interner.get(*split_words);
let phrase_str = phrase.description(&ctx.word_interner);
writeln!(file, "\"{phrase_str}\" : split_words").unwrap();
}
for synonym in synonyms.iter().copied() {
let phrase = ctx.phrase_interner.get(synonym);
let phrase_str = phrase.description(&ctx.word_interner);
writeln!(file, "\"{phrase_str}\" : synonym").unwrap();
}
if *use_prefix_db {
writeln!(file, "use prefix DB : true").unwrap();
}
for (d, edges) in distances.iter() {
writeln!(file, "\"distance {d}\" : {:?}", edges.iter().collect::<Vec<_>>())
.unwrap();
}
writeln!(file, "}}").unwrap();
}
},
QueryNode::Deleted => panic!(),
QueryNode::Start => {
writeln!(file,"{node_idx} : START").unwrap();
},
writeln!(file, "{node_idx} : START").unwrap();
}
QueryNode::End => {
writeln!(file,"{node_idx} : END").unwrap();
},
writeln!(file, "{node_idx} : END").unwrap();
}
}
}
fn query_graph_d2_description(ctx: &mut SearchContext, query_graph: &QueryGraph, file: &mut File) {
writeln!(file,"direction: right").unwrap();
fn query_graph_d2_description(
ctx: &mut SearchContext,
query_graph: &QueryGraph,
file: &mut File,
) {
writeln!(file, "direction: right").unwrap();
for node in 0..query_graph.nodes.len() {
if matches!(query_graph.nodes[node], QueryNode::Deleted) {
continue;
}
Self::query_node_d2_desc(ctx, node, &query_graph.nodes[node], &[], file);
for edge in query_graph.edges[node].successors.iter() {
writeln!(file, "{node} -> {edge};\n").unwrap();
}
}
}
}
fn ranking_rule_graph_d2_description<R: RankingRuleGraphTrait>(ctx: &mut SearchContext, graph: &RankingRuleGraph<R>, paths: &[Vec<u16>], _empty_paths_cache: &EmptyPathsCache, distances: Vec<Vec<(u16, SmallBitmap)>>, file: &mut File) {
writeln!(file,"direction: right").unwrap();
fn ranking_rule_graph_d2_description<R: RankingRuleGraphTrait>(
ctx: &mut SearchContext,
graph: &RankingRuleGraph<R>,
paths: &[Vec<u16>],
_empty_paths_cache: &EmptyPathsCache,
distances: Vec<Vec<(u16, SmallBitmap)>>,
file: &mut File,
) {
writeln!(file, "direction: right").unwrap();
writeln!(file, "Proximity Graph {{").unwrap();
for (node_idx, node) in graph.query_graph.nodes.iter().enumerate() {
@ -437,17 +539,21 @@ shape: class").unwrap();
match &details {
EdgeDetails::Unconditional => {
writeln!(file,
writeln!(
file,
"{from_node} -> {to_node} : \"always cost {cost}\"",
cost = edge.cost,
).unwrap();
)
.unwrap();
}
EdgeDetails::Data(details) => {
writeln!(file,
writeln!(
file,
"{from_node} -> {to_node} : \"cost {cost} {edge_label}\"",
cost = edge.cost,
edge_label = R::graphviz_edge_details_label(details)
).unwrap();
)
.unwrap();
}
}
}
@ -457,12 +563,11 @@ shape: class").unwrap();
// Self::paths_d2_description(graph, paths, file);
// writeln!(file, "}}").unwrap();
writeln!(file, "Shortest Paths {{").unwrap();
Self::paths_d2_description(ctx, graph, paths, file);
writeln!(file, "}}").unwrap();
// writeln!(file, "Empty Edge Couples {{").unwrap();
// writeln!(file, "Empty Edge Couples {{").unwrap();
// for (i, (e1, e2)) in empty_paths_cache.empty_couple_edges.iter().enumerate() {
// writeln!(file, "{i} : \"\" {{").unwrap();
// Self::edge_d2_description(graph, *e1, file);
@ -478,18 +583,24 @@ shape: class").unwrap();
// }
// writeln!(file, "}}").unwrap();
}
fn edge_d2_description<R: RankingRuleGraphTrait>(ctx: &mut SearchContext, graph: &RankingRuleGraph<R>, edge_idx: u16, file: &mut File) {
let Edge { from_node, to_node, cost, .. } = graph.all_edges[edge_idx as usize].as_ref().unwrap() ;
fn edge_d2_description<R: RankingRuleGraphTrait>(
ctx: &mut SearchContext,
graph: &RankingRuleGraph<R>,
edge_idx: u16,
file: &mut File,
) {
let Edge { from_node, to_node, cost, .. } =
graph.all_edges[edge_idx as usize].as_ref().unwrap();
let from_node = &graph.query_graph.nodes[*from_node as usize];
let from_node_desc = match from_node {
QueryNode::Term(term) => match &term.value {
QueryTerm::Phrase { phrase } => {
let phrase = ctx.phrase_interner.get(*phrase);
phrase.description(&ctx.word_interner)
},
}
QueryTerm::Word { derivations } => {
ctx.word_interner.get(derivations.original).to_owned()
},
}
},
QueryNode::Deleted => panic!(),
QueryNode::Start => "START".to_owned(),
@ -501,18 +612,29 @@ shape: class").unwrap();
QueryTerm::Phrase { phrase } => {
let phrase = ctx.phrase_interner.get(*phrase);
phrase.description(&ctx.word_interner)
},
QueryTerm::Word { derivations } => ctx.word_interner.get(derivations.original).to_owned(),
}
QueryTerm::Word { derivations } => {
ctx.word_interner.get(derivations.original).to_owned()
}
},
QueryNode::Deleted => panic!(),
QueryNode::Start => "START".to_owned(),
QueryNode::End => "END".to_owned(),
};
writeln!(file, "{edge_idx}: \"{from_node_desc}->{to_node_desc} [{cost}]\" {{
writeln!(
file,
"{edge_idx}: \"{from_node_desc}->{to_node_desc} [{cost}]\" {{
shape: class
}}").unwrap();
}}"
)
.unwrap();
}
fn paths_d2_description<R: RankingRuleGraphTrait>(ctx: &mut SearchContext, graph: &RankingRuleGraph<R>, paths: &[Vec<u16>], file: &mut File) {
fn paths_d2_description<R: RankingRuleGraphTrait>(
ctx: &mut SearchContext,
graph: &RankingRuleGraph<R>,
paths: &[Vec<u16>],
file: &mut File,
) {
for (path_idx, edge_indexes) in paths.iter().enumerate() {
writeln!(file, "{path_idx} {{").unwrap();
for edge_idx in edge_indexes.iter() {

View File

@ -3,11 +3,9 @@ pub mod detailed;
use roaring::RoaringBitmap;
use super::{
ranking_rule_graph::{EmptyPathsCache, ProximityGraph, RankingRuleGraph, TypoGraph},
small_bitmap::SmallBitmap,
RankingRule, RankingRuleQueryTrait,
};
use super::ranking_rule_graph::{EmptyPathsCache, ProximityGraph, RankingRuleGraph, TypoGraph};
use super::small_bitmap::SmallBitmap;
use super::{RankingRule, RankingRuleQueryTrait};
pub struct DefaultSearchLogger;
impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {

View File

@ -11,12 +11,8 @@ mod small_bitmap;
mod sort;
mod words;
use self::interner::Interner;
use self::logger::SearchLogger;
use self::query_term::Phrase;
use self::resolve_query_graph::{resolve_query_graph, NodeDocIdsCache};
use crate::new::query_term::located_query_terms_from_string;
use crate::{Filter, Index, Result, TermsMatchingStrategy};
use std::collections::BTreeSet;
use charabia::Tokenize;
use db_cache::DatabaseCache;
use heed::RoTxn;
@ -26,7 +22,13 @@ pub use ranking_rules::{
RankingRuleOutputIterWrapper, RankingRuleQueryTrait,
};
use roaring::RoaringBitmap;
use std::collections::BTreeSet;
use self::interner::Interner;
use self::logger::SearchLogger;
use self::query_term::Phrase;
use self::resolve_query_graph::{resolve_query_graph, NodeDocIdsCache};
use crate::search::new::query_term::located_query_terms_from_string;
use crate::{Filter, Index, Result, TermsMatchingStrategy};
pub enum BitmapOrAllRef<'s> {
Bitmap(&'s RoaringBitmap),

View File

@ -12,13 +12,12 @@ use heed::types::DecodeIgnore;
use heed::RoTxn;
use itertools::Itertools;
use super::interner::{Interned, Interner};
use super::SearchContext;
use crate::search::fst_utils::{Complement, Intersection, StartsWith, Union};
use crate::search::{build_dfa, get_first};
use crate::{CboRoaringBitmapLenCodec, Index, Result};
use super::interner::{Interned, Interner};
use super::SearchContext;
#[derive(Default, Clone, PartialEq, Eq, Hash)]
pub struct Phrase {
pub words: Vec<Option<Interned<String>>>,

View File

@ -1,8 +1,8 @@
use std::collections::HashSet;
use super::{Edge, RankingRuleGraph, RankingRuleGraphTrait};
use crate::new::small_bitmap::SmallBitmap;
use crate::new::{QueryGraph, SearchContext};
use crate::search::new::small_bitmap::SmallBitmap;
use crate::search::new::{QueryGraph, SearchContext};
use crate::Result;
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {

View File

@ -1,11 +1,12 @@
#![allow(clippy::too_many_arguments)]
use std::collections::btree_map::Entry;
use std::collections::{BTreeMap, VecDeque};
use super::empty_paths_cache::EmptyPathsCache;
use super::{RankingRuleGraph, RankingRuleGraphTrait};
use crate::new::small_bitmap::SmallBitmap;
use crate::search::new::small_bitmap::SmallBitmap;
use crate::Result;
use std::collections::btree_map::Entry;
use std::collections::{BTreeMap, VecDeque};
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub struct Path {

View File

@ -1,11 +1,12 @@
use std::marker::PhantomData;
use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait};
use crate::new::{BitmapOrAllRef, SearchContext};
use crate::Result;
use fxhash::FxHashMap;
use roaring::RoaringBitmap;
use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait};
use crate::search::new::{BitmapOrAllRef, SearchContext};
use crate::Result;
// TODO: the cache should have a G::EdgeDetails as key
// but then it means that we should have a quick way of
// computing their hash and comparing them

View File

@ -1,6 +1,5 @@
use crate::new::small_bitmap::SmallBitmap;
use super::paths_map::PathsMap;
use crate::search::new::small_bitmap::SmallBitmap;
#[derive(Clone)]
pub struct EmptyPathsCache {

View File

@ -6,16 +6,17 @@ mod paths_map;
mod proximity;
mod typo;
use super::logger::SearchLogger;
use super::small_bitmap::SmallBitmap;
use super::{QueryGraph, QueryNode, SearchContext};
use crate::Result;
pub use edge_docids_cache::EdgeDocidsCache;
pub use empty_paths_cache::EmptyPathsCache;
pub use proximity::ProximityGraph;
use roaring::RoaringBitmap;
pub use typo::TypoGraph;
use super::logger::SearchLogger;
use super::small_bitmap::SmallBitmap;
use super::{QueryGraph, QueryNode, SearchContext};
use crate::Result;
#[derive(Debug, Clone)]
pub enum EdgeDetails<E> {
Unconditional,

View File

@ -1,11 +1,10 @@
use crate::new::small_bitmap::SmallBitmap;
use super::cheapest_paths::Path;
use crate::search::new::small_bitmap::SmallBitmap;
// What is PathsMap used for?
// For the empty_prefixes field in the EmptyPathsCache only :/
// but it could be used for more, like efficient computing of a set of paths
#[derive(Debug, Clone)]
pub struct PathsMap<V> {
pub nodes: Vec<(u16, PathsMap<V>)>,
@ -53,10 +52,10 @@ impl<V> PathsMap<V> {
}
}
fn remove_first_rec(&mut self, cur: &mut Vec<u16>) -> (bool, V) {
let Some((first_edge, rest)) = self.nodes.first_mut() else {
let Some((first_edge, rest)) = self.nodes.first_mut() else {
// The PathsMap has to be correct by construction here, otherwise
// the unwrap() will crash
return (true, self.value.take().unwrap())
return (true, self.value.take().unwrap())
};
cur.push(*first_edge);
let (rest_is_empty, value) = rest.remove_first_rec(cur);

View File

@ -1,12 +1,14 @@
use super::ProximityEdge;
use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
use crate::new::ranking_rule_graph::proximity::WordPair;
use crate::new::ranking_rule_graph::EdgeDetails;
use crate::new::{QueryNode, SearchContext};
use crate::Result;
use itertools::Itertools;
use std::collections::BTreeMap;
use itertools::Itertools;
use super::ProximityEdge;
use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
use crate::search::new::ranking_rule_graph::proximity::WordPair;
use crate::search::new::ranking_rule_graph::EdgeDetails;
use crate::search::new::{QueryNode, SearchContext};
use crate::Result;
pub fn visit_from_node(
ctx: &mut SearchContext,
from_node: &QueryNode,

View File

@ -1,8 +1,9 @@
use super::{ProximityEdge, WordPair};
use crate::new::SearchContext;
use crate::{CboRoaringBitmapCodec, Result};
use roaring::RoaringBitmap;
use super::{ProximityEdge, WordPair};
use crate::search::new::SearchContext;
use crate::{CboRoaringBitmapCodec, Result};
pub fn compute_docids<'search>(
ctx: &mut SearchContext<'search>,
edge: &ProximityEdge,

View File

@ -1,15 +1,16 @@
pub mod build;
pub mod compute_docids;
use roaring::RoaringBitmap;
use super::empty_paths_cache::EmptyPathsCache;
use super::{EdgeDetails, RankingRuleGraphTrait};
use crate::new::interner::Interned;
use crate::new::logger::SearchLogger;
use crate::new::query_term::WordDerivations;
use crate::new::small_bitmap::SmallBitmap;
use crate::new::{QueryGraph, QueryNode, SearchContext};
use crate::search::new::interner::Interned;
use crate::search::new::logger::SearchLogger;
use crate::search::new::query_term::WordDerivations;
use crate::search::new::small_bitmap::SmallBitmap;
use crate::search::new::{QueryGraph, QueryNode, SearchContext};
use crate::Result;
use roaring::RoaringBitmap;
// TODO: intern the proximity edges as well?

View File

@ -1,15 +1,16 @@
use super::empty_paths_cache::EmptyPathsCache;
use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait};
use crate::new::interner::Interned;
use crate::new::logger::SearchLogger;
use crate::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm, WordDerivations};
use crate::new::resolve_query_graph::resolve_phrase;
use crate::new::small_bitmap::SmallBitmap;
use crate::new::{QueryGraph, QueryNode, SearchContext};
use crate::{Result, RoaringBitmapCodec};
use heed::BytesDecode;
use roaring::RoaringBitmap;
use super::empty_paths_cache::EmptyPathsCache;
use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait};
use crate::search::new::interner::Interned;
use crate::search::new::logger::SearchLogger;
use crate::search::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm, WordDerivations};
use crate::search::new::resolve_query_graph::resolve_phrase;
use crate::search::new::small_bitmap::SmallBitmap;
use crate::search::new::{QueryGraph, QueryNode, SearchContext};
use crate::{Result, RoaringBitmapCodec};
#[derive(Clone)]
pub enum TypoEdge {
Phrase { phrase: Interned<Phrase> },

View File

@ -1,11 +1,10 @@
use super::logger::SearchLogger;
use super::QueryGraph;
use super::SearchContext;
use crate::new::graph_based_ranking_rule::GraphBasedRankingRule;
use crate::new::ranking_rule_graph::ProximityGraph;
use crate::new::ranking_rule_graph::TypoGraph;
use crate::new::words::Words;
use roaring::RoaringBitmap;
use super::logger::SearchLogger;
use super::{QueryGraph, SearchContext};
use crate::search::new::graph_based_ranking_rule::GraphBasedRankingRule;
use crate::search::new::ranking_rule_graph::{ProximityGraph, TypoGraph};
use crate::search::new::words::Words;
// use crate::search::new::sort::Sort;
use crate::{Result, TermsMatchingStrategy};
@ -239,16 +238,18 @@ pub fn apply_ranking_rules<'search>(
#[cfg(test)]
mod tests {
// use crate::allocator::ALLOC;
use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use crate::new::{execute_search, SearchContext};
use big_s::S;
use heed::EnvOpenOptions;
use maplit::hashset;
use std::fs::File;
use std::io::{BufRead, BufReader, Cursor, Seek};
use std::time::Instant;
// use crate::new::logger::detailed::DetailedSearchLogger;
use crate::new::logger::DefaultSearchLogger;
use big_s::S;
use heed::EnvOpenOptions;
use maplit::hashset;
use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
// use crate::search::new::logger::detailed::DetailedSearchLogger;
use crate::search::new::logger::DefaultSearchLogger;
use crate::search::new::{execute_search, SearchContext};
use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use crate::{Criterion, Index, Object, Search, TermsMatchingStrategy};
@ -265,7 +266,7 @@ mod tests {
// loop {
let start = Instant::now();
// let mut logger = crate::new::logger::detailed::DetailedSearchLogger::new("log");
// let mut logger = crate::search::new::logger::detailed::DetailedSearchLogger::new("log");
let mut ctx = SearchContext::new(&index, &txn);
let results = execute_search(
&mut ctx,
@ -362,7 +363,7 @@ mod tests {
// loop {
let start = Instant::now();
let mut logger = crate::new::logger::detailed::DetailedSearchLogger::new("log");
let mut logger = crate::search::new::logger::detailed::DetailedSearchLogger::new("log");
let mut ctx = SearchContext::new(&index, &txn);
let results = execute_search(
&mut ctx,

View File

@ -1,12 +1,14 @@
use std::collections::VecDeque;
use fxhash::FxHashMap;
use heed::BytesDecode;
use roaring::{MultiOps, RoaringBitmap};
use super::interner::Interned;
use super::query_term::{Phrase, QueryTerm, WordDerivations};
use super::small_bitmap::SmallBitmap;
use super::{QueryGraph, QueryNode, SearchContext};
use crate::{CboRoaringBitmapCodec, Result, RoaringBitmapCodec};
use fxhash::FxHashMap;
use heed::BytesDecode;
use roaring::{MultiOps, RoaringBitmap};
use std::collections::VecDeque;
// TODO: manual performance metrics: access to DB, bitmap deserializations/operations, etc.
#[derive(Default)]

View File

@ -1,3 +1,5 @@
use roaring::RoaringBitmap;
use super::logger::SearchLogger;
use super::{
RankingRule, RankingRuleOutput, RankingRuleOutputIter, RankingRuleOutputIterWrapper,
@ -11,7 +13,6 @@ use crate::{
Index,
Result,
};
use roaring::RoaringBitmap;
pub struct Sort<'search, Query> {
field_name: String,

View File

@ -1,9 +1,11 @@
use std::collections::BTreeSet;
use roaring::RoaringBitmap;
use super::logger::SearchLogger;
use super::resolve_query_graph::resolve_query_graph;
use super::{QueryGraph, QueryNode, RankingRule, RankingRuleOutput, SearchContext};
use crate::{Result, TermsMatchingStrategy};
use roaring::RoaringBitmap;
use std::collections::BTreeSet;
pub struct Words {
exhausted: bool,