mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 21:04:27 +01:00
Cargo fmt
This commit is contained in:
parent
10626dddfc
commit
57fa689131
@ -54,8 +54,6 @@ pub static ALLOC: mimalloc::MiMalloc = mimalloc::MiMalloc;
|
|||||||
#[macro_use]
|
#[macro_use]
|
||||||
pub mod documents;
|
pub mod documents;
|
||||||
|
|
||||||
pub use search::new;
|
|
||||||
|
|
||||||
mod asc_desc;
|
mod asc_desc;
|
||||||
mod criterion;
|
mod criterion;
|
||||||
mod error;
|
mod error;
|
||||||
|
@ -1,8 +1,11 @@
|
|||||||
use super::{interner::Interned, SearchContext};
|
use std::collections::hash_map::Entry;
|
||||||
use crate::Result;
|
|
||||||
use fxhash::FxHashMap;
|
use fxhash::FxHashMap;
|
||||||
use heed::types::ByteSlice;
|
use heed::types::ByteSlice;
|
||||||
use std::collections::hash_map::Entry;
|
|
||||||
|
use super::interner::Interned;
|
||||||
|
use super::SearchContext;
|
||||||
|
use crate::Result;
|
||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
pub struct DatabaseCache<'search> {
|
pub struct DatabaseCache<'search> {
|
||||||
|
@ -1,13 +1,13 @@
|
|||||||
use super::logger::SearchLogger;
|
|
||||||
use super::ranking_rule_graph::EdgeDocidsCache;
|
|
||||||
use super::ranking_rule_graph::EmptyPathsCache;
|
|
||||||
use super::ranking_rule_graph::{RankingRuleGraph, RankingRuleGraphTrait};
|
|
||||||
use super::small_bitmap::SmallBitmap;
|
|
||||||
use super::SearchContext;
|
|
||||||
use super::{BitmapOrAllRef, QueryGraph, RankingRule, RankingRuleOutput};
|
|
||||||
use crate::Result;
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
|
use super::logger::SearchLogger;
|
||||||
|
use super::ranking_rule_graph::{
|
||||||
|
EdgeDocidsCache, EmptyPathsCache, RankingRuleGraph, RankingRuleGraphTrait,
|
||||||
|
};
|
||||||
|
use super::small_bitmap::SmallBitmap;
|
||||||
|
use super::{BitmapOrAllRef, QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
|
||||||
|
use crate::Result;
|
||||||
|
|
||||||
pub struct GraphBasedRankingRule<G: RankingRuleGraphTrait> {
|
pub struct GraphBasedRankingRule<G: RankingRuleGraphTrait> {
|
||||||
id: String,
|
id: String,
|
||||||
state: Option<GraphBasedRankingRuleState<G>>,
|
state: Option<GraphBasedRankingRuleState<G>>,
|
||||||
|
@ -1,7 +1,8 @@
|
|||||||
use fxhash::FxHashMap;
|
|
||||||
use std::hash::Hash;
|
use std::hash::Hash;
|
||||||
use std::marker::PhantomData;
|
use std::marker::PhantomData;
|
||||||
|
|
||||||
|
use fxhash::FxHashMap;
|
||||||
|
|
||||||
pub struct Interned<T> {
|
pub struct Interned<T> {
|
||||||
idx: u32,
|
idx: u32,
|
||||||
_phantom: PhantomData<T>,
|
_phantom: PhantomData<T>,
|
||||||
|
@ -1,39 +1,37 @@
|
|||||||
|
use std::fs::File;
|
||||||
|
use std::io::Write;
|
||||||
|
use std::path::PathBuf;
|
||||||
|
use std::time::Instant;
|
||||||
|
|
||||||
use rand::random;
|
use rand::random;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use std::fs::File;
|
|
||||||
use std::time::Instant;
|
|
||||||
use std::{io::Write, path::PathBuf};
|
|
||||||
|
|
||||||
use crate::new::ranking_rule_graph::TypoGraph;
|
use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
|
||||||
use crate::new::small_bitmap::SmallBitmap;
|
use crate::search::new::ranking_rule_graph::{
|
||||||
use crate::new::{QueryNode, QueryGraph, SearchContext};
|
Edge, EdgeDetails, EmptyPathsCache, ProximityGraph, RankingRuleGraph, RankingRuleGraphTrait,
|
||||||
use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
|
TypoGraph,
|
||||||
use crate::new::ranking_rule_graph::EmptyPathsCache;
|
|
||||||
use crate::new::ranking_rule_graph::{Edge, EdgeDetails, RankingRuleGraphTrait};
|
|
||||||
use crate::new::ranking_rule_graph::{
|
|
||||||
ProximityGraph, RankingRuleGraph,
|
|
||||||
};
|
};
|
||||||
|
use crate::search::new::small_bitmap::SmallBitmap;
|
||||||
use super::{RankingRule, SearchLogger};
|
use crate::search::new::{QueryGraph, QueryNode, SearchContext};
|
||||||
|
use crate::search::new::{RankingRule, SearchLogger};
|
||||||
|
|
||||||
pub enum SearchEvents {
|
pub enum SearchEvents {
|
||||||
RankingRuleStartIteration {
|
RankingRuleStartIteration {
|
||||||
ranking_rule_idx: usize,
|
ranking_rule_idx: usize,
|
||||||
query: QueryGraph,
|
query: QueryGraph,
|
||||||
universe: RoaringBitmap,
|
universe: RoaringBitmap,
|
||||||
time: Instant
|
time: Instant,
|
||||||
},
|
},
|
||||||
RankingRuleNextBucket {
|
RankingRuleNextBucket {
|
||||||
ranking_rule_idx: usize,
|
ranking_rule_idx: usize,
|
||||||
universe: RoaringBitmap,
|
universe: RoaringBitmap,
|
||||||
candidates: RoaringBitmap,
|
candidates: RoaringBitmap,
|
||||||
time: Instant
|
time: Instant,
|
||||||
},
|
},
|
||||||
RankingRuleEndIteration {
|
RankingRuleEndIteration {
|
||||||
ranking_rule_idx: usize,
|
ranking_rule_idx: usize,
|
||||||
universe: RoaringBitmap,
|
universe: RoaringBitmap,
|
||||||
time: Instant
|
time: Instant,
|
||||||
},
|
},
|
||||||
ExtendResults {
|
ExtendResults {
|
||||||
new: Vec<u32>,
|
new: Vec<u32>,
|
||||||
@ -57,7 +55,11 @@ pub enum SearchEvents {
|
|||||||
distances: Vec<Vec<(u16, SmallBitmap)>>,
|
distances: Vec<Vec<(u16, SmallBitmap)>>,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
},
|
},
|
||||||
RankingRuleSkipBucket { ranking_rule_idx: usize, candidates: RoaringBitmap, time: Instant },
|
RankingRuleSkipBucket {
|
||||||
|
ranking_rule_idx: usize,
|
||||||
|
candidates: RoaringBitmap,
|
||||||
|
time: Instant,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct DetailedSearchLogger {
|
pub struct DetailedSearchLogger {
|
||||||
@ -106,7 +108,6 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
|
|||||||
_ranking_rule: &dyn RankingRule<'transaction, QueryGraph>,
|
_ranking_rule: &dyn RankingRule<'transaction, QueryGraph>,
|
||||||
query: &QueryGraph,
|
query: &QueryGraph,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
|
|
||||||
) {
|
) {
|
||||||
self.events.push(SearchEvents::RankingRuleStartIteration {
|
self.events.push(SearchEvents::RankingRuleStartIteration {
|
||||||
ranking_rule_idx,
|
ranking_rule_idx,
|
||||||
@ -122,7 +123,6 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
|
|||||||
_ranking_rule: &dyn RankingRule<'transaction, QueryGraph>,
|
_ranking_rule: &dyn RankingRule<'transaction, QueryGraph>,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
candidates: &RoaringBitmap,
|
candidates: &RoaringBitmap,
|
||||||
|
|
||||||
) {
|
) {
|
||||||
self.events.push(SearchEvents::RankingRuleNextBucket {
|
self.events.push(SearchEvents::RankingRuleNextBucket {
|
||||||
ranking_rule_idx,
|
ranking_rule_idx,
|
||||||
@ -136,12 +136,11 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
|
|||||||
ranking_rule_idx: usize,
|
ranking_rule_idx: usize,
|
||||||
_ranking_rule: &dyn RankingRule<'transaction, QueryGraph>,
|
_ranking_rule: &dyn RankingRule<'transaction, QueryGraph>,
|
||||||
candidates: &RoaringBitmap,
|
candidates: &RoaringBitmap,
|
||||||
|
|
||||||
) {
|
) {
|
||||||
self.events.push(SearchEvents::RankingRuleSkipBucket {
|
self.events.push(SearchEvents::RankingRuleSkipBucket {
|
||||||
ranking_rule_idx,
|
ranking_rule_idx,
|
||||||
candidates: candidates.clone(),
|
candidates: candidates.clone(),
|
||||||
time: Instant::now()
|
time: Instant::now(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -150,12 +149,11 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
|
|||||||
ranking_rule_idx: usize,
|
ranking_rule_idx: usize,
|
||||||
_ranking_rule: &dyn RankingRule<'transaction, QueryGraph>,
|
_ranking_rule: &dyn RankingRule<'transaction, QueryGraph>,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
|
|
||||||
) {
|
) {
|
||||||
self.events.push(SearchEvents::RankingRuleEndIteration {
|
self.events.push(SearchEvents::RankingRuleEndIteration {
|
||||||
ranking_rule_idx,
|
ranking_rule_idx,
|
||||||
universe: universe.clone(),
|
universe: universe.clone(),
|
||||||
time: Instant::now()
|
time: Instant::now(),
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
fn add_to_results(&mut self, docids: &[u32]) {
|
fn add_to_results(&mut self, docids: &[u32]) {
|
||||||
@ -166,18 +164,47 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
|
|||||||
self.events.push(SearchEvents::WordsState { query_graph: query_graph.clone() });
|
self.events.push(SearchEvents::WordsState { query_graph: query_graph.clone() });
|
||||||
}
|
}
|
||||||
|
|
||||||
fn log_proximity_state(&mut self, query_graph: &RankingRuleGraph<ProximityGraph>, paths_map: &[Vec<u16>], empty_paths_cache: &EmptyPathsCache, universe: &RoaringBitmap, distances: Vec<Vec<(u16, SmallBitmap)>>, cost: u16,) {
|
fn log_proximity_state(
|
||||||
self.events.push(SearchEvents::ProximityState { graph: query_graph.clone(), paths: paths_map.to_vec(), empty_paths_cache: empty_paths_cache.clone(), universe: universe.clone(), distances, cost })
|
&mut self,
|
||||||
}
|
query_graph: &RankingRuleGraph<ProximityGraph>,
|
||||||
|
paths_map: &[Vec<u16>],
|
||||||
fn log_typo_state(&mut self, query_graph: &RankingRuleGraph<TypoGraph>, paths_map: &[Vec<u16>], empty_paths_cache: &EmptyPathsCache, universe: &RoaringBitmap, distances: Vec<Vec<(u16, SmallBitmap)>>, cost: u16,) {
|
empty_paths_cache: &EmptyPathsCache,
|
||||||
self.events.push(SearchEvents::TypoState { graph: query_graph.clone(), paths: paths_map.to_vec(), empty_paths_cache: empty_paths_cache.clone(), universe: universe.clone(), distances, cost })
|
universe: &RoaringBitmap,
|
||||||
|
distances: Vec<Vec<(u16, SmallBitmap)>>,
|
||||||
|
cost: u16,
|
||||||
|
) {
|
||||||
|
self.events.push(SearchEvents::ProximityState {
|
||||||
|
graph: query_graph.clone(),
|
||||||
|
paths: paths_map.to_vec(),
|
||||||
|
empty_paths_cache: empty_paths_cache.clone(),
|
||||||
|
universe: universe.clone(),
|
||||||
|
distances,
|
||||||
|
cost,
|
||||||
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn log_typo_state(
|
||||||
|
&mut self,
|
||||||
|
query_graph: &RankingRuleGraph<TypoGraph>,
|
||||||
|
paths_map: &[Vec<u16>],
|
||||||
|
empty_paths_cache: &EmptyPathsCache,
|
||||||
|
universe: &RoaringBitmap,
|
||||||
|
distances: Vec<Vec<(u16, SmallBitmap)>>,
|
||||||
|
cost: u16,
|
||||||
|
) {
|
||||||
|
self.events.push(SearchEvents::TypoState {
|
||||||
|
graph: query_graph.clone(),
|
||||||
|
paths: paths_map.to_vec(),
|
||||||
|
empty_paths_cache: empty_paths_cache.clone(),
|
||||||
|
universe: universe.clone(),
|
||||||
|
distances,
|
||||||
|
cost,
|
||||||
|
})
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl DetailedSearchLogger {
|
impl DetailedSearchLogger {
|
||||||
pub fn write_d2_description(&self,ctx: &mut SearchContext,) {
|
pub fn write_d2_description(&self, ctx: &mut SearchContext) {
|
||||||
let mut prev_time = self.initial_query_time.unwrap();
|
let mut prev_time = self.initial_query_time.unwrap();
|
||||||
let mut timestamp = vec![];
|
let mut timestamp = vec![];
|
||||||
fn activated_id(timestamp: &[usize]) -> String {
|
fn activated_id(timestamp: &[usize]) -> String {
|
||||||
@ -229,21 +256,29 @@ impl DetailedSearchLogger {
|
|||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
}
|
}
|
||||||
writeln!(&mut file,
|
writeln!(
|
||||||
"{ranking_rule_idx}.{self_activated_id} {{
|
&mut file,
|
||||||
|
"{ranking_rule_idx}.{self_activated_id} {{
|
||||||
style {{
|
style {{
|
||||||
fill: \"#D8A7B1\"
|
fill: \"#D8A7B1\"
|
||||||
}}
|
}}
|
||||||
}}").unwrap();
|
}}"
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
}
|
}
|
||||||
SearchEvents::RankingRuleNextBucket { ranking_rule_idx, time, universe, candidates } => {
|
SearchEvents::RankingRuleNextBucket {
|
||||||
|
ranking_rule_idx,
|
||||||
|
time,
|
||||||
|
universe,
|
||||||
|
candidates,
|
||||||
|
} => {
|
||||||
let _elapsed = time.duration_since(prev_time);
|
let _elapsed = time.duration_since(prev_time);
|
||||||
prev_time = *time;
|
prev_time = *time;
|
||||||
let old_activated_id = activated_id(&timestamp);
|
let old_activated_id = activated_id(&timestamp);
|
||||||
// writeln!(&mut file, "time.{old_activated_id}: {:.2}", elapsed.as_micros() as f64 / 1000.0).unwrap();
|
// writeln!(&mut file, "time.{old_activated_id}: {:.2}", elapsed.as_micros() as f64 / 1000.0).unwrap();
|
||||||
*timestamp.last_mut().unwrap() += 1;
|
*timestamp.last_mut().unwrap() += 1;
|
||||||
let next_activated_id = activated_id(&timestamp);
|
let next_activated_id = activated_id(&timestamp);
|
||||||
writeln!(&mut file,
|
writeln!(&mut file,
|
||||||
"{ranking_rule_idx}.{old_activated_id} -> {ranking_rule_idx}.{next_activated_id} : next bucket {}/{}", candidates.len(), universe.len())
|
"{ranking_rule_idx}.{old_activated_id} -> {ranking_rule_idx}.{next_activated_id} : next bucket {}/{}", candidates.len(), universe.len())
|
||||||
.unwrap();
|
.unwrap();
|
||||||
}
|
}
|
||||||
@ -255,7 +290,7 @@ impl DetailedSearchLogger {
|
|||||||
*timestamp.last_mut().unwrap() += 1;
|
*timestamp.last_mut().unwrap() += 1;
|
||||||
let next_activated_id = activated_id(&timestamp);
|
let next_activated_id = activated_id(&timestamp);
|
||||||
let len = candidates.len();
|
let len = candidates.len();
|
||||||
writeln!(&mut file,
|
writeln!(&mut file,
|
||||||
"{ranking_rule_idx}.{old_activated_id} -> {ranking_rule_idx}.{next_activated_id} : skip bucket ({len})",)
|
"{ranking_rule_idx}.{old_activated_id} -> {ranking_rule_idx}.{next_activated_id} : skip bucket ({len})",)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
}
|
}
|
||||||
@ -280,14 +315,14 @@ impl DetailedSearchLogger {
|
|||||||
}
|
}
|
||||||
SearchEvents::ExtendResults { new } => {
|
SearchEvents::ExtendResults { new } => {
|
||||||
if new.is_empty() {
|
if new.is_empty() {
|
||||||
continue
|
continue;
|
||||||
}
|
}
|
||||||
let cur_ranking_rule = timestamp.len() - 1;
|
let cur_ranking_rule = timestamp.len() - 1;
|
||||||
let cur_activated_id = activated_id(&timestamp);
|
let cur_activated_id = activated_id(&timestamp);
|
||||||
let docids = new.iter().collect::<Vec<_>>();
|
let docids = new.iter().collect::<Vec<_>>();
|
||||||
let len = new.len();
|
let len = new.len();
|
||||||
let random = random::<u64>();
|
let random = random::<u64>();
|
||||||
|
|
||||||
writeln!(
|
writeln!(
|
||||||
&mut file,
|
&mut file,
|
||||||
"{cur_ranking_rule}.{cur_activated_id} -> results.{random} : \"add {len}\"
|
"{cur_ranking_rule}.{cur_activated_id} -> results.{random} : \"add {len}\"
|
||||||
@ -300,7 +335,7 @@ results.{random} {{
|
|||||||
"
|
"
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
},
|
}
|
||||||
SearchEvents::WordsState { query_graph } => {
|
SearchEvents::WordsState { query_graph } => {
|
||||||
let cur_ranking_rule = timestamp.len() - 1;
|
let cur_ranking_rule = timestamp.len() - 1;
|
||||||
*timestamp.last_mut().unwrap() += 1;
|
*timestamp.last_mut().unwrap() += 1;
|
||||||
@ -314,9 +349,18 @@ results.{random} {{
|
|||||||
&mut file,
|
&mut file,
|
||||||
"{id} {{
|
"{id} {{
|
||||||
link: \"{id}.d2.svg\"
|
link: \"{id}.d2.svg\"
|
||||||
}}").unwrap();
|
}}"
|
||||||
},
|
)
|
||||||
SearchEvents::ProximityState { graph, paths, empty_paths_cache, universe, distances, cost } => {
|
.unwrap();
|
||||||
|
}
|
||||||
|
SearchEvents::ProximityState {
|
||||||
|
graph,
|
||||||
|
paths,
|
||||||
|
empty_paths_cache,
|
||||||
|
universe,
|
||||||
|
distances,
|
||||||
|
cost,
|
||||||
|
} => {
|
||||||
let cur_ranking_rule = timestamp.len() - 1;
|
let cur_ranking_rule = timestamp.len() - 1;
|
||||||
*timestamp.last_mut().unwrap() += 1;
|
*timestamp.last_mut().unwrap() += 1;
|
||||||
let cur_activated_id = activated_id(&timestamp);
|
let cur_activated_id = activated_id(&timestamp);
|
||||||
@ -324,15 +368,32 @@ results.{random} {{
|
|||||||
let id = format!("{cur_ranking_rule}.{cur_activated_id}");
|
let id = format!("{cur_ranking_rule}.{cur_activated_id}");
|
||||||
let new_file_path = self.folder_path.join(format!("{id}.d2"));
|
let new_file_path = self.folder_path.join(format!("{id}.d2"));
|
||||||
let mut new_file = std::fs::File::create(new_file_path).unwrap();
|
let mut new_file = std::fs::File::create(new_file_path).unwrap();
|
||||||
Self::ranking_rule_graph_d2_description(ctx, graph, paths, empty_paths_cache, distances.clone(), &mut new_file);
|
Self::ranking_rule_graph_d2_description(
|
||||||
|
ctx,
|
||||||
|
graph,
|
||||||
|
paths,
|
||||||
|
empty_paths_cache,
|
||||||
|
distances.clone(),
|
||||||
|
&mut new_file,
|
||||||
|
);
|
||||||
writeln!(
|
writeln!(
|
||||||
&mut file,
|
&mut file,
|
||||||
"{id} {{
|
"{id} {{
|
||||||
link: \"{id}.d2.svg\"
|
link: \"{id}.d2.svg\"
|
||||||
tooltip: \"cost {cost}, universe len: {}\"
|
tooltip: \"cost {cost}, universe len: {}\"
|
||||||
}}", universe.len()).unwrap();
|
}}",
|
||||||
},
|
universe.len()
|
||||||
SearchEvents::TypoState { graph, paths, empty_paths_cache, universe, distances, cost } => {
|
)
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
SearchEvents::TypoState {
|
||||||
|
graph,
|
||||||
|
paths,
|
||||||
|
empty_paths_cache,
|
||||||
|
universe,
|
||||||
|
distances,
|
||||||
|
cost,
|
||||||
|
} => {
|
||||||
let cur_ranking_rule = timestamp.len() - 1;
|
let cur_ranking_rule = timestamp.len() - 1;
|
||||||
*timestamp.last_mut().unwrap() += 1;
|
*timestamp.last_mut().unwrap() += 1;
|
||||||
let cur_activated_id = activated_id(&timestamp);
|
let cur_activated_id = activated_id(&timestamp);
|
||||||
@ -340,89 +401,130 @@ results.{random} {{
|
|||||||
let id = format!("{cur_ranking_rule}.{cur_activated_id}");
|
let id = format!("{cur_ranking_rule}.{cur_activated_id}");
|
||||||
let new_file_path = self.folder_path.join(format!("{id}.d2"));
|
let new_file_path = self.folder_path.join(format!("{id}.d2"));
|
||||||
let mut new_file = std::fs::File::create(new_file_path).unwrap();
|
let mut new_file = std::fs::File::create(new_file_path).unwrap();
|
||||||
Self::ranking_rule_graph_d2_description(ctx,graph, paths, empty_paths_cache, distances.clone(), &mut new_file);
|
Self::ranking_rule_graph_d2_description(
|
||||||
|
ctx,
|
||||||
|
graph,
|
||||||
|
paths,
|
||||||
|
empty_paths_cache,
|
||||||
|
distances.clone(),
|
||||||
|
&mut new_file,
|
||||||
|
);
|
||||||
writeln!(
|
writeln!(
|
||||||
&mut file,
|
&mut file,
|
||||||
"{id} {{
|
"{id} {{
|
||||||
link: \"{id}.d2.svg\"
|
link: \"{id}.d2.svg\"
|
||||||
tooltip: \"cost {cost}, universe len: {}\"
|
tooltip: \"cost {cost}, universe len: {}\"
|
||||||
}}", universe.len()).unwrap();
|
}}",
|
||||||
},
|
universe.len()
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
writeln!(&mut file, "}}").unwrap();
|
writeln!(&mut file, "}}").unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
fn query_node_d2_desc(ctx: &mut SearchContext, node_idx: usize, node: &QueryNode, distances: &[(u16, SmallBitmap)], file: &mut File) {
|
fn query_node_d2_desc(
|
||||||
|
ctx: &mut SearchContext,
|
||||||
|
node_idx: usize,
|
||||||
|
node: &QueryNode,
|
||||||
|
distances: &[(u16, SmallBitmap)],
|
||||||
|
file: &mut File,
|
||||||
|
) {
|
||||||
match &node {
|
match &node {
|
||||||
QueryNode::Term(LocatedQueryTerm { value, .. }) => {
|
QueryNode::Term(LocatedQueryTerm { value, .. }) => match value {
|
||||||
match value {
|
QueryTerm::Phrase { phrase } => {
|
||||||
QueryTerm::Phrase { phrase } => {
|
let phrase = ctx.phrase_interner.get(*phrase);
|
||||||
let phrase = ctx.phrase_interner.get(*phrase);
|
let phrase_str = phrase.description(&ctx.word_interner);
|
||||||
let phrase_str = phrase.description(&ctx.word_interner);
|
writeln!(file, "{node_idx} : \"{phrase_str}\"").unwrap();
|
||||||
writeln!(file,"{node_idx} : \"{phrase_str}\"").unwrap();
|
}
|
||||||
},
|
QueryTerm::Word {
|
||||||
QueryTerm::Word { derivations: WordDerivations { original, zero_typo, one_typo, two_typos, use_prefix_db, synonyms, split_words } } => {
|
derivations:
|
||||||
let original = ctx.word_interner.get(*original);
|
WordDerivations {
|
||||||
writeln!(file,"{node_idx} : \"{original}\" {{
|
original,
|
||||||
shape: class").unwrap();
|
zero_typo,
|
||||||
for w in zero_typo.iter().copied() {
|
one_typo,
|
||||||
let w = ctx.word_interner.get(w);
|
two_typos,
|
||||||
writeln!(file, "\"{w}\" : 0").unwrap();
|
use_prefix_db,
|
||||||
}
|
synonyms,
|
||||||
for w in one_typo.iter().copied() {
|
split_words,
|
||||||
let w = ctx.word_interner.get(w);
|
},
|
||||||
writeln!(file, "\"{w}\" : 1").unwrap();
|
} => {
|
||||||
}
|
let original = ctx.word_interner.get(*original);
|
||||||
for w in two_typos.iter().copied() {
|
writeln!(
|
||||||
let w = ctx.word_interner.get(w);
|
file,
|
||||||
writeln!(file, "\"{w}\" : 2").unwrap();
|
"{node_idx} : \"{original}\" {{
|
||||||
}
|
shape: class"
|
||||||
if let Some(split_words) = split_words {
|
)
|
||||||
let phrase = ctx.phrase_interner.get(*split_words);
|
.unwrap();
|
||||||
let phrase_str = phrase.description(&ctx.word_interner);
|
for w in zero_typo.iter().copied() {
|
||||||
writeln!(file, "\"{phrase_str}\" : split_words").unwrap();
|
let w = ctx.word_interner.get(w);
|
||||||
}
|
writeln!(file, "\"{w}\" : 0").unwrap();
|
||||||
for synonym in synonyms.iter().copied() {
|
}
|
||||||
let phrase = ctx.phrase_interner.get(synonym);
|
for w in one_typo.iter().copied() {
|
||||||
let phrase_str = phrase.description(&ctx.word_interner);
|
let w = ctx.word_interner.get(w);
|
||||||
writeln!(file, "\"{phrase_str}\" : synonym").unwrap();
|
writeln!(file, "\"{w}\" : 1").unwrap();
|
||||||
}
|
}
|
||||||
if *use_prefix_db {
|
for w in two_typos.iter().copied() {
|
||||||
writeln!(file, "use prefix DB : true").unwrap();
|
let w = ctx.word_interner.get(w);
|
||||||
}
|
writeln!(file, "\"{w}\" : 2").unwrap();
|
||||||
for (d, edges) in distances.iter() {
|
}
|
||||||
writeln!(file, "\"distance {d}\" : {:?}", edges.iter().collect::<Vec<_>>() ).unwrap();
|
if let Some(split_words) = split_words {
|
||||||
}
|
let phrase = ctx.phrase_interner.get(*split_words);
|
||||||
|
let phrase_str = phrase.description(&ctx.word_interner);
|
||||||
writeln!(file, "}}").unwrap();
|
writeln!(file, "\"{phrase_str}\" : split_words").unwrap();
|
||||||
},
|
}
|
||||||
|
for synonym in synonyms.iter().copied() {
|
||||||
|
let phrase = ctx.phrase_interner.get(synonym);
|
||||||
|
let phrase_str = phrase.description(&ctx.word_interner);
|
||||||
|
writeln!(file, "\"{phrase_str}\" : synonym").unwrap();
|
||||||
|
}
|
||||||
|
if *use_prefix_db {
|
||||||
|
writeln!(file, "use prefix DB : true").unwrap();
|
||||||
|
}
|
||||||
|
for (d, edges) in distances.iter() {
|
||||||
|
writeln!(file, "\"distance {d}\" : {:?}", edges.iter().collect::<Vec<_>>())
|
||||||
|
.unwrap();
|
||||||
|
}
|
||||||
|
|
||||||
|
writeln!(file, "}}").unwrap();
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
QueryNode::Deleted => panic!(),
|
QueryNode::Deleted => panic!(),
|
||||||
QueryNode::Start => {
|
QueryNode::Start => {
|
||||||
writeln!(file,"{node_idx} : START").unwrap();
|
writeln!(file, "{node_idx} : START").unwrap();
|
||||||
},
|
}
|
||||||
QueryNode::End => {
|
QueryNode::End => {
|
||||||
writeln!(file,"{node_idx} : END").unwrap();
|
writeln!(file, "{node_idx} : END").unwrap();
|
||||||
},
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fn query_graph_d2_description(ctx: &mut SearchContext, query_graph: &QueryGraph, file: &mut File) {
|
fn query_graph_d2_description(
|
||||||
writeln!(file,"direction: right").unwrap();
|
ctx: &mut SearchContext,
|
||||||
|
query_graph: &QueryGraph,
|
||||||
|
file: &mut File,
|
||||||
|
) {
|
||||||
|
writeln!(file, "direction: right").unwrap();
|
||||||
for node in 0..query_graph.nodes.len() {
|
for node in 0..query_graph.nodes.len() {
|
||||||
if matches!(query_graph.nodes[node], QueryNode::Deleted) {
|
if matches!(query_graph.nodes[node], QueryNode::Deleted) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
Self::query_node_d2_desc(ctx, node, &query_graph.nodes[node], &[], file);
|
Self::query_node_d2_desc(ctx, node, &query_graph.nodes[node], &[], file);
|
||||||
|
|
||||||
for edge in query_graph.edges[node].successors.iter() {
|
for edge in query_graph.edges[node].successors.iter() {
|
||||||
writeln!(file, "{node} -> {edge};\n").unwrap();
|
writeln!(file, "{node} -> {edge};\n").unwrap();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fn ranking_rule_graph_d2_description<R: RankingRuleGraphTrait>(ctx: &mut SearchContext, graph: &RankingRuleGraph<R>, paths: &[Vec<u16>], _empty_paths_cache: &EmptyPathsCache, distances: Vec<Vec<(u16, SmallBitmap)>>, file: &mut File) {
|
fn ranking_rule_graph_d2_description<R: RankingRuleGraphTrait>(
|
||||||
writeln!(file,"direction: right").unwrap();
|
ctx: &mut SearchContext,
|
||||||
|
graph: &RankingRuleGraph<R>,
|
||||||
|
paths: &[Vec<u16>],
|
||||||
|
_empty_paths_cache: &EmptyPathsCache,
|
||||||
|
distances: Vec<Vec<(u16, SmallBitmap)>>,
|
||||||
|
file: &mut File,
|
||||||
|
) {
|
||||||
|
writeln!(file, "direction: right").unwrap();
|
||||||
|
|
||||||
writeln!(file, "Proximity Graph {{").unwrap();
|
writeln!(file, "Proximity Graph {{").unwrap();
|
||||||
for (node_idx, node) in graph.query_graph.nodes.iter().enumerate() {
|
for (node_idx, node) in graph.query_graph.nodes.iter().enumerate() {
|
||||||
@ -437,17 +539,21 @@ shape: class").unwrap();
|
|||||||
|
|
||||||
match &details {
|
match &details {
|
||||||
EdgeDetails::Unconditional => {
|
EdgeDetails::Unconditional => {
|
||||||
writeln!(file,
|
writeln!(
|
||||||
|
file,
|
||||||
"{from_node} -> {to_node} : \"always cost {cost}\"",
|
"{from_node} -> {to_node} : \"always cost {cost}\"",
|
||||||
cost = edge.cost,
|
cost = edge.cost,
|
||||||
).unwrap();
|
)
|
||||||
|
.unwrap();
|
||||||
}
|
}
|
||||||
EdgeDetails::Data(details) => {
|
EdgeDetails::Data(details) => {
|
||||||
writeln!(file,
|
writeln!(
|
||||||
|
file,
|
||||||
"{from_node} -> {to_node} : \"cost {cost} {edge_label}\"",
|
"{from_node} -> {to_node} : \"cost {cost} {edge_label}\"",
|
||||||
cost = edge.cost,
|
cost = edge.cost,
|
||||||
edge_label = R::graphviz_edge_details_label(details)
|
edge_label = R::graphviz_edge_details_label(details)
|
||||||
).unwrap();
|
)
|
||||||
|
.unwrap();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -457,12 +563,11 @@ shape: class").unwrap();
|
|||||||
// Self::paths_d2_description(graph, paths, file);
|
// Self::paths_d2_description(graph, paths, file);
|
||||||
// writeln!(file, "}}").unwrap();
|
// writeln!(file, "}}").unwrap();
|
||||||
|
|
||||||
|
|
||||||
writeln!(file, "Shortest Paths {{").unwrap();
|
writeln!(file, "Shortest Paths {{").unwrap();
|
||||||
Self::paths_d2_description(ctx, graph, paths, file);
|
Self::paths_d2_description(ctx, graph, paths, file);
|
||||||
writeln!(file, "}}").unwrap();
|
writeln!(file, "}}").unwrap();
|
||||||
|
|
||||||
// writeln!(file, "Empty Edge Couples {{").unwrap();
|
// writeln!(file, "Empty Edge Couples {{").unwrap();
|
||||||
// for (i, (e1, e2)) in empty_paths_cache.empty_couple_edges.iter().enumerate() {
|
// for (i, (e1, e2)) in empty_paths_cache.empty_couple_edges.iter().enumerate() {
|
||||||
// writeln!(file, "{i} : \"\" {{").unwrap();
|
// writeln!(file, "{i} : \"\" {{").unwrap();
|
||||||
// Self::edge_d2_description(graph, *e1, file);
|
// Self::edge_d2_description(graph, *e1, file);
|
||||||
@ -478,18 +583,24 @@ shape: class").unwrap();
|
|||||||
// }
|
// }
|
||||||
// writeln!(file, "}}").unwrap();
|
// writeln!(file, "}}").unwrap();
|
||||||
}
|
}
|
||||||
fn edge_d2_description<R: RankingRuleGraphTrait>(ctx: &mut SearchContext, graph: &RankingRuleGraph<R>, edge_idx: u16, file: &mut File) {
|
fn edge_d2_description<R: RankingRuleGraphTrait>(
|
||||||
let Edge { from_node, to_node, cost, .. } = graph.all_edges[edge_idx as usize].as_ref().unwrap() ;
|
ctx: &mut SearchContext,
|
||||||
|
graph: &RankingRuleGraph<R>,
|
||||||
|
edge_idx: u16,
|
||||||
|
file: &mut File,
|
||||||
|
) {
|
||||||
|
let Edge { from_node, to_node, cost, .. } =
|
||||||
|
graph.all_edges[edge_idx as usize].as_ref().unwrap();
|
||||||
let from_node = &graph.query_graph.nodes[*from_node as usize];
|
let from_node = &graph.query_graph.nodes[*from_node as usize];
|
||||||
let from_node_desc = match from_node {
|
let from_node_desc = match from_node {
|
||||||
QueryNode::Term(term) => match &term.value {
|
QueryNode::Term(term) => match &term.value {
|
||||||
QueryTerm::Phrase { phrase } => {
|
QueryTerm::Phrase { phrase } => {
|
||||||
let phrase = ctx.phrase_interner.get(*phrase);
|
let phrase = ctx.phrase_interner.get(*phrase);
|
||||||
phrase.description(&ctx.word_interner)
|
phrase.description(&ctx.word_interner)
|
||||||
},
|
}
|
||||||
QueryTerm::Word { derivations } => {
|
QueryTerm::Word { derivations } => {
|
||||||
ctx.word_interner.get(derivations.original).to_owned()
|
ctx.word_interner.get(derivations.original).to_owned()
|
||||||
},
|
}
|
||||||
},
|
},
|
||||||
QueryNode::Deleted => panic!(),
|
QueryNode::Deleted => panic!(),
|
||||||
QueryNode::Start => "START".to_owned(),
|
QueryNode::Start => "START".to_owned(),
|
||||||
@ -501,18 +612,29 @@ shape: class").unwrap();
|
|||||||
QueryTerm::Phrase { phrase } => {
|
QueryTerm::Phrase { phrase } => {
|
||||||
let phrase = ctx.phrase_interner.get(*phrase);
|
let phrase = ctx.phrase_interner.get(*phrase);
|
||||||
phrase.description(&ctx.word_interner)
|
phrase.description(&ctx.word_interner)
|
||||||
},
|
}
|
||||||
QueryTerm::Word { derivations } => ctx.word_interner.get(derivations.original).to_owned(),
|
QueryTerm::Word { derivations } => {
|
||||||
|
ctx.word_interner.get(derivations.original).to_owned()
|
||||||
|
}
|
||||||
},
|
},
|
||||||
QueryNode::Deleted => panic!(),
|
QueryNode::Deleted => panic!(),
|
||||||
QueryNode::Start => "START".to_owned(),
|
QueryNode::Start => "START".to_owned(),
|
||||||
QueryNode::End => "END".to_owned(),
|
QueryNode::End => "END".to_owned(),
|
||||||
};
|
};
|
||||||
writeln!(file, "{edge_idx}: \"{from_node_desc}->{to_node_desc} [{cost}]\" {{
|
writeln!(
|
||||||
|
file,
|
||||||
|
"{edge_idx}: \"{from_node_desc}->{to_node_desc} [{cost}]\" {{
|
||||||
shape: class
|
shape: class
|
||||||
}}").unwrap();
|
}}"
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
}
|
}
|
||||||
fn paths_d2_description<R: RankingRuleGraphTrait>(ctx: &mut SearchContext, graph: &RankingRuleGraph<R>, paths: &[Vec<u16>], file: &mut File) {
|
fn paths_d2_description<R: RankingRuleGraphTrait>(
|
||||||
|
ctx: &mut SearchContext,
|
||||||
|
graph: &RankingRuleGraph<R>,
|
||||||
|
paths: &[Vec<u16>],
|
||||||
|
file: &mut File,
|
||||||
|
) {
|
||||||
for (path_idx, edge_indexes) in paths.iter().enumerate() {
|
for (path_idx, edge_indexes) in paths.iter().enumerate() {
|
||||||
writeln!(file, "{path_idx} {{").unwrap();
|
writeln!(file, "{path_idx} {{").unwrap();
|
||||||
for edge_idx in edge_indexes.iter() {
|
for edge_idx in edge_indexes.iter() {
|
||||||
|
@ -3,11 +3,9 @@ pub mod detailed;
|
|||||||
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::{
|
use super::ranking_rule_graph::{EmptyPathsCache, ProximityGraph, RankingRuleGraph, TypoGraph};
|
||||||
ranking_rule_graph::{EmptyPathsCache, ProximityGraph, RankingRuleGraph, TypoGraph},
|
use super::small_bitmap::SmallBitmap;
|
||||||
small_bitmap::SmallBitmap,
|
use super::{RankingRule, RankingRuleQueryTrait};
|
||||||
RankingRule, RankingRuleQueryTrait,
|
|
||||||
};
|
|
||||||
|
|
||||||
pub struct DefaultSearchLogger;
|
pub struct DefaultSearchLogger;
|
||||||
impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
|
impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
|
||||||
|
@ -11,12 +11,8 @@ mod small_bitmap;
|
|||||||
mod sort;
|
mod sort;
|
||||||
mod words;
|
mod words;
|
||||||
|
|
||||||
use self::interner::Interner;
|
use std::collections::BTreeSet;
|
||||||
use self::logger::SearchLogger;
|
|
||||||
use self::query_term::Phrase;
|
|
||||||
use self::resolve_query_graph::{resolve_query_graph, NodeDocIdsCache};
|
|
||||||
use crate::new::query_term::located_query_terms_from_string;
|
|
||||||
use crate::{Filter, Index, Result, TermsMatchingStrategy};
|
|
||||||
use charabia::Tokenize;
|
use charabia::Tokenize;
|
||||||
use db_cache::DatabaseCache;
|
use db_cache::DatabaseCache;
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
@ -26,7 +22,13 @@ pub use ranking_rules::{
|
|||||||
RankingRuleOutputIterWrapper, RankingRuleQueryTrait,
|
RankingRuleOutputIterWrapper, RankingRuleQueryTrait,
|
||||||
};
|
};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use std::collections::BTreeSet;
|
|
||||||
|
use self::interner::Interner;
|
||||||
|
use self::logger::SearchLogger;
|
||||||
|
use self::query_term::Phrase;
|
||||||
|
use self::resolve_query_graph::{resolve_query_graph, NodeDocIdsCache};
|
||||||
|
use crate::search::new::query_term::located_query_terms_from_string;
|
||||||
|
use crate::{Filter, Index, Result, TermsMatchingStrategy};
|
||||||
|
|
||||||
pub enum BitmapOrAllRef<'s> {
|
pub enum BitmapOrAllRef<'s> {
|
||||||
Bitmap(&'s RoaringBitmap),
|
Bitmap(&'s RoaringBitmap),
|
||||||
|
@ -12,13 +12,12 @@ use heed::types::DecodeIgnore;
|
|||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
use itertools::Itertools;
|
use itertools::Itertools;
|
||||||
|
|
||||||
|
use super::interner::{Interned, Interner};
|
||||||
|
use super::SearchContext;
|
||||||
use crate::search::fst_utils::{Complement, Intersection, StartsWith, Union};
|
use crate::search::fst_utils::{Complement, Intersection, StartsWith, Union};
|
||||||
use crate::search::{build_dfa, get_first};
|
use crate::search::{build_dfa, get_first};
|
||||||
use crate::{CboRoaringBitmapLenCodec, Index, Result};
|
use crate::{CboRoaringBitmapLenCodec, Index, Result};
|
||||||
|
|
||||||
use super::interner::{Interned, Interner};
|
|
||||||
use super::SearchContext;
|
|
||||||
|
|
||||||
#[derive(Default, Clone, PartialEq, Eq, Hash)]
|
#[derive(Default, Clone, PartialEq, Eq, Hash)]
|
||||||
pub struct Phrase {
|
pub struct Phrase {
|
||||||
pub words: Vec<Option<Interned<String>>>,
|
pub words: Vec<Option<Interned<String>>>,
|
||||||
|
@ -1,8 +1,8 @@
|
|||||||
use std::collections::HashSet;
|
use std::collections::HashSet;
|
||||||
|
|
||||||
use super::{Edge, RankingRuleGraph, RankingRuleGraphTrait};
|
use super::{Edge, RankingRuleGraph, RankingRuleGraphTrait};
|
||||||
use crate::new::small_bitmap::SmallBitmap;
|
use crate::search::new::small_bitmap::SmallBitmap;
|
||||||
use crate::new::{QueryGraph, SearchContext};
|
use crate::search::new::{QueryGraph, SearchContext};
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
|
||||||
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||||
|
@ -1,11 +1,12 @@
|
|||||||
#![allow(clippy::too_many_arguments)]
|
#![allow(clippy::too_many_arguments)]
|
||||||
|
|
||||||
|
use std::collections::btree_map::Entry;
|
||||||
|
use std::collections::{BTreeMap, VecDeque};
|
||||||
|
|
||||||
use super::empty_paths_cache::EmptyPathsCache;
|
use super::empty_paths_cache::EmptyPathsCache;
|
||||||
use super::{RankingRuleGraph, RankingRuleGraphTrait};
|
use super::{RankingRuleGraph, RankingRuleGraphTrait};
|
||||||
use crate::new::small_bitmap::SmallBitmap;
|
use crate::search::new::small_bitmap::SmallBitmap;
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
use std::collections::btree_map::Entry;
|
|
||||||
use std::collections::{BTreeMap, VecDeque};
|
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||||
pub struct Path {
|
pub struct Path {
|
||||||
|
@ -1,11 +1,12 @@
|
|||||||
use std::marker::PhantomData;
|
use std::marker::PhantomData;
|
||||||
|
|
||||||
use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait};
|
|
||||||
use crate::new::{BitmapOrAllRef, SearchContext};
|
|
||||||
use crate::Result;
|
|
||||||
use fxhash::FxHashMap;
|
use fxhash::FxHashMap;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
|
use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait};
|
||||||
|
use crate::search::new::{BitmapOrAllRef, SearchContext};
|
||||||
|
use crate::Result;
|
||||||
|
|
||||||
// TODO: the cache should have a G::EdgeDetails as key
|
// TODO: the cache should have a G::EdgeDetails as key
|
||||||
// but then it means that we should have a quick way of
|
// but then it means that we should have a quick way of
|
||||||
// computing their hash and comparing them
|
// computing their hash and comparing them
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
use crate::new::small_bitmap::SmallBitmap;
|
|
||||||
|
|
||||||
use super::paths_map::PathsMap;
|
use super::paths_map::PathsMap;
|
||||||
|
use crate::search::new::small_bitmap::SmallBitmap;
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct EmptyPathsCache {
|
pub struct EmptyPathsCache {
|
||||||
|
@ -6,16 +6,17 @@ mod paths_map;
|
|||||||
mod proximity;
|
mod proximity;
|
||||||
mod typo;
|
mod typo;
|
||||||
|
|
||||||
use super::logger::SearchLogger;
|
|
||||||
use super::small_bitmap::SmallBitmap;
|
|
||||||
use super::{QueryGraph, QueryNode, SearchContext};
|
|
||||||
use crate::Result;
|
|
||||||
pub use edge_docids_cache::EdgeDocidsCache;
|
pub use edge_docids_cache::EdgeDocidsCache;
|
||||||
pub use empty_paths_cache::EmptyPathsCache;
|
pub use empty_paths_cache::EmptyPathsCache;
|
||||||
pub use proximity::ProximityGraph;
|
pub use proximity::ProximityGraph;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
pub use typo::TypoGraph;
|
pub use typo::TypoGraph;
|
||||||
|
|
||||||
|
use super::logger::SearchLogger;
|
||||||
|
use super::small_bitmap::SmallBitmap;
|
||||||
|
use super::{QueryGraph, QueryNode, SearchContext};
|
||||||
|
use crate::Result;
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub enum EdgeDetails<E> {
|
pub enum EdgeDetails<E> {
|
||||||
Unconditional,
|
Unconditional,
|
||||||
|
@ -1,11 +1,10 @@
|
|||||||
use crate::new::small_bitmap::SmallBitmap;
|
|
||||||
use super::cheapest_paths::Path;
|
use super::cheapest_paths::Path;
|
||||||
|
use crate::search::new::small_bitmap::SmallBitmap;
|
||||||
|
|
||||||
// What is PathsMap used for?
|
// What is PathsMap used for?
|
||||||
// For the empty_prefixes field in the EmptyPathsCache only :/
|
// For the empty_prefixes field in the EmptyPathsCache only :/
|
||||||
// but it could be used for more, like efficient computing of a set of paths
|
// but it could be used for more, like efficient computing of a set of paths
|
||||||
|
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct PathsMap<V> {
|
pub struct PathsMap<V> {
|
||||||
pub nodes: Vec<(u16, PathsMap<V>)>,
|
pub nodes: Vec<(u16, PathsMap<V>)>,
|
||||||
@ -53,10 +52,10 @@ impl<V> PathsMap<V> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
fn remove_first_rec(&mut self, cur: &mut Vec<u16>) -> (bool, V) {
|
fn remove_first_rec(&mut self, cur: &mut Vec<u16>) -> (bool, V) {
|
||||||
let Some((first_edge, rest)) = self.nodes.first_mut() else {
|
let Some((first_edge, rest)) = self.nodes.first_mut() else {
|
||||||
// The PathsMap has to be correct by construction here, otherwise
|
// The PathsMap has to be correct by construction here, otherwise
|
||||||
// the unwrap() will crash
|
// the unwrap() will crash
|
||||||
return (true, self.value.take().unwrap())
|
return (true, self.value.take().unwrap())
|
||||||
};
|
};
|
||||||
cur.push(*first_edge);
|
cur.push(*first_edge);
|
||||||
let (rest_is_empty, value) = rest.remove_first_rec(cur);
|
let (rest_is_empty, value) = rest.remove_first_rec(cur);
|
||||||
|
@ -1,12 +1,14 @@
|
|||||||
use super::ProximityEdge;
|
|
||||||
use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
|
|
||||||
use crate::new::ranking_rule_graph::proximity::WordPair;
|
|
||||||
use crate::new::ranking_rule_graph::EdgeDetails;
|
|
||||||
use crate::new::{QueryNode, SearchContext};
|
|
||||||
use crate::Result;
|
|
||||||
use itertools::Itertools;
|
|
||||||
use std::collections::BTreeMap;
|
use std::collections::BTreeMap;
|
||||||
|
|
||||||
|
use itertools::Itertools;
|
||||||
|
|
||||||
|
use super::ProximityEdge;
|
||||||
|
use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
|
||||||
|
use crate::search::new::ranking_rule_graph::proximity::WordPair;
|
||||||
|
use crate::search::new::ranking_rule_graph::EdgeDetails;
|
||||||
|
use crate::search::new::{QueryNode, SearchContext};
|
||||||
|
use crate::Result;
|
||||||
|
|
||||||
pub fn visit_from_node(
|
pub fn visit_from_node(
|
||||||
ctx: &mut SearchContext,
|
ctx: &mut SearchContext,
|
||||||
from_node: &QueryNode,
|
from_node: &QueryNode,
|
||||||
|
@ -1,8 +1,9 @@
|
|||||||
use super::{ProximityEdge, WordPair};
|
|
||||||
use crate::new::SearchContext;
|
|
||||||
use crate::{CboRoaringBitmapCodec, Result};
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
|
use super::{ProximityEdge, WordPair};
|
||||||
|
use crate::search::new::SearchContext;
|
||||||
|
use crate::{CboRoaringBitmapCodec, Result};
|
||||||
|
|
||||||
pub fn compute_docids<'search>(
|
pub fn compute_docids<'search>(
|
||||||
ctx: &mut SearchContext<'search>,
|
ctx: &mut SearchContext<'search>,
|
||||||
edge: &ProximityEdge,
|
edge: &ProximityEdge,
|
||||||
|
@ -1,15 +1,16 @@
|
|||||||
pub mod build;
|
pub mod build;
|
||||||
pub mod compute_docids;
|
pub mod compute_docids;
|
||||||
|
|
||||||
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::empty_paths_cache::EmptyPathsCache;
|
use super::empty_paths_cache::EmptyPathsCache;
|
||||||
use super::{EdgeDetails, RankingRuleGraphTrait};
|
use super::{EdgeDetails, RankingRuleGraphTrait};
|
||||||
use crate::new::interner::Interned;
|
use crate::search::new::interner::Interned;
|
||||||
use crate::new::logger::SearchLogger;
|
use crate::search::new::logger::SearchLogger;
|
||||||
use crate::new::query_term::WordDerivations;
|
use crate::search::new::query_term::WordDerivations;
|
||||||
use crate::new::small_bitmap::SmallBitmap;
|
use crate::search::new::small_bitmap::SmallBitmap;
|
||||||
use crate::new::{QueryGraph, QueryNode, SearchContext};
|
use crate::search::new::{QueryGraph, QueryNode, SearchContext};
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
use roaring::RoaringBitmap;
|
|
||||||
|
|
||||||
// TODO: intern the proximity edges as well?
|
// TODO: intern the proximity edges as well?
|
||||||
|
|
||||||
|
@ -1,15 +1,16 @@
|
|||||||
use super::empty_paths_cache::EmptyPathsCache;
|
|
||||||
use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait};
|
|
||||||
use crate::new::interner::Interned;
|
|
||||||
use crate::new::logger::SearchLogger;
|
|
||||||
use crate::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm, WordDerivations};
|
|
||||||
use crate::new::resolve_query_graph::resolve_phrase;
|
|
||||||
use crate::new::small_bitmap::SmallBitmap;
|
|
||||||
use crate::new::{QueryGraph, QueryNode, SearchContext};
|
|
||||||
use crate::{Result, RoaringBitmapCodec};
|
|
||||||
use heed::BytesDecode;
|
use heed::BytesDecode;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
|
use super::empty_paths_cache::EmptyPathsCache;
|
||||||
|
use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait};
|
||||||
|
use crate::search::new::interner::Interned;
|
||||||
|
use crate::search::new::logger::SearchLogger;
|
||||||
|
use crate::search::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm, WordDerivations};
|
||||||
|
use crate::search::new::resolve_query_graph::resolve_phrase;
|
||||||
|
use crate::search::new::small_bitmap::SmallBitmap;
|
||||||
|
use crate::search::new::{QueryGraph, QueryNode, SearchContext};
|
||||||
|
use crate::{Result, RoaringBitmapCodec};
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub enum TypoEdge {
|
pub enum TypoEdge {
|
||||||
Phrase { phrase: Interned<Phrase> },
|
Phrase { phrase: Interned<Phrase> },
|
||||||
|
@ -1,11 +1,10 @@
|
|||||||
use super::logger::SearchLogger;
|
|
||||||
use super::QueryGraph;
|
|
||||||
use super::SearchContext;
|
|
||||||
use crate::new::graph_based_ranking_rule::GraphBasedRankingRule;
|
|
||||||
use crate::new::ranking_rule_graph::ProximityGraph;
|
|
||||||
use crate::new::ranking_rule_graph::TypoGraph;
|
|
||||||
use crate::new::words::Words;
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
|
use super::logger::SearchLogger;
|
||||||
|
use super::{QueryGraph, SearchContext};
|
||||||
|
use crate::search::new::graph_based_ranking_rule::GraphBasedRankingRule;
|
||||||
|
use crate::search::new::ranking_rule_graph::{ProximityGraph, TypoGraph};
|
||||||
|
use crate::search::new::words::Words;
|
||||||
// use crate::search::new::sort::Sort;
|
// use crate::search::new::sort::Sort;
|
||||||
use crate::{Result, TermsMatchingStrategy};
|
use crate::{Result, TermsMatchingStrategy};
|
||||||
|
|
||||||
@ -239,16 +238,18 @@ pub fn apply_ranking_rules<'search>(
|
|||||||
#[cfg(test)]
|
#[cfg(test)]
|
||||||
mod tests {
|
mod tests {
|
||||||
// use crate::allocator::ALLOC;
|
// use crate::allocator::ALLOC;
|
||||||
use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
|
||||||
use crate::new::{execute_search, SearchContext};
|
|
||||||
use big_s::S;
|
|
||||||
use heed::EnvOpenOptions;
|
|
||||||
use maplit::hashset;
|
|
||||||
use std::fs::File;
|
use std::fs::File;
|
||||||
use std::io::{BufRead, BufReader, Cursor, Seek};
|
use std::io::{BufRead, BufReader, Cursor, Seek};
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
// use crate::new::logger::detailed::DetailedSearchLogger;
|
|
||||||
use crate::new::logger::DefaultSearchLogger;
|
use big_s::S;
|
||||||
|
use heed::EnvOpenOptions;
|
||||||
|
use maplit::hashset;
|
||||||
|
|
||||||
|
use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
||||||
|
// use crate::search::new::logger::detailed::DetailedSearchLogger;
|
||||||
|
use crate::search::new::logger::DefaultSearchLogger;
|
||||||
|
use crate::search::new::{execute_search, SearchContext};
|
||||||
use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
||||||
use crate::{Criterion, Index, Object, Search, TermsMatchingStrategy};
|
use crate::{Criterion, Index, Object, Search, TermsMatchingStrategy};
|
||||||
|
|
||||||
@ -265,7 +266,7 @@ mod tests {
|
|||||||
// loop {
|
// loop {
|
||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
|
|
||||||
// let mut logger = crate::new::logger::detailed::DetailedSearchLogger::new("log");
|
// let mut logger = crate::search::new::logger::detailed::DetailedSearchLogger::new("log");
|
||||||
let mut ctx = SearchContext::new(&index, &txn);
|
let mut ctx = SearchContext::new(&index, &txn);
|
||||||
let results = execute_search(
|
let results = execute_search(
|
||||||
&mut ctx,
|
&mut ctx,
|
||||||
@ -362,7 +363,7 @@ mod tests {
|
|||||||
// loop {
|
// loop {
|
||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
|
|
||||||
let mut logger = crate::new::logger::detailed::DetailedSearchLogger::new("log");
|
let mut logger = crate::search::new::logger::detailed::DetailedSearchLogger::new("log");
|
||||||
let mut ctx = SearchContext::new(&index, &txn);
|
let mut ctx = SearchContext::new(&index, &txn);
|
||||||
let results = execute_search(
|
let results = execute_search(
|
||||||
&mut ctx,
|
&mut ctx,
|
||||||
|
@ -1,12 +1,14 @@
|
|||||||
|
use std::collections::VecDeque;
|
||||||
|
|
||||||
|
use fxhash::FxHashMap;
|
||||||
|
use heed::BytesDecode;
|
||||||
|
use roaring::{MultiOps, RoaringBitmap};
|
||||||
|
|
||||||
use super::interner::Interned;
|
use super::interner::Interned;
|
||||||
use super::query_term::{Phrase, QueryTerm, WordDerivations};
|
use super::query_term::{Phrase, QueryTerm, WordDerivations};
|
||||||
use super::small_bitmap::SmallBitmap;
|
use super::small_bitmap::SmallBitmap;
|
||||||
use super::{QueryGraph, QueryNode, SearchContext};
|
use super::{QueryGraph, QueryNode, SearchContext};
|
||||||
use crate::{CboRoaringBitmapCodec, Result, RoaringBitmapCodec};
|
use crate::{CboRoaringBitmapCodec, Result, RoaringBitmapCodec};
|
||||||
use fxhash::FxHashMap;
|
|
||||||
use heed::BytesDecode;
|
|
||||||
use roaring::{MultiOps, RoaringBitmap};
|
|
||||||
use std::collections::VecDeque;
|
|
||||||
|
|
||||||
// TODO: manual performance metrics: access to DB, bitmap deserializations/operations, etc.
|
// TODO: manual performance metrics: access to DB, bitmap deserializations/operations, etc.
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
|
@ -1,3 +1,5 @@
|
|||||||
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::logger::SearchLogger;
|
use super::logger::SearchLogger;
|
||||||
use super::{
|
use super::{
|
||||||
RankingRule, RankingRuleOutput, RankingRuleOutputIter, RankingRuleOutputIterWrapper,
|
RankingRule, RankingRuleOutput, RankingRuleOutputIter, RankingRuleOutputIterWrapper,
|
||||||
@ -11,7 +13,6 @@ use crate::{
|
|||||||
Index,
|
Index,
|
||||||
Result,
|
Result,
|
||||||
};
|
};
|
||||||
use roaring::RoaringBitmap;
|
|
||||||
|
|
||||||
pub struct Sort<'search, Query> {
|
pub struct Sort<'search, Query> {
|
||||||
field_name: String,
|
field_name: String,
|
||||||
|
@ -1,9 +1,11 @@
|
|||||||
|
use std::collections::BTreeSet;
|
||||||
|
|
||||||
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::logger::SearchLogger;
|
use super::logger::SearchLogger;
|
||||||
use super::resolve_query_graph::resolve_query_graph;
|
use super::resolve_query_graph::resolve_query_graph;
|
||||||
use super::{QueryGraph, QueryNode, RankingRule, RankingRuleOutput, SearchContext};
|
use super::{QueryGraph, QueryNode, RankingRule, RankingRuleOutput, SearchContext};
|
||||||
use crate::{Result, TermsMatchingStrategy};
|
use crate::{Result, TermsMatchingStrategy};
|
||||||
use roaring::RoaringBitmap;
|
|
||||||
use std::collections::BTreeSet;
|
|
||||||
|
|
||||||
pub struct Words {
|
pub struct Words {
|
||||||
exhausted: bool,
|
exhausted: bool,
|
||||||
|
Loading…
Reference in New Issue
Block a user