mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 21:04:27 +01:00
Prune the query graph after executing a ranking rule
This commit is contained in:
parent
05fe856e6e
commit
a49ddec9df
@ -36,6 +36,7 @@ That is we find the documents where either:
|
||||
- OR: `pretty` is 2-close to `house` AND `house` is 1-close to `by`
|
||||
*/
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::ops::ControlFlow;
|
||||
|
||||
use roaring::RoaringBitmap;
|
||||
@ -50,6 +51,7 @@ use super::ranking_rule_graph::{
|
||||
use super::small_bitmap::SmallBitmap;
|
||||
use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
|
||||
use crate::search::new::interner::Interned;
|
||||
use crate::search::new::query_graph::QueryNodeData;
|
||||
use crate::Result;
|
||||
|
||||
pub type Proximity = GraphBasedRankingRule<ProximityGraph>;
|
||||
@ -216,9 +218,8 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
||||
let original_universe = universe;
|
||||
let mut universe = universe.clone();
|
||||
|
||||
// TODO: remove this unnecessary clone
|
||||
let original_graph = graph.clone();
|
||||
// and this vector as well
|
||||
let mut used_conditions = SmallBitmap::for_interned_values_in(&graph.conditions_interner);
|
||||
let mut paths = vec![];
|
||||
|
||||
// For each path of the given cost, we will compute its associated
|
||||
@ -243,8 +244,8 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
||||
// We store the edges and their docids in vectors in case the path turns out to be
|
||||
// empty and we need to figure out why it was empty.
|
||||
let mut visited_conditions = vec![];
|
||||
let mut cached_edge_docids =
|
||||
graph.conditions_interner.map(|_| RoaringBitmap::new());
|
||||
let mut cached_edge_docids = vec![];
|
||||
// graph.conditions_interner.map(|_| RoaringBitmap::new());
|
||||
|
||||
for &condition_interned_raw in path {
|
||||
let condition = Interned::new(condition_interned_raw);
|
||||
@ -253,7 +254,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
||||
let edge_docids =
|
||||
edge_docids_cache.get_edge_docids(ctx, condition, graph, &universe)?;
|
||||
|
||||
*cached_edge_docids.get_mut(condition) = edge_docids.clone();
|
||||
cached_edge_docids.push((condition, edge_docids.clone())); // .get_mut(condition) = edge_docids.clone();
|
||||
|
||||
// If the edge is empty, then the path will be empty as well, we update the graph
|
||||
// and caches accordingly and skip to the next candidate path.
|
||||
@ -279,12 +280,12 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
||||
// then we also know that any path containing the same couple of
|
||||
// edges will also be empty.
|
||||
for (past_condition, edge_docids2) in cached_edge_docids.iter() {
|
||||
if past_condition == condition {
|
||||
if *past_condition == condition {
|
||||
continue;
|
||||
};
|
||||
let intersection = edge_docids & edge_docids2;
|
||||
if intersection.is_disjoint(&universe) {
|
||||
empty_paths_cache.add_condition_couple(past_condition, condition);
|
||||
empty_paths_cache.add_condition_couple(*past_condition, condition);
|
||||
}
|
||||
}
|
||||
// We should maybe instead try to compute:
|
||||
@ -292,6 +293,10 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
||||
return Ok(ControlFlow::Continue(()));
|
||||
}
|
||||
}
|
||||
assert!(!path_docids.is_empty());
|
||||
for condition in path {
|
||||
used_conditions.insert(Interned::new(*condition));
|
||||
}
|
||||
bucket |= &path_docids;
|
||||
// Reduce the size of the universe so that we can more optimistically discard candidate paths
|
||||
universe -= path_docids;
|
||||
@ -307,16 +312,50 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
||||
G::log_state(
|
||||
&original_graph,
|
||||
&paths,
|
||||
&state.empty_paths_cache,
|
||||
empty_paths_cache,
|
||||
original_universe,
|
||||
&state.all_distances,
|
||||
all_distances,
|
||||
cost,
|
||||
logger,
|
||||
);
|
||||
|
||||
// TODO: Graph-based ranking rules do not (yet) modify the query graph. We could, however,
|
||||
// remove nodes and/or terms within nodes that weren't present in any of the paths.
|
||||
let next_query_graph = state.graph.query_graph.clone();
|
||||
// We modify the next query graph so that it only contains the subgraph
|
||||
// that was used to compute this bucket
|
||||
// But we only do it in case the bucket length is >1, because otherwise
|
||||
// we know the child ranking rule won't be called anyway
|
||||
let mut next_query_graph = original_graph.query_graph;
|
||||
next_query_graph.simplify();
|
||||
if bucket.len() > 1 {
|
||||
// 1. Gather all the words and phrases used in the computation of this bucket
|
||||
let mut used_words = HashSet::new();
|
||||
let mut used_phrases = HashSet::new();
|
||||
for condition in used_conditions.iter() {
|
||||
let condition = graph.conditions_interner.get(condition);
|
||||
used_words.extend(G::words_used_by_edge_condition(ctx, condition)?);
|
||||
used_phrases.extend(G::phrases_used_by_edge_condition(ctx, condition)?);
|
||||
}
|
||||
// 2. Remove the unused words and phrases from all the nodes in the graph
|
||||
let mut nodes_to_remove = vec![];
|
||||
for (node_id, node) in next_query_graph.nodes.iter_mut() {
|
||||
let term = match &mut node.data {
|
||||
QueryNodeData::Term(term) => term,
|
||||
QueryNodeData::Deleted | QueryNodeData::Start | QueryNodeData::End => continue,
|
||||
};
|
||||
if let Some(new_term) = ctx
|
||||
.term_interner
|
||||
.get(term.value)
|
||||
.removing_forbidden_terms(&used_words, &used_phrases)
|
||||
{
|
||||
if new_term.is_empty() {
|
||||
nodes_to_remove.push(node_id);
|
||||
} else {
|
||||
term.value = ctx.term_interner.insert(new_term);
|
||||
}
|
||||
}
|
||||
}
|
||||
// 3. Remove the empty nodes from the graph
|
||||
next_query_graph.remove_nodes(&nodes_to_remove);
|
||||
}
|
||||
|
||||
self.state = Some(state);
|
||||
|
||||
|
@ -528,7 +528,7 @@ shape: class"
|
||||
ctx: &mut SearchContext,
|
||||
graph: &RankingRuleGraph<R>,
|
||||
paths: &[Vec<u16>],
|
||||
_empty_paths_cache: &DeadEndPathCache<R>,
|
||||
dead_end_paths_cache: &DeadEndPathCache<R>,
|
||||
distances: MappedInterner<Vec<(u16, SmallBitmap<R::EdgeCondition>)>, QueryNode>,
|
||||
file: &mut File,
|
||||
) {
|
||||
@ -552,12 +552,11 @@ shape: class"
|
||||
.unwrap();
|
||||
}
|
||||
EdgeCondition::Conditional(condition) => {
|
||||
let condition = graph.conditions_interner.get(*condition);
|
||||
// let condition = graph.conditions_interner.get(*condition);
|
||||
writeln!(
|
||||
file,
|
||||
"{source_node} -> {dest_node} : \"cost {cost} {edge_label}\"",
|
||||
"{source_node} -> {dest_node} : \"{condition} cost {cost}\"",
|
||||
cost = edge.cost,
|
||||
edge_label = R::label_for_edge_condition(condition)
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
@ -569,28 +568,33 @@ shape: class"
|
||||
// Self::paths_d2_description(graph, paths, file);
|
||||
// writeln!(file, "}}").unwrap();
|
||||
|
||||
writeln!(file, "Shortest Paths {{").unwrap();
|
||||
writeln!(file, "Paths {{").unwrap();
|
||||
Self::paths_d2_description(ctx, graph, paths, file);
|
||||
writeln!(file, "}}").unwrap();
|
||||
|
||||
// writeln!(file, "Empty Edge Couples {{").unwrap();
|
||||
// for (i, (e1, e2)) in empty_paths_cache.empty_couple_edges.iter().enumerate() {
|
||||
// writeln!(file, "{i} : \"\" {{").unwrap();
|
||||
// Self::edge_d2_description(graph, *e1, file);
|
||||
// Self::edge_d2_description(graph, *e2, file);
|
||||
// writeln!(file, "{e1} -- {e2}").unwrap();
|
||||
// writeln!(file, "}}").unwrap();
|
||||
// }
|
||||
// writeln!(file, "}}").unwrap();
|
||||
writeln!(file, "Dead-end couples of conditions {{").unwrap();
|
||||
for (i, (e1, e2)) in dead_end_paths_cache.condition_couples.iter().enumerate() {
|
||||
writeln!(file, "{i} : \"\" {{").unwrap();
|
||||
Self::condition_d2_description(ctx, graph, e1, file);
|
||||
for e2 in e2.iter() {
|
||||
Self::condition_d2_description(ctx, graph, e2, file);
|
||||
writeln!(file, "{e1} -- {e2}").unwrap();
|
||||
}
|
||||
writeln!(file, "}}").unwrap();
|
||||
}
|
||||
writeln!(file, "}}").unwrap();
|
||||
|
||||
// writeln!(file, "Removed Edges {{").unwrap();
|
||||
// for edge_idx in empty_paths_cache.empty_edges.iter() {
|
||||
// writeln!(file, "{edge_idx}").unwrap();
|
||||
// }
|
||||
writeln!(file, "Dead-end edges {{").unwrap();
|
||||
for condition in dead_end_paths_cache.conditions.iter() {
|
||||
writeln!(file, "{condition}").unwrap();
|
||||
}
|
||||
writeln!(file, "}}").unwrap();
|
||||
|
||||
// writeln!(file, "Dead-end prefixes {{").unwrap();
|
||||
// writeln!(file, "}}").unwrap();
|
||||
}
|
||||
fn condition_d2_description<R: RankingRuleGraphTrait>(
|
||||
_ctx: &mut SearchContext,
|
||||
ctx: &mut SearchContext,
|
||||
graph: &RankingRuleGraph<R>,
|
||||
condition_id: Interned<R::EdgeCondition>,
|
||||
file: &mut File,
|
||||
@ -598,10 +602,11 @@ shape: class"
|
||||
let condition = graph.conditions_interner.get(condition_id);
|
||||
writeln!(
|
||||
file,
|
||||
"{condition_id}: \"{}\" {{
|
||||
shape: class
|
||||
}}",
|
||||
R::label_for_edge_condition(condition)
|
||||
"{condition_id} {{
|
||||
shape: class
|
||||
{}
|
||||
}}",
|
||||
R::label_for_edge_condition(ctx, condition).unwrap()
|
||||
)
|
||||
.unwrap();
|
||||
}
|
||||
|
@ -303,7 +303,8 @@ mod tests {
|
||||
let mut ctx = SearchContext::new(&index, &txn);
|
||||
let results = execute_search(
|
||||
&mut ctx,
|
||||
"which a the releases from poison by the government",
|
||||
"released from prison by the government",
|
||||
// "which a the releases from poison by the government",
|
||||
// "sun flower s are the best",
|
||||
// "zero config",
|
||||
TermsMatchingStrategy::Last,
|
||||
@ -338,7 +339,7 @@ mod tests {
|
||||
|
||||
println!("{}us: {:?}", elapsed.as_micros(), results);
|
||||
}
|
||||
// for (id, _document) in documents {
|
||||
// for (id, document) in documents {
|
||||
// println!("{id}:");
|
||||
// // println!("{document}");
|
||||
// }
|
||||
@ -359,9 +360,13 @@ mod tests {
|
||||
let start = Instant::now();
|
||||
|
||||
let mut s = Search::new(&txn, &index);
|
||||
s.query("which a the releases from poison by the government");
|
||||
s.query(
|
||||
// "which a the releases from poison by the government",
|
||||
// "sun flower s are the best",
|
||||
"zero config",
|
||||
);
|
||||
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
||||
// s.criterion_implementation_strategy(crate::CriterionImplementationStrategy::OnlySetBased);
|
||||
// s.criterion_implementation_strategy(crate::CriterionImplementationStrategy::OnlyIterative);
|
||||
let docs = s.execute().unwrap();
|
||||
|
||||
let elapsed = start.elapsed();
|
||||
|
@ -281,7 +281,7 @@ impl QueryGraph {
|
||||
|
||||
/// Simplify the query graph by removing all nodes that are disconnected from
|
||||
/// the start or end nodes.
|
||||
fn simplify(&mut self) {
|
||||
pub fn simplify(&mut self) {
|
||||
loop {
|
||||
let mut nodes_to_remove = vec![];
|
||||
for (node_idx, node) in self.nodes.iter() {
|
||||
|
@ -1,3 +1,4 @@
|
||||
use std::collections::HashSet;
|
||||
use std::mem;
|
||||
use std::ops::RangeInclusive;
|
||||
|
||||
@ -59,6 +60,111 @@ pub struct QueryTerm {
|
||||
pub use_prefix_db: Option<Interned<String>>,
|
||||
}
|
||||
impl QueryTerm {
|
||||
pub fn removing_forbidden_terms(
|
||||
&self,
|
||||
allowed_words: &HashSet<Interned<String>>,
|
||||
allowed_phrases: &HashSet<Interned<Phrase>>,
|
||||
) -> Option<Self> {
|
||||
let QueryTerm {
|
||||
original,
|
||||
is_ngram,
|
||||
is_prefix,
|
||||
phrase,
|
||||
zero_typo,
|
||||
prefix_of,
|
||||
synonyms,
|
||||
split_words,
|
||||
one_typo,
|
||||
two_typos,
|
||||
use_prefix_db,
|
||||
} = self;
|
||||
|
||||
let mut changed = false;
|
||||
|
||||
let mut new_zero_typo = None;
|
||||
if let Some(w) = zero_typo {
|
||||
if allowed_words.contains(w) {
|
||||
new_zero_typo = Some(*w);
|
||||
} else {
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
// TODO: this is incorrect, prefix DB stuff should be treated separately
|
||||
let mut new_use_prefix_db = None;
|
||||
if let Some(w) = use_prefix_db {
|
||||
if allowed_words.contains(w) {
|
||||
new_use_prefix_db = Some(*w);
|
||||
} else {
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
let mut new_prefix_of = vec![];
|
||||
for w in prefix_of.iter() {
|
||||
if allowed_words.contains(w) {
|
||||
new_prefix_of.push(*w);
|
||||
} else {
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
let mut new_one_typo = vec![];
|
||||
for w in one_typo.iter() {
|
||||
if allowed_words.contains(w) {
|
||||
new_one_typo.push(*w);
|
||||
} else {
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
let mut new_two_typos = vec![];
|
||||
for w in two_typos.iter() {
|
||||
if allowed_words.contains(w) {
|
||||
new_two_typos.push(*w);
|
||||
} else {
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
// TODO: this is incorrect, prefix DB stuff should be treated separately
|
||||
let mut new_phrase = None;
|
||||
if let Some(w) = phrase {
|
||||
if !allowed_phrases.contains(w) {
|
||||
new_phrase = Some(*w);
|
||||
} else {
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
let mut new_split_words = None;
|
||||
if let Some(w) = split_words {
|
||||
if allowed_phrases.contains(w) {
|
||||
new_split_words = Some(*w);
|
||||
} else {
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
let mut new_synonyms = vec![];
|
||||
for w in synonyms.iter() {
|
||||
if allowed_phrases.contains(w) {
|
||||
new_synonyms.push(*w);
|
||||
} else {
|
||||
changed = true;
|
||||
}
|
||||
}
|
||||
if changed {
|
||||
Some(QueryTerm {
|
||||
original: *original,
|
||||
is_ngram: *is_ngram,
|
||||
is_prefix: *is_prefix,
|
||||
phrase: new_phrase,
|
||||
zero_typo: new_zero_typo,
|
||||
prefix_of: new_prefix_of.into_boxed_slice(),
|
||||
synonyms: new_synonyms.into_boxed_slice(),
|
||||
split_words: new_split_words,
|
||||
one_typo: new_one_typo.into_boxed_slice(),
|
||||
two_typos: new_two_typos.into_boxed_slice(),
|
||||
use_prefix_db: new_use_prefix_db,
|
||||
})
|
||||
} else {
|
||||
None
|
||||
}
|
||||
}
|
||||
pub fn phrase(
|
||||
word_interner: &mut DedupInterner<String>,
|
||||
phrase_interner: &mut DedupInterner<Phrase>,
|
||||
|
@ -33,7 +33,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||
empty_paths_cache,
|
||||
&mut visit,
|
||||
&mut vec![],
|
||||
&mut SmallBitmap::new(self.edges_store.len()),
|
||||
&mut SmallBitmap::for_interned_values_in(&self.conditions_interner),
|
||||
&mut empty_paths_cache.conditions.clone(),
|
||||
)?;
|
||||
Ok(())
|
||||
|
@ -16,6 +16,7 @@ mod proximity;
|
||||
/// Implementation of the `typo` ranking rule
|
||||
mod typo;
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::hash::Hash;
|
||||
|
||||
pub use edge_docids_cache::EdgeConditionDocIdsCache;
|
||||
@ -26,6 +27,7 @@ pub use typo::{TypoEdge, TypoGraph};
|
||||
|
||||
use super::interner::{DedupInterner, FixedSizeInterner, Interned, MappedInterner};
|
||||
use super::logger::SearchLogger;
|
||||
use super::query_term::Phrase;
|
||||
use super::small_bitmap::SmallBitmap;
|
||||
use super::{QueryGraph, QueryNode, SearchContext};
|
||||
use crate::Result;
|
||||
@ -82,7 +84,19 @@ pub trait RankingRuleGraphTrait: Sized {
|
||||
|
||||
/// Return the label of the given edge condition, to be used when visualising
|
||||
/// the ranking rule graph.
|
||||
fn label_for_edge_condition(edge: &Self::EdgeCondition) -> String;
|
||||
fn label_for_edge_condition<'ctx>(
|
||||
ctx: &mut SearchContext<'ctx>,
|
||||
edge: &Self::EdgeCondition,
|
||||
) -> Result<String>;
|
||||
|
||||
fn words_used_by_edge_condition<'ctx>(
|
||||
ctx: &mut SearchContext<'ctx>,
|
||||
edge: &Self::EdgeCondition,
|
||||
) -> Result<HashSet<Interned<String>>>;
|
||||
fn phrases_used_by_edge_condition<'ctx>(
|
||||
ctx: &mut SearchContext<'ctx>,
|
||||
edge: &Self::EdgeCondition,
|
||||
) -> Result<HashSet<Interned<Phrase>>>;
|
||||
|
||||
/// Compute the document ids associated with the given edge condition,
|
||||
/// restricted to the given universe.
|
||||
|
@ -1,6 +1,9 @@
|
||||
pub mod build;
|
||||
pub mod compute_docids;
|
||||
|
||||
use std::collections::HashSet;
|
||||
use std::iter::FromIterator;
|
||||
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::empty_paths_cache::DeadEndPathCache;
|
||||
@ -44,17 +47,6 @@ pub enum ProximityGraph {}
|
||||
impl RankingRuleGraphTrait for ProximityGraph {
|
||||
type EdgeCondition = ProximityCondition;
|
||||
|
||||
fn label_for_edge_condition(edge: &Self::EdgeCondition) -> String {
|
||||
match edge {
|
||||
ProximityCondition::Term { term } => {
|
||||
format!("term {term}")
|
||||
}
|
||||
ProximityCondition::Pairs { pairs } => {
|
||||
format!("pairs {}", pairs.len())
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn resolve_edge_condition<'ctx>(
|
||||
ctx: &mut SearchContext<'ctx>,
|
||||
condition: &Self::EdgeCondition,
|
||||
@ -83,4 +75,113 @@ impl RankingRuleGraphTrait for ProximityGraph {
|
||||
) {
|
||||
logger.log_proximity_state(graph, paths, empty_paths_cache, universe, distances, cost);
|
||||
}
|
||||
|
||||
fn label_for_edge_condition<'ctx>(
|
||||
ctx: &mut SearchContext<'ctx>,
|
||||
edge: &Self::EdgeCondition,
|
||||
) -> Result<String> {
|
||||
match edge {
|
||||
ProximityCondition::Term { term } => {
|
||||
let term = ctx.term_interner.get(*term);
|
||||
Ok(format!("{} : exists", ctx.word_interner.get(term.original)))
|
||||
}
|
||||
ProximityCondition::Pairs { pairs } => {
|
||||
let mut s = String::new();
|
||||
for pair in pairs.iter() {
|
||||
match pair {
|
||||
WordPair::Words { phrases, left, right, proximity } => {
|
||||
let left = ctx.word_interner.get(*left);
|
||||
let right = ctx.word_interner.get(*right);
|
||||
if !phrases.is_empty() {
|
||||
s.push_str(&format!("{} phrases + ", phrases.len()));
|
||||
}
|
||||
s.push_str(&format!("\"{left} {right}\": {proximity}\n"));
|
||||
}
|
||||
WordPair::WordPrefix { phrases, left, right_prefix, proximity } => {
|
||||
let left = ctx.word_interner.get(*left);
|
||||
let right = ctx.word_interner.get(*right_prefix);
|
||||
if !phrases.is_empty() {
|
||||
s.push_str(&format!("{} phrases + ", phrases.len()));
|
||||
}
|
||||
s.push_str(&format!("\"{left} {right}...\" : {proximity}\n"));
|
||||
}
|
||||
WordPair::WordPrefixSwapped { left_prefix, right, proximity } => {
|
||||
let left = ctx.word_interner.get(*left_prefix);
|
||||
let right = ctx.word_interner.get(*right);
|
||||
s.push_str(&format!("\"{left}... {right}\" : {proximity}\n"));
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(s)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn words_used_by_edge_condition<'ctx>(
|
||||
ctx: &mut SearchContext<'ctx>,
|
||||
edge: &Self::EdgeCondition,
|
||||
) -> Result<HashSet<Interned<String>>> {
|
||||
match edge {
|
||||
ProximityCondition::Term { term } => {
|
||||
let term = ctx.term_interner.get(*term);
|
||||
Ok(HashSet::from_iter(term.all_single_words_except_prefix_db()))
|
||||
}
|
||||
ProximityCondition::Pairs { pairs } => {
|
||||
let mut set = HashSet::new();
|
||||
for pair in pairs.iter() {
|
||||
match pair {
|
||||
WordPair::Words { phrases: _, left, right, proximity: _ } => {
|
||||
set.insert(*left);
|
||||
set.insert(*right);
|
||||
}
|
||||
WordPair::WordPrefix { phrases: _, left, right_prefix, proximity: _ } => {
|
||||
set.insert(*left);
|
||||
// TODO: this is not correct, there should be another trait method for collecting the prefixes
|
||||
// to be used with the prefix DBs
|
||||
set.insert(*right_prefix);
|
||||
}
|
||||
WordPair::WordPrefixSwapped { left_prefix, right, proximity: _ } => {
|
||||
// TODO: this is not correct, there should be another trait method for collecting the prefixes
|
||||
// to be used with the prefix DBs
|
||||
set.insert(*left_prefix);
|
||||
set.insert(*right);
|
||||
}
|
||||
}
|
||||
}
|
||||
Ok(set)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
fn phrases_used_by_edge_condition<'ctx>(
|
||||
ctx: &mut SearchContext<'ctx>,
|
||||
edge: &Self::EdgeCondition,
|
||||
) -> Result<HashSet<Interned<Phrase>>> {
|
||||
match edge {
|
||||
ProximityCondition::Term { term } => {
|
||||
let term = ctx.term_interner.get(*term);
|
||||
Ok(HashSet::from_iter(term.all_phrases()))
|
||||
}
|
||||
ProximityCondition::Pairs { pairs } => {
|
||||
let mut set = HashSet::new();
|
||||
for pair in pairs.iter() {
|
||||
match pair {
|
||||
WordPair::Words { phrases, left: _, right: _, proximity: _ } => {
|
||||
set.extend(phrases.iter().copied());
|
||||
}
|
||||
WordPair::WordPrefix {
|
||||
phrases,
|
||||
left: _,
|
||||
right_prefix: _,
|
||||
proximity: _,
|
||||
} => {
|
||||
set.extend(phrases.iter().copied());
|
||||
}
|
||||
WordPair::WordPrefixSwapped { left_prefix: _, right: _, proximity: _ } => {}
|
||||
}
|
||||
}
|
||||
Ok(set)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -5,10 +5,13 @@ use super::{EdgeCondition, RankingRuleGraph, RankingRuleGraphTrait};
|
||||
use crate::search::new::interner::{DedupInterner, Interned, MappedInterner};
|
||||
use crate::search::new::logger::SearchLogger;
|
||||
use crate::search::new::query_graph::QueryNodeData;
|
||||
use crate::search::new::query_term::{LocatedQueryTerm, QueryTerm};
|
||||
use crate::search::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm};
|
||||
use crate::search::new::small_bitmap::SmallBitmap;
|
||||
use crate::search::new::{QueryGraph, QueryNode, SearchContext};
|
||||
use crate::Result;
|
||||
use std::collections::HashSet;
|
||||
use std::fmt::Write;
|
||||
use std::iter::FromIterator;
|
||||
|
||||
#[derive(Clone, PartialEq, Eq, Hash)]
|
||||
pub struct TypoEdge {
|
||||
@ -21,10 +24,6 @@ pub enum TypoGraph {}
|
||||
impl RankingRuleGraphTrait for TypoGraph {
|
||||
type EdgeCondition = TypoEdge;
|
||||
|
||||
fn label_for_edge_condition(edge: &Self::EdgeCondition) -> String {
|
||||
format!(", {} typos", edge.nbr_typos)
|
||||
}
|
||||
|
||||
fn resolve_edge_condition<'db_cache, 'ctx>(
|
||||
ctx: &mut SearchContext<'ctx>,
|
||||
edge: &Self::EdgeCondition,
|
||||
@ -147,4 +146,78 @@ impl RankingRuleGraphTrait for TypoGraph {
|
||||
) {
|
||||
logger.log_typo_state(graph, paths, empty_paths_cache, universe, distances, cost);
|
||||
}
|
||||
|
||||
fn label_for_edge_condition<'ctx>(
|
||||
ctx: &mut SearchContext<'ctx>,
|
||||
edge: &Self::EdgeCondition,
|
||||
) -> Result<String> {
|
||||
let TypoEdge { term, nbr_typos: _ } = edge;
|
||||
let term = ctx.term_interner.get(*term);
|
||||
let QueryTerm {
|
||||
original: _,
|
||||
is_ngram: _,
|
||||
is_prefix: _,
|
||||
phrase,
|
||||
zero_typo,
|
||||
prefix_of,
|
||||
synonyms,
|
||||
split_words,
|
||||
one_typo,
|
||||
two_typos,
|
||||
use_prefix_db,
|
||||
} = term;
|
||||
let mut s = String::new();
|
||||
if let Some(phrase) = phrase {
|
||||
let phrase = ctx.phrase_interner.get(*phrase).description(&ctx.word_interner);
|
||||
writeln!(&mut s, "\"{phrase}\" : phrase").unwrap();
|
||||
}
|
||||
if let Some(w) = zero_typo {
|
||||
let w = ctx.word_interner.get(*w);
|
||||
writeln!(&mut s, "\"{w}\" : 0 typo").unwrap();
|
||||
}
|
||||
for w in prefix_of.iter() {
|
||||
let w = ctx.word_interner.get(*w);
|
||||
writeln!(&mut s, "\"{w}\" : prefix").unwrap();
|
||||
}
|
||||
for w in one_typo.iter() {
|
||||
let w = ctx.word_interner.get(*w);
|
||||
writeln!(&mut s, "\"{w}\" : 1 typo").unwrap();
|
||||
}
|
||||
for w in two_typos.iter() {
|
||||
let w = ctx.word_interner.get(*w);
|
||||
writeln!(&mut s, "\"{w}\" : 2 typos").unwrap();
|
||||
}
|
||||
if let Some(phrase) = split_words {
|
||||
let phrase = ctx.phrase_interner.get(*phrase).description(&ctx.word_interner);
|
||||
writeln!(&mut s, "\"{phrase}\" : split words").unwrap();
|
||||
}
|
||||
for phrase in synonyms.iter() {
|
||||
let phrase = ctx.phrase_interner.get(*phrase).description(&ctx.word_interner);
|
||||
writeln!(&mut s, "\"{phrase}\" : synonym").unwrap();
|
||||
}
|
||||
if let Some(w) = use_prefix_db {
|
||||
let w = ctx.word_interner.get(*w);
|
||||
writeln!(&mut s, "\"{w}\" : use prefix db").unwrap();
|
||||
}
|
||||
|
||||
Ok(s)
|
||||
}
|
||||
|
||||
fn words_used_by_edge_condition<'ctx>(
|
||||
ctx: &mut SearchContext<'ctx>,
|
||||
edge: &Self::EdgeCondition,
|
||||
) -> Result<HashSet<Interned<String>>> {
|
||||
let TypoEdge { term, .. } = edge;
|
||||
let term = ctx.term_interner.get(*term);
|
||||
Ok(HashSet::from_iter(term.all_single_words_except_prefix_db()))
|
||||
}
|
||||
|
||||
fn phrases_used_by_edge_condition<'ctx>(
|
||||
ctx: &mut SearchContext<'ctx>,
|
||||
edge: &Self::EdgeCondition,
|
||||
) -> Result<HashSet<Interned<Phrase>>> {
|
||||
let TypoEdge { term, .. } = edge;
|
||||
let term = ctx.term_interner.get(*term);
|
||||
Ok(HashSet::from_iter(term.all_phrases()))
|
||||
}
|
||||
}
|
||||
|
Loading…
Reference in New Issue
Block a user