mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 06:44:27 +01:00
WIP
This commit is contained in:
parent
2853009987
commit
8b4e07e1a3
@ -70,7 +70,9 @@ pub mod update;
|
|||||||
#[macro_use]
|
#[macro_use]
|
||||||
pub mod snapshot_tests;
|
pub mod snapshot_tests;
|
||||||
|
|
||||||
pub use search::new::{execute_search, SearchContext};
|
pub use search::new::DetailedSearchLogger;
|
||||||
|
|
||||||
|
pub use search::new::{execute_search, DefaultSearchLogger, SearchContext};
|
||||||
|
|
||||||
use std::collections::{BTreeMap, HashMap};
|
use std::collections::{BTreeMap, HashMap};
|
||||||
use std::convert::{TryFrom, TryInto};
|
use std::convert::{TryFrom, TryInto};
|
||||||
|
@ -45,8 +45,8 @@ use super::interner::MappedInterner;
|
|||||||
use super::logger::SearchLogger;
|
use super::logger::SearchLogger;
|
||||||
use super::query_graph::QueryNode;
|
use super::query_graph::QueryNode;
|
||||||
use super::ranking_rule_graph::{
|
use super::ranking_rule_graph::{
|
||||||
ConditionDocIdsCache, DeadEndPathCache, ProximityGraph, RankingRuleGraph,
|
ConditionDocIdsCache, DeadEndsCache, ProximityGraph, RankingRuleGraph, RankingRuleGraphTrait,
|
||||||
RankingRuleGraphTrait, TypoGraph,
|
TypoGraph,
|
||||||
};
|
};
|
||||||
use super::small_bitmap::SmallBitmap;
|
use super::small_bitmap::SmallBitmap;
|
||||||
use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
|
use super::{QueryGraph, RankingRule, RankingRuleOutput, SearchContext};
|
||||||
@ -87,7 +87,7 @@ pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
|
|||||||
/// Cache to retrieve the docids associated with each edge
|
/// Cache to retrieve the docids associated with each edge
|
||||||
conditions_cache: ConditionDocIdsCache<G>,
|
conditions_cache: ConditionDocIdsCache<G>,
|
||||||
/// Cache used to optimistically discard paths that resolve to no documents.
|
/// Cache used to optimistically discard paths that resolve to no documents.
|
||||||
dead_end_path_cache: DeadEndPathCache<G>,
|
dead_end_path_cache: DeadEndsCache<G::Condition>,
|
||||||
/// A structure giving the list of possible costs from each node to the end node,
|
/// A structure giving the list of possible costs from each node to the end node,
|
||||||
/// along with a set of unavoidable edges that must be traversed to achieve that distance.
|
/// along with a set of unavoidable edges that must be traversed to achieve that distance.
|
||||||
all_distances: MappedInterner<Vec<(u16, SmallBitmap<G::Condition>)>, QueryNode>,
|
all_distances: MappedInterner<Vec<(u16, SmallBitmap<G::Condition>)>, QueryNode>,
|
||||||
@ -103,7 +103,7 @@ fn remove_empty_edges<'ctx, G: RankingRuleGraphTrait>(
|
|||||||
graph: &mut RankingRuleGraph<G>,
|
graph: &mut RankingRuleGraph<G>,
|
||||||
condition_docids_cache: &mut ConditionDocIdsCache<G>,
|
condition_docids_cache: &mut ConditionDocIdsCache<G>,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
dead_end_path_cache: &mut DeadEndPathCache<G>,
|
dead_end_path_cache: &mut DeadEndsCache<G::Condition>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
for edge_id in graph.edges_store.indexes() {
|
for edge_id in graph.edges_store.indexes() {
|
||||||
let Some(edge) = graph.edges_store.get(edge_id).as_ref() else {
|
let Some(edge) = graph.edges_store.get(edge_id).as_ref() else {
|
||||||
@ -113,9 +113,9 @@ fn remove_empty_edges<'ctx, G: RankingRuleGraphTrait>(
|
|||||||
|
|
||||||
let docids =
|
let docids =
|
||||||
condition_docids_cache.get_condition_docids(ctx, condition, graph, universe)?;
|
condition_docids_cache.get_condition_docids(ctx, condition, graph, universe)?;
|
||||||
if docids.is_disjoint(universe) {
|
if docids.is_empty() {
|
||||||
graph.remove_edges_with_condition(condition);
|
graph.remove_edges_with_condition(condition);
|
||||||
dead_end_path_cache.add_condition(condition);
|
dead_end_path_cache.forbid_condition(condition); // add_condition(condition);
|
||||||
condition_docids_cache.cache.remove(&condition);
|
condition_docids_cache.cache.remove(&condition);
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -135,8 +135,8 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
|||||||
query_graph: &QueryGraph,
|
query_graph: &QueryGraph,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let mut graph = RankingRuleGraph::build(ctx, query_graph.clone())?;
|
let mut graph = RankingRuleGraph::build(ctx, query_graph.clone())?;
|
||||||
let mut condition_docids_cache = ConditionDocIdsCache::default();
|
let mut condition_docids_cache = ConditionDocIdsCache::new(universe);
|
||||||
let mut dead_end_path_cache = DeadEndPathCache::new(&graph.conditions_interner);
|
let mut dead_end_path_cache = DeadEndsCache::new(&graph.conditions_interner);
|
||||||
|
|
||||||
// First simplify the graph as much as possible, by computing the docids of all the conditions
|
// First simplify the graph as much as possible, by computing the docids of all the conditions
|
||||||
// within the rule's universe and removing the edges that have no associated docids.
|
// within the rule's universe and removing the edges that have no associated docids.
|
||||||
@ -230,62 +230,79 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
|||||||
graph.query_graph.root_node,
|
graph.query_graph.root_node,
|
||||||
cost,
|
cost,
|
||||||
all_distances,
|
all_distances,
|
||||||
|
dead_end_path_cache.forbidden.clone(),
|
||||||
|
|condition, forbidden_conditions| {},
|
||||||
dead_end_path_cache,
|
dead_end_path_cache,
|
||||||
|path, graph, dead_end_path_cache| {
|
|path, graph, dead_end_path_cache| {
|
||||||
// Accumulate the path for logging purposes only
|
// Accumulate the path for logging purposes only
|
||||||
paths.push(path.to_vec());
|
paths.push(path.to_vec());
|
||||||
|
|
||||||
let mut path_docids = universe.clone();
|
let mut path_docids = universe.clone();
|
||||||
|
|
||||||
// We store the edges and their docids in vectors in case the path turns out to be
|
// We store the edges and their docids in vectors in case the path turns out to be
|
||||||
// empty and we need to figure out why it was empty.
|
// empty and we need to figure out why it was empty.
|
||||||
let mut visited_conditions = vec![];
|
let mut visited_conditions = vec![];
|
||||||
let mut cached_condition_docids = vec![];
|
let mut cached_condition_docids = vec![];
|
||||||
// graph.conditions_interner.map(|_| RoaringBitmap::new());
|
|
||||||
|
|
||||||
for &condition in path {
|
for &latest_condition in path {
|
||||||
visited_conditions.push(condition);
|
visited_conditions.push(latest_condition);
|
||||||
|
|
||||||
let condition_docids = condition_docids_cache
|
let condition_docids = condition_docids_cache.get_condition_docids(
|
||||||
.get_condition_docids(ctx, condition, graph, &universe)?;
|
ctx,
|
||||||
|
latest_condition,
|
||||||
|
graph,
|
||||||
|
&universe,
|
||||||
|
)?;
|
||||||
|
|
||||||
cached_condition_docids.push((condition, condition_docids.clone())); // .get_mut(condition) = condition_docids.clone();
|
cached_condition_docids.push((latest_condition, condition_docids.clone()));
|
||||||
|
|
||||||
// If the edge is empty, then the path will be empty as well, we update the graph
|
// If the edge is empty, then the path will be empty as well, we update the graph
|
||||||
// and caches accordingly and skip to the next candidate path.
|
// and caches accordingly and skip to the next candidate path.
|
||||||
if condition_docids.is_disjoint(&universe) {
|
if condition_docids.is_disjoint(&universe) {
|
||||||
// 1. Store in the cache that this edge is empty for this universe
|
// 1. Store in the cache that this edge is empty for this universe
|
||||||
dead_end_path_cache.add_condition(condition);
|
dead_end_path_cache.forbid_condition(latest_condition);
|
||||||
// 2. remove this edge from the ranking rule graph
|
// 2. remove all the edges with this condition from the ranking rule graph
|
||||||
// ouch, no! :( need to link a condition to one or more ranking rule edges
|
graph.remove_edges_with_condition(latest_condition);
|
||||||
graph.remove_edges_with_condition(condition);
|
|
||||||
// 3. Also remove the entry from the condition_docids_cache, since we don't need it anymore
|
// 3. Also remove the entry from the condition_docids_cache, since we don't need it anymore
|
||||||
condition_docids_cache.cache.remove(&condition);
|
condition_docids_cache.cache.remove(&latest_condition);
|
||||||
return Ok(ControlFlow::Continue(()));
|
return Ok(ControlFlow::Continue(()));
|
||||||
}
|
}
|
||||||
path_docids &= condition_docids;
|
|
||||||
|
|
||||||
// If the (sub)path is empty, we try to figure out why and update the caches accordingly.
|
// If the (sub)path is empty, we try to figure out why and update the caches accordingly.
|
||||||
if path_docids.is_disjoint(&universe) {
|
if path_docids.is_disjoint(condition_docids) {
|
||||||
// First, we know that this path is empty, and thus any path
|
// First, we know that this path is empty, and thus any path
|
||||||
// that is a superset of it will also be empty.
|
// that is a superset of it will also be empty.
|
||||||
dead_end_path_cache.add_prefix(&visited_conditions);
|
dead_end_path_cache.forbid_condition_after_prefix(
|
||||||
|
visited_conditions[..visited_conditions.len() - 1].iter().copied(),
|
||||||
|
latest_condition,
|
||||||
|
);
|
||||||
|
|
||||||
|
let mut dead_end_cache_cursor = dead_end_path_cache;
|
||||||
|
|
||||||
// Second, if the intersection between this edge and any
|
// Second, if the intersection between this edge and any
|
||||||
// previous one is disjoint with the universe,
|
// previous prefix is disjoint with the universe, then... TODO
|
||||||
// then we also know that any path containing the same couple of
|
for (past_condition, past_condition_docids) in
|
||||||
// edges will also be empty.
|
cached_condition_docids.iter()
|
||||||
for (past_condition, condition_docids2) in cached_condition_docids.iter() {
|
{
|
||||||
if *past_condition == condition {
|
// TODO: should ensure that it is simply not possible to have twice
|
||||||
|
// the same condition in the cached_condition_docids. Maybe it is
|
||||||
|
// already the case?
|
||||||
|
dead_end_cache_cursor =
|
||||||
|
dead_end_cache_cursor.advance(*past_condition).unwrap();
|
||||||
|
// TODO: check how that interacts with the dead end cache?
|
||||||
|
if *past_condition == latest_condition {
|
||||||
|
// TODO: should we break instead?
|
||||||
|
// Is it even possible?
|
||||||
continue;
|
continue;
|
||||||
};
|
};
|
||||||
let intersection = condition_docids & condition_docids2;
|
if condition_docids.is_disjoint(past_condition_docids) {
|
||||||
if intersection.is_disjoint(&universe) {
|
dead_end_cache_cursor.forbid_condition(latest_condition);
|
||||||
dead_end_path_cache
|
|
||||||
.add_condition_couple(*past_condition, condition);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// We should maybe instead try to compute:
|
// We should maybe instead try to compute:
|
||||||
// 0th & nth & 1st & n-1th & 2nd & etc...
|
// 0th & nth & 1st & n-1th & 2nd & etc...
|
||||||
return Ok(ControlFlow::Continue(()));
|
return Ok(ControlFlow::Continue(()));
|
||||||
|
} else {
|
||||||
|
path_docids &= condition_docids;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
assert!(!path_docids.is_empty());
|
assert!(!path_docids.is_empty());
|
||||||
@ -303,7 +320,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
|||||||
}
|
}
|
||||||
},
|
},
|
||||||
)?;
|
)?;
|
||||||
|
// println!(" {} paths of cost {} in {}", paths.len(), cost, self.id);
|
||||||
G::log_state(
|
G::log_state(
|
||||||
&original_graph,
|
&original_graph,
|
||||||
&paths,
|
&paths,
|
||||||
|
@ -152,7 +152,7 @@ impl<T> Hash for Interned<T> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<T: Ord> Ord for Interned<T> {
|
impl<T> Ord for Interned<T> {
|
||||||
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
|
fn cmp(&self, other: &Self) -> std::cmp::Ordering {
|
||||||
self.idx.cmp(&other.idx)
|
self.idx.cmp(&other.idx)
|
||||||
}
|
}
|
||||||
|
@ -3,7 +3,7 @@ use std::io::Write;
|
|||||||
use std::path::PathBuf;
|
use std::path::PathBuf;
|
||||||
use std::time::Instant;
|
use std::time::Instant;
|
||||||
|
|
||||||
use rand::random;
|
// use rand::random;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::search::new::interner::{Interned, MappedInterner};
|
use crate::search::new::interner::{Interned, MappedInterner};
|
||||||
@ -323,12 +323,11 @@ impl DetailedSearchLogger {
|
|||||||
let cur_activated_id = activated_id(&timestamp);
|
let cur_activated_id = activated_id(&timestamp);
|
||||||
let docids = new.iter().collect::<Vec<_>>();
|
let docids = new.iter().collect::<Vec<_>>();
|
||||||
let len = new.len();
|
let len = new.len();
|
||||||
let random = random::<u64>();
|
|
||||||
|
|
||||||
writeln!(
|
writeln!(
|
||||||
&mut file,
|
&mut file,
|
||||||
"{cur_ranking_rule}.{cur_activated_id} -> results.{random} : \"add {len}\"
|
"{cur_ranking_rule}.{cur_activated_id} -> results.{cur_ranking_rule}{cur_activated_id} : \"add {len}\"
|
||||||
results.{random} {{
|
results.{cur_ranking_rule}{cur_activated_id} {{
|
||||||
tooltip: \"{docids:?}\"
|
tooltip: \"{docids:?}\"
|
||||||
style {{
|
style {{
|
||||||
fill: \"#B6E2D3\"
|
fill: \"#B6E2D3\"
|
||||||
@ -572,17 +571,17 @@ shape: class"
|
|||||||
Self::paths_d2_description(ctx, graph, paths, file);
|
Self::paths_d2_description(ctx, graph, paths, file);
|
||||||
writeln!(file, "}}").unwrap();
|
writeln!(file, "}}").unwrap();
|
||||||
|
|
||||||
writeln!(file, "Dead-end couples of conditions {{").unwrap();
|
// writeln!(file, "Dead-end couples of conditions {{").unwrap();
|
||||||
for (i, (e1, e2)) in dead_end_paths_cache.condition_couples.iter().enumerate() {
|
// for (i, (e1, e2)) in dead_end_paths_cache.condition_couples.iter().enumerate() {
|
||||||
writeln!(file, "{i} : \"\" {{").unwrap();
|
// writeln!(file, "{i} : \"\" {{").unwrap();
|
||||||
Self::condition_d2_description(ctx, graph, e1, file);
|
// Self::condition_d2_description(ctx, graph, e1, file);
|
||||||
for e2 in e2.iter() {
|
// for e2 in e2.iter() {
|
||||||
Self::condition_d2_description(ctx, graph, e2, file);
|
// Self::condition_d2_description(ctx, graph, e2, file);
|
||||||
writeln!(file, "{e1} -- {e2}").unwrap();
|
// writeln!(file, "{e1} -- {e2}").unwrap();
|
||||||
}
|
// }
|
||||||
writeln!(file, "}}").unwrap();
|
// writeln!(file, "}}").unwrap();
|
||||||
}
|
// }
|
||||||
writeln!(file, "}}").unwrap();
|
// writeln!(file, "}}").unwrap();
|
||||||
|
|
||||||
writeln!(file, "Dead-end edges {{").unwrap();
|
writeln!(file, "Dead-end edges {{").unwrap();
|
||||||
for condition in dead_end_paths_cache.conditions.iter() {
|
for condition in dead_end_paths_cache.conditions.iter() {
|
||||||
|
@ -1,4 +1,4 @@
|
|||||||
#[cfg(test)]
|
// #[cfg(test)]
|
||||||
pub mod detailed;
|
pub mod detailed;
|
||||||
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
@ -6,8 +6,7 @@ use roaring::RoaringBitmap;
|
|||||||
use super::interner::{Interned, MappedInterner};
|
use super::interner::{Interned, MappedInterner};
|
||||||
use super::query_graph::QueryNode;
|
use super::query_graph::QueryNode;
|
||||||
use super::ranking_rule_graph::{
|
use super::ranking_rule_graph::{
|
||||||
DeadEndPathCache, ProximityCondition, ProximityGraph, RankingRuleGraph, TypoCondition,
|
DeadEndsCache, ProximityCondition, ProximityGraph, RankingRuleGraph, TypoCondition, TypoGraph,
|
||||||
TypoGraph,
|
|
||||||
};
|
};
|
||||||
use super::small_bitmap::SmallBitmap;
|
use super::small_bitmap::SmallBitmap;
|
||||||
use super::{RankingRule, RankingRuleQueryTrait};
|
use super::{RankingRule, RankingRuleQueryTrait};
|
||||||
@ -67,7 +66,7 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
|
|||||||
&mut self,
|
&mut self,
|
||||||
query_graph: &RankingRuleGraph<ProximityGraph>,
|
query_graph: &RankingRuleGraph<ProximityGraph>,
|
||||||
paths: &[Vec<Interned<ProximityCondition>>],
|
paths: &[Vec<Interned<ProximityCondition>>],
|
||||||
dead_end_path_cache: &DeadEndPathCache<ProximityGraph>,
|
dead_end_path_cache: &DeadEndsCache<ProximityCondition>,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>,
|
distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
@ -78,7 +77,7 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
|
|||||||
&mut self,
|
&mut self,
|
||||||
query_graph: &RankingRuleGraph<TypoGraph>,
|
query_graph: &RankingRuleGraph<TypoGraph>,
|
||||||
paths: &[Vec<Interned<TypoCondition>>],
|
paths: &[Vec<Interned<TypoCondition>>],
|
||||||
dead_end_path_cache: &DeadEndPathCache<TypoGraph>,
|
dead_end_path_cache: &DeadEndsCache<TypoCondition>,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoCondition>)>, QueryNode>,
|
distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoCondition>)>, QueryNode>,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
@ -138,7 +137,7 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
|
|||||||
&mut self,
|
&mut self,
|
||||||
_query_graph: &RankingRuleGraph<ProximityGraph>,
|
_query_graph: &RankingRuleGraph<ProximityGraph>,
|
||||||
_paths_map: &[Vec<Interned<ProximityCondition>>],
|
_paths_map: &[Vec<Interned<ProximityCondition>>],
|
||||||
_dead_end_path_cache: &DeadEndPathCache<ProximityGraph>,
|
_dead_end_path_cache: &DeadEndsCache<ProximityCondition>,
|
||||||
_universe: &RoaringBitmap,
|
_universe: &RoaringBitmap,
|
||||||
_distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>,
|
_distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>,
|
||||||
_cost: u16,
|
_cost: u16,
|
||||||
@ -149,7 +148,7 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
|
|||||||
&mut self,
|
&mut self,
|
||||||
_query_graph: &RankingRuleGraph<TypoGraph>,
|
_query_graph: &RankingRuleGraph<TypoGraph>,
|
||||||
_paths: &[Vec<Interned<TypoCondition>>],
|
_paths: &[Vec<Interned<TypoCondition>>],
|
||||||
_dead_end_path_cache: &DeadEndPathCache<TypoGraph>,
|
_dead_end_path_cache: &DeadEndsCache<TypoCondition>,
|
||||||
_universe: &RoaringBitmap,
|
_universe: &RoaringBitmap,
|
||||||
_distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoCondition>)>, QueryNode>,
|
_distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoCondition>)>, QueryNode>,
|
||||||
_cost: u16,
|
_cost: u16,
|
||||||
|
@ -15,26 +15,26 @@ mod sort;
|
|||||||
// TODO: documentation + comments
|
// TODO: documentation + comments
|
||||||
mod words;
|
mod words;
|
||||||
|
|
||||||
|
// #[cfg(test)]
|
||||||
|
pub use logger::detailed::DetailedSearchLogger;
|
||||||
pub use logger::{DefaultSearchLogger, SearchLogger};
|
pub use logger::{DefaultSearchLogger, SearchLogger};
|
||||||
|
|
||||||
use std::collections::{BTreeSet, HashSet};
|
use std::collections::{BTreeSet, HashSet};
|
||||||
|
|
||||||
|
use crate::{Filter, Index, MatchingWords, Result, Search, SearchResult, TermsMatchingStrategy};
|
||||||
use charabia::Tokenize;
|
use charabia::Tokenize;
|
||||||
use db_cache::DatabaseCache;
|
use db_cache::DatabaseCache;
|
||||||
|
use graph_based_ranking_rule::{Proximity, Typo};
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
use query_graph::{QueryGraph, QueryNode};
|
use interner::DedupInterner;
|
||||||
pub use ranking_rules::{bucket_sort, RankingRule, RankingRuleOutput, RankingRuleQueryTrait};
|
use query_graph::{QueryGraph, QueryNode, QueryNodeData};
|
||||||
|
use query_term::{located_query_terms_from_string, Phrase, QueryTerm};
|
||||||
|
use ranking_rules::{bucket_sort, PlaceholderQuery, RankingRuleOutput, RankingRuleQueryTrait};
|
||||||
|
use resolve_query_graph::{resolve_query_graph, QueryTermDocIdsCache};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
use words::Words;
|
||||||
|
|
||||||
use self::interner::DedupInterner;
|
use self::ranking_rules::RankingRule;
|
||||||
use self::query_graph::QueryNodeData;
|
|
||||||
use self::query_term::{Phrase, QueryTerm};
|
|
||||||
use self::ranking_rules::PlaceholderQuery;
|
|
||||||
use self::resolve_query_graph::{resolve_query_graph, QueryTermDocIdsCache};
|
|
||||||
use crate::search::new::graph_based_ranking_rule::{Proximity, Typo};
|
|
||||||
use crate::search::new::query_term::located_query_terms_from_string;
|
|
||||||
use crate::search::new::words::Words;
|
|
||||||
use crate::{Filter, Index, Result, TermsMatchingStrategy};
|
|
||||||
|
|
||||||
/// A structure used throughout the execution of a search query.
|
/// A structure used throughout the execution of a search query.
|
||||||
pub struct SearchContext<'ctx> {
|
pub struct SearchContext<'ctx> {
|
||||||
@ -231,12 +231,12 @@ pub fn execute_search<'ctx>(
|
|||||||
length: usize,
|
length: usize,
|
||||||
placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery>,
|
placeholder_search_logger: &mut dyn SearchLogger<PlaceholderQuery>,
|
||||||
query_graph_logger: &mut dyn SearchLogger<QueryGraph>,
|
query_graph_logger: &mut dyn SearchLogger<QueryGraph>,
|
||||||
) -> Result<Vec<u32>> {
|
) -> Result<SearchResult> {
|
||||||
assert!(!query.is_empty());
|
assert!(!query.is_empty());
|
||||||
let query_terms = located_query_terms_from_string(ctx, query.tokenize(), None)?;
|
let query_terms = located_query_terms_from_string(ctx, query.tokenize(), None)?;
|
||||||
let graph = QueryGraph::from_query(ctx, query_terms)?;
|
let graph = QueryGraph::from_query(ctx, query_terms)?;
|
||||||
|
|
||||||
let universe = if let Some(filters) = filters {
|
let mut universe = if let Some(filters) = filters {
|
||||||
filters.evaluate(ctx.txn, ctx.index)?
|
filters.evaluate(ctx.txn, ctx.index)?
|
||||||
} else {
|
} else {
|
||||||
ctx.index.documents_ids(ctx.txn)?
|
ctx.index.documents_ids(ctx.txn)?
|
||||||
@ -249,8 +249,8 @@ pub fn execute_search<'ctx>(
|
|||||||
// But in that case, we should return no results.
|
// But in that case, we should return no results.
|
||||||
//
|
//
|
||||||
// The search is a placeholder search only if there are no tokens?
|
// The search is a placeholder search only if there are no tokens?
|
||||||
if graph.nodes.len() > 2 {
|
let documents_ids = if graph.nodes.len() > 2 {
|
||||||
let universe = resolve_maximally_reduced_query_graph(
|
universe = resolve_maximally_reduced_query_graph(
|
||||||
ctx,
|
ctx,
|
||||||
&universe,
|
&universe,
|
||||||
&graph,
|
&graph,
|
||||||
@ -259,7 +259,7 @@ pub fn execute_search<'ctx>(
|
|||||||
)?;
|
)?;
|
||||||
|
|
||||||
let ranking_rules = get_ranking_rules_for_query_graph_search(ctx, terms_matching_strategy)?;
|
let ranking_rules = get_ranking_rules_for_query_graph_search(ctx, terms_matching_strategy)?;
|
||||||
bucket_sort(ctx, ranking_rules, &graph, &universe, from, length, query_graph_logger)
|
bucket_sort(ctx, ranking_rules, &graph, &universe, from, length, query_graph_logger)?
|
||||||
} else {
|
} else {
|
||||||
let ranking_rules = get_ranking_rules_for_placeholder_search(ctx)?;
|
let ranking_rules = get_ranking_rules_for_placeholder_search(ctx)?;
|
||||||
bucket_sort(
|
bucket_sort(
|
||||||
@ -270,7 +270,22 @@ pub fn execute_search<'ctx>(
|
|||||||
from,
|
from,
|
||||||
length,
|
length,
|
||||||
placeholder_search_logger,
|
placeholder_search_logger,
|
||||||
)
|
)?
|
||||||
|
};
|
||||||
|
|
||||||
|
Ok(SearchResult {
|
||||||
|
// TODO: correct matching words
|
||||||
|
matching_words: MatchingWords::default(),
|
||||||
|
// TODO: candidates with distinct
|
||||||
|
candidates: universe,
|
||||||
|
documents_ids,
|
||||||
|
})
|
||||||
|
}
|
||||||
|
|
||||||
|
impl<'a> Search<'a> {
|
||||||
|
// TODO
|
||||||
|
pub fn execute_new(&self) -> Result<SearchResult> {
|
||||||
|
todo!()
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -329,7 +344,7 @@ mod tests {
|
|||||||
println!("{}us", elapsed.as_micros());
|
println!("{}us", elapsed.as_micros());
|
||||||
|
|
||||||
let _documents = index
|
let _documents = index
|
||||||
.documents(&txn, results.iter().copied())
|
.documents(&txn, results.documents_ids.iter().copied())
|
||||||
.unwrap()
|
.unwrap()
|
||||||
.into_iter()
|
.into_iter()
|
||||||
.map(|(id, obkv)| {
|
.map(|(id, obkv)| {
|
||||||
|
@ -4,8 +4,7 @@ use std::collections::btree_map::Entry;
|
|||||||
use std::collections::{BTreeMap, VecDeque};
|
use std::collections::{BTreeMap, VecDeque};
|
||||||
use std::ops::ControlFlow;
|
use std::ops::ControlFlow;
|
||||||
|
|
||||||
use super::dead_end_path_cache::DeadEndPathCache;
|
use super::{DeadEndsCache, RankingRuleGraph, RankingRuleGraphTrait};
|
||||||
use super::{RankingRuleGraph, RankingRuleGraphTrait};
|
|
||||||
use crate::search::new::interner::{Interned, MappedInterner};
|
use crate::search::new::interner::{Interned, MappedInterner};
|
||||||
use crate::search::new::query_graph::QueryNode;
|
use crate::search::new::query_graph::QueryNode;
|
||||||
use crate::search::new::small_bitmap::SmallBitmap;
|
use crate::search::new::small_bitmap::SmallBitmap;
|
||||||
@ -23,11 +22,11 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
from: Interned<QueryNode>,
|
from: Interned<QueryNode>,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
all_distances: &MappedInterner<Vec<(u16, SmallBitmap<G::Condition>)>, QueryNode>,
|
all_distances: &MappedInterner<Vec<(u16, SmallBitmap<G::Condition>)>, QueryNode>,
|
||||||
dead_end_path_cache: &mut DeadEndPathCache<G>,
|
dead_end_path_cache: &mut DeadEndsCache<G::Condition>,
|
||||||
mut visit: impl FnMut(
|
mut visit: impl FnMut(
|
||||||
&[Interned<G::Condition>],
|
&[Interned<G::Condition>],
|
||||||
&mut Self,
|
&mut Self,
|
||||||
&mut DeadEndPathCache<G>,
|
&mut DeadEndsCache<G::Condition>,
|
||||||
) -> Result<ControlFlow<()>>,
|
) -> Result<ControlFlow<()>>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
let _ = self.visit_paths_of_cost_rec(
|
let _ = self.visit_paths_of_cost_rec(
|
||||||
@ -38,7 +37,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
&mut visit,
|
&mut visit,
|
||||||
&mut vec![],
|
&mut vec![],
|
||||||
&mut SmallBitmap::for_interned_values_in(&self.conditions_interner),
|
&mut SmallBitmap::for_interned_values_in(&self.conditions_interner),
|
||||||
&mut dead_end_path_cache.conditions.clone(),
|
&mut dead_end_path_cache.forbidden.clone(),
|
||||||
)?;
|
)?;
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
@ -47,11 +46,11 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
from: Interned<QueryNode>,
|
from: Interned<QueryNode>,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
all_distances: &MappedInterner<Vec<(u16, SmallBitmap<G::Condition>)>, QueryNode>,
|
all_distances: &MappedInterner<Vec<(u16, SmallBitmap<G::Condition>)>, QueryNode>,
|
||||||
dead_end_path_cache: &mut DeadEndPathCache<G>,
|
dead_end_path_cache: &mut DeadEndsCache<G::Condition>,
|
||||||
visit: &mut impl FnMut(
|
visit: &mut impl FnMut(
|
||||||
&[Interned<G::Condition>],
|
&[Interned<G::Condition>],
|
||||||
&mut Self,
|
&mut Self,
|
||||||
&mut DeadEndPathCache<G>,
|
&mut DeadEndsCache<G::Condition>,
|
||||||
) -> Result<ControlFlow<()>>,
|
) -> Result<ControlFlow<()>>,
|
||||||
prev_conditions: &mut Vec<Interned<G::Condition>>,
|
prev_conditions: &mut Vec<Interned<G::Condition>>,
|
||||||
cur_path: &mut SmallBitmap<G::Condition>,
|
cur_path: &mut SmallBitmap<G::Condition>,
|
||||||
@ -74,7 +73,6 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
ControlFlow::Continue(_) => {}
|
ControlFlow::Continue(_) => {}
|
||||||
ControlFlow::Break(_) => return Ok(true),
|
ControlFlow::Break(_) => return Ok(true),
|
||||||
}
|
}
|
||||||
true
|
|
||||||
} else {
|
} else {
|
||||||
self.visit_paths_of_cost_rec(
|
self.visit_paths_of_cost_rec(
|
||||||
edge.dest_node,
|
edge.dest_node,
|
||||||
@ -85,7 +83,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
prev_conditions,
|
prev_conditions,
|
||||||
cur_path,
|
cur_path,
|
||||||
forbidden_conditions,
|
forbidden_conditions,
|
||||||
)?
|
)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Some(condition) => {
|
Some(condition) => {
|
||||||
@ -101,24 +99,20 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
}
|
}
|
||||||
cur_path.insert(condition);
|
cur_path.insert(condition);
|
||||||
prev_conditions.push(condition);
|
prev_conditions.push(condition);
|
||||||
|
|
||||||
let mut new_forbidden_conditions = forbidden_conditions.clone();
|
let mut new_forbidden_conditions = forbidden_conditions.clone();
|
||||||
new_forbidden_conditions
|
if let Some(next_forbidden) =
|
||||||
.union(dead_end_path_cache.condition_couples.get(condition));
|
dead_end_path_cache.forbidden_conditions_after_prefix(&prev_conditions)
|
||||||
dead_end_path_cache.prefixes.final_edges_after_prefix(
|
{
|
||||||
prev_conditions,
|
new_forbidden_conditions.union(&next_forbidden);
|
||||||
&mut |x| {
|
}
|
||||||
new_forbidden_conditions.insert(x);
|
|
||||||
},
|
if edge.dest_node == self.query_graph.end_node {
|
||||||
);
|
|
||||||
let next_any_valid = if edge.dest_node == self.query_graph.end_node {
|
|
||||||
any_valid = true;
|
any_valid = true;
|
||||||
let control_flow = visit(prev_conditions, self, dead_end_path_cache)?;
|
let control_flow = visit(prev_conditions, self, dead_end_path_cache)?;
|
||||||
match control_flow {
|
match control_flow {
|
||||||
ControlFlow::Continue(_) => {}
|
ControlFlow::Continue(_) => {}
|
||||||
ControlFlow::Break(_) => return Ok(true),
|
ControlFlow::Break(_) => return Ok(true),
|
||||||
}
|
}
|
||||||
true
|
|
||||||
} else {
|
} else {
|
||||||
self.visit_paths_of_cost_rec(
|
self.visit_paths_of_cost_rec(
|
||||||
edge.dest_node,
|
edge.dest_node,
|
||||||
@ -129,28 +123,12 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
prev_conditions,
|
prev_conditions,
|
||||||
cur_path,
|
cur_path,
|
||||||
&mut new_forbidden_conditions,
|
&mut new_forbidden_conditions,
|
||||||
)?
|
)?;
|
||||||
};
|
}
|
||||||
cur_path.remove(condition);
|
cur_path.remove(condition);
|
||||||
prev_conditions.pop();
|
prev_conditions.pop();
|
||||||
next_any_valid
|
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
any_valid |= next_any_valid;
|
|
||||||
|
|
||||||
if next_any_valid {
|
|
||||||
if dead_end_path_cache.path_is_dead_end(prev_conditions, cur_path) {
|
|
||||||
return Ok(any_valid);
|
|
||||||
}
|
|
||||||
forbidden_conditions.union(&dead_end_path_cache.conditions);
|
|
||||||
for prev_condition in prev_conditions.iter() {
|
|
||||||
forbidden_conditions
|
|
||||||
.union(dead_end_path_cache.condition_couples.get(*prev_condition));
|
|
||||||
}
|
|
||||||
dead_end_path_cache.prefixes.final_edges_after_prefix(prev_conditions, &mut |x| {
|
|
||||||
forbidden_conditions.insert(x);
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(any_valid)
|
Ok(any_valid)
|
||||||
|
@ -12,11 +12,16 @@ use crate::Result;
|
|||||||
pub struct ConditionDocIdsCache<G: RankingRuleGraphTrait> {
|
pub struct ConditionDocIdsCache<G: RankingRuleGraphTrait> {
|
||||||
// TODO: should be FxHashMap<Interned<EdgeCondition>, RoaringBitmap>
|
// TODO: should be FxHashMap<Interned<EdgeCondition>, RoaringBitmap>
|
||||||
pub cache: FxHashMap<Interned<G::Condition>, RoaringBitmap>,
|
pub cache: FxHashMap<Interned<G::Condition>, RoaringBitmap>,
|
||||||
|
pub universe_length: u64,
|
||||||
_phantom: PhantomData<G>,
|
_phantom: PhantomData<G>,
|
||||||
}
|
}
|
||||||
impl<G: RankingRuleGraphTrait> Default for ConditionDocIdsCache<G> {
|
impl<G: RankingRuleGraphTrait> ConditionDocIdsCache<G> {
|
||||||
fn default() -> Self {
|
pub fn new(universe: &RoaringBitmap) -> Self {
|
||||||
Self { cache: Default::default(), _phantom: Default::default() }
|
Self {
|
||||||
|
cache: Default::default(),
|
||||||
|
_phantom: Default::default(),
|
||||||
|
universe_length: universe.len(),
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl<G: RankingRuleGraphTrait> ConditionDocIdsCache<G> {
|
impl<G: RankingRuleGraphTrait> ConditionDocIdsCache<G> {
|
||||||
@ -33,6 +38,9 @@ impl<G: RankingRuleGraphTrait> ConditionDocIdsCache<G> {
|
|||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
) -> Result<&'s RoaringBitmap> {
|
) -> Result<&'s RoaringBitmap> {
|
||||||
if self.cache.contains_key(&interned_condition) {
|
if self.cache.contains_key(&interned_condition) {
|
||||||
|
// TODO compare length of universe compared to the one in self
|
||||||
|
// if it is smaller, then update the value
|
||||||
|
|
||||||
// TODO: should we update the bitmap in the cache if the new universe
|
// TODO: should we update the bitmap in the cache if the new universe
|
||||||
// reduces it?
|
// reduces it?
|
||||||
// TODO: maybe have a generation: u32 to track every time the universe was
|
// TODO: maybe have a generation: u32 to track every time the universe was
|
||||||
|
@ -1,84 +1,83 @@
|
|||||||
use super::{path_set::PathSet, RankingRuleGraphTrait};
|
// use super::{path_set::PathSet, RankingRuleGraphTrait};
|
||||||
use crate::search::new::{
|
// use crate::search::new::{
|
||||||
interner::{FixedSizeInterner, Interned, MappedInterner},
|
// interner::{FixedSizeInterner, Interned, MappedInterner},
|
||||||
small_bitmap::SmallBitmap,
|
// small_bitmap::SmallBitmap,
|
||||||
};
|
// };
|
||||||
|
|
||||||
/// A cache which stores sufficient conditions for a path
|
// /// A cache which stores sufficient conditions for a path
|
||||||
/// to resolve to an empty set of candidates within the current
|
// /// to resolve to an empty set of candidates within the current
|
||||||
/// universe.
|
// /// universe.
|
||||||
pub struct DeadEndPathCache<G: RankingRuleGraphTrait> {
|
// pub struct DeadEndPathCache<G: RankingRuleGraphTrait> {
|
||||||
/// The set of edge conditions that resolve to no documents.
|
// /// The set of edge conditions that resolve to no documents.
|
||||||
pub conditions: SmallBitmap<G::Condition>,
|
// pub conditions: SmallBitmap<G::Condition>,
|
||||||
/// A set of path prefixes that resolve to no documents.
|
// /// A set of path prefixes that resolve to no documents.
|
||||||
pub prefixes: PathSet<G::Condition>,
|
// pub prefixes: PathSet<G::Condition>,
|
||||||
/// A set of empty couples of edge conditions that resolve to no documents.
|
// /// A set of empty couples of edge conditions that resolve to no documents.
|
||||||
pub condition_couples: MappedInterner<SmallBitmap<G::Condition>, G::Condition>,
|
// pub condition_couples: MappedInterner<SmallBitmap<G::Condition>, G::Condition>,
|
||||||
}
|
// }
|
||||||
impl<G: RankingRuleGraphTrait> Clone for DeadEndPathCache<G> {
|
// impl<G: RankingRuleGraphTrait> Clone for DeadEndPathCache<G> {
|
||||||
fn clone(&self) -> Self {
|
// fn clone(&self) -> Self {
|
||||||
Self {
|
// Self {
|
||||||
conditions: self.conditions.clone(),
|
// conditions: self.conditions.clone(),
|
||||||
prefixes: self.prefixes.clone(),
|
// prefixes: self.prefixes.clone(),
|
||||||
condition_couples: self.condition_couples.clone(),
|
// condition_couples: self.condition_couples.clone(),
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
|
||||||
impl<G: RankingRuleGraphTrait> DeadEndPathCache<G> {
|
// impl<G: RankingRuleGraphTrait> DeadEndPathCache<G> {
|
||||||
/// Create a new cache for a ranking rule graph containing at most `all_edges_len` edges.
|
// /// Create a new cache for a ranking rule graph containing at most `all_edges_len` edges.
|
||||||
pub fn new(all_conditions: &FixedSizeInterner<G::Condition>) -> Self {
|
// pub fn new(all_conditions: &FixedSizeInterner<G::Condition>) -> Self {
|
||||||
Self {
|
// Self {
|
||||||
conditions: SmallBitmap::for_interned_values_in(all_conditions),
|
// conditions: SmallBitmap::for_interned_values_in(all_conditions),
|
||||||
prefixes: PathSet::default(),
|
// prefixes: PathSet::default(),
|
||||||
condition_couples: all_conditions
|
// condition_couples: all_conditions
|
||||||
.map(|_| SmallBitmap::for_interned_values_in(all_conditions)),
|
// .map(|_| SmallBitmap::for_interned_values_in(all_conditions)),
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
|
|
||||||
/// Store in the cache that every path containing the given edge resolves to no documents.
|
// /// Store in the cache that every path containing the given edge resolves to no documents.
|
||||||
pub fn add_condition(&mut self, condition: Interned<G::Condition>) {
|
// pub fn add_condition(&mut self, condition: Interned<G::Condition>) {
|
||||||
self.conditions.insert(condition);
|
// self.conditions.insert(condition);
|
||||||
self.condition_couples.get_mut(condition).clear();
|
// self.condition_couples.get_mut(condition).clear();
|
||||||
self.prefixes.remove_edge(condition);
|
// self.prefixes.remove_edge(condition);
|
||||||
for (_, edges2) in self.condition_couples.iter_mut() {
|
// for (_, edges2) in self.condition_couples.iter_mut() {
|
||||||
edges2.remove(condition);
|
// edges2.remove(condition);
|
||||||
}
|
// }
|
||||||
}
|
// }
|
||||||
/// Store in the cache that every path containing the given prefix resolves to no documents.
|
// /// Store in the cache that every path containing the given prefix resolves to no documents.
|
||||||
pub fn add_prefix(&mut self, prefix: &[Interned<G::Condition>]) {
|
// pub fn add_prefix(&mut self, prefix: &[Interned<G::Condition>]) {
|
||||||
// TODO: typed PathSet
|
// // TODO: typed PathSet
|
||||||
self.prefixes.insert(prefix.iter().copied());
|
// self.prefixes.insert(prefix.iter().copied());
|
||||||
}
|
// }
|
||||||
|
|
||||||
/// Store in the cache that every path containing the two given edges resolves to no documents.
|
// /// Store in the cache that every path containing the two given edges resolves to no documents.
|
||||||
pub fn add_condition_couple(
|
// pub fn add_condition_couple(
|
||||||
&mut self,
|
// &mut self,
|
||||||
edge1: Interned<G::Condition>,
|
// edge1: Interned<G::Condition>,
|
||||||
edge2: Interned<G::Condition>,
|
// edge2: Interned<G::Condition>,
|
||||||
) {
|
// ) {
|
||||||
self.condition_couples.get_mut(edge1).insert(edge2);
|
// self.condition_couples.get_mut(edge1).insert(edge2);
|
||||||
}
|
// }
|
||||||
|
|
||||||
/// Returns true if the cache can determine that the given path resolves to no documents.
|
// /// Returns true if the cache can determine that the given path resolves to no documents.
|
||||||
pub fn path_is_dead_end(
|
// pub fn path_is_dead_end(
|
||||||
&self,
|
// &self,
|
||||||
path: &[Interned<G::Condition>],
|
// path: &[Interned<G::Condition>],
|
||||||
path_bitmap: &SmallBitmap<G::Condition>,
|
// path_bitmap: &SmallBitmap<G::Condition>,
|
||||||
) -> bool {
|
// ) -> bool {
|
||||||
if path_bitmap.intersects(&self.conditions) {
|
// if path_bitmap.intersects(&self.conditions) {
|
||||||
return true;
|
// return true;
|
||||||
}
|
// }
|
||||||
for condition in path.iter() {
|
// for condition in path.iter() {
|
||||||
// TODO: typed path
|
// let forbidden_other_edges = self.condition_couples.get(*condition);
|
||||||
let forbidden_other_edges = self.condition_couples.get(*condition);
|
// if path_bitmap.intersects(forbidden_other_edges) {
|
||||||
if path_bitmap.intersects(forbidden_other_edges) {
|
// return true;
|
||||||
return true;
|
// }
|
||||||
}
|
// }
|
||||||
}
|
// if self.prefixes.contains_prefix_of_path(path) {
|
||||||
if self.prefixes.contains_prefix_of_path(path) {
|
// return true;
|
||||||
return true;
|
// }
|
||||||
}
|
// false
|
||||||
false
|
// }
|
||||||
}
|
// }
|
||||||
}
|
|
||||||
|
@ -20,7 +20,8 @@ use std::collections::HashSet;
|
|||||||
use std::hash::Hash;
|
use std::hash::Hash;
|
||||||
|
|
||||||
pub use condition_docids_cache::ConditionDocIdsCache;
|
pub use condition_docids_cache::ConditionDocIdsCache;
|
||||||
pub use dead_end_path_cache::DeadEndPathCache;
|
// pub use dead_end_path_cache::DeadEndPathCache;
|
||||||
|
pub use path_set::DeadEndsCache;
|
||||||
pub use proximity::{ProximityCondition, ProximityGraph};
|
pub use proximity::{ProximityCondition, ProximityGraph};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
pub use typo::{TypoCondition, TypoGraph};
|
pub use typo::{TypoCondition, TypoGraph};
|
||||||
@ -113,7 +114,7 @@ pub trait RankingRuleGraphTrait: Sized {
|
|||||||
fn log_state(
|
fn log_state(
|
||||||
graph: &RankingRuleGraph<Self>,
|
graph: &RankingRuleGraph<Self>,
|
||||||
paths: &[Vec<Interned<Self::Condition>>],
|
paths: &[Vec<Interned<Self::Condition>>],
|
||||||
dead_end_path_cache: &DeadEndPathCache<Self>,
|
dead_end_path_cache: &DeadEndsCache<Self::Condition>,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
distances: &MappedInterner<Vec<(u16, SmallBitmap<Self::Condition>)>, QueryNode>,
|
distances: &MappedInterner<Vec<(u16, SmallBitmap<Self::Condition>)>, QueryNode>,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
|
@ -2,104 +2,165 @@
|
|||||||
// For the empty_prefixes field in the EmptyPathsCache only :/
|
// For the empty_prefixes field in the EmptyPathsCache only :/
|
||||||
// but it could be used for more, like efficient computing of a set of paths
|
// but it could be used for more, like efficient computing of a set of paths
|
||||||
|
|
||||||
use crate::search::new::interner::Interned;
|
use crate::search::new::{
|
||||||
|
interner::{FixedSizeInterner, Interned},
|
||||||
|
small_bitmap::SmallBitmap,
|
||||||
|
};
|
||||||
|
|
||||||
/// A set of `Vec<Interned<T>>` implemented as a prefix tree.
|
pub struct DeadEndsCache<T> {
|
||||||
pub struct PathSet<T> {
|
|
||||||
nodes: Vec<(Interned<T>, Self)>,
|
nodes: Vec<(Interned<T>, Self)>,
|
||||||
is_end: bool,
|
pub forbidden: SmallBitmap<T>,
|
||||||
}
|
}
|
||||||
|
impl<T> DeadEndsCache<T> {
|
||||||
impl<T> Clone for PathSet<T> {
|
pub fn new(for_interner: &FixedSizeInterner<T>) -> Self {
|
||||||
fn clone(&self) -> Self {
|
Self { nodes: vec![], forbidden: SmallBitmap::for_interned_values_in(for_interner) }
|
||||||
Self { nodes: self.nodes.clone(), is_end: self.is_end }
|
|
||||||
}
|
}
|
||||||
}
|
pub fn forbid_condition(&mut self, condition: Interned<T>) {
|
||||||
|
self.forbidden.insert(condition);
|
||||||
impl<T> std::fmt::Debug for PathSet<T> {
|
|
||||||
fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
|
||||||
f.debug_struct("PathSet").field("nodes", &self.nodes).field("is_end", &self.is_end).finish()
|
|
||||||
}
|
}
|
||||||
}
|
fn advance(&mut self, condition: Interned<T>) -> Option<&mut Self> {
|
||||||
|
for (e, next_node) in &mut self.nodes {
|
||||||
impl<T> Default for PathSet<T> {
|
if condition == *e {
|
||||||
fn default() -> Self {
|
return Some(next_node);
|
||||||
Self { nodes: Default::default(), is_end: Default::default() }
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
impl<T> PathSet<T> {
|
|
||||||
pub fn insert(&mut self, mut edges: impl Iterator<Item = Interned<T>>) {
|
|
||||||
match edges.next() {
|
|
||||||
None => {
|
|
||||||
self.is_end = true;
|
|
||||||
}
|
|
||||||
Some(first_edge) => {
|
|
||||||
for (edge, next_node) in &mut self.nodes {
|
|
||||||
if edge == &first_edge {
|
|
||||||
return next_node.insert(edges);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
let mut rest = PathSet::default();
|
|
||||||
rest.insert(edges);
|
|
||||||
self.nodes.push((first_edge, rest));
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
None
|
||||||
}
|
}
|
||||||
|
pub fn forbidden_conditions_after_prefix(
|
||||||
pub fn remove_edge(&mut self, forbidden_edge: Interned<T>) {
|
&mut self,
|
||||||
let mut i = 0;
|
mut prefix: &[Interned<T>],
|
||||||
while i < self.nodes.len() {
|
) -> Option<SmallBitmap<T>> {
|
||||||
let should_remove = if self.nodes[i].0 == forbidden_edge {
|
let mut cursor = self;
|
||||||
true
|
for c in prefix.iter() {
|
||||||
} else if !self.nodes[i].1.nodes.is_empty() {
|
if let Some(next) = cursor.advance(*c) {
|
||||||
self.nodes[i].1.remove_edge(forbidden_edge);
|
cursor = next;
|
||||||
self.nodes[i].1.nodes.is_empty()
|
|
||||||
} else {
|
} else {
|
||||||
false
|
return None;
|
||||||
};
|
|
||||||
if should_remove {
|
|
||||||
self.nodes.remove(i);
|
|
||||||
} else {
|
|
||||||
i += 1;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Some(cursor.forbidden.clone())
|
||||||
}
|
}
|
||||||
|
pub fn forbid_condition_after_prefix(
|
||||||
pub fn final_edges_after_prefix(
|
&mut self,
|
||||||
&self,
|
mut prefix: impl Iterator<Item = Interned<T>>,
|
||||||
prefix: &[Interned<T>],
|
forbidden: Interned<T>,
|
||||||
visit: &mut impl FnMut(Interned<T>),
|
|
||||||
) {
|
) {
|
||||||
let [first_edge, remaining_prefix @ ..] = prefix else {
|
match prefix.next() {
|
||||||
for node in self.nodes.iter() {
|
None => {
|
||||||
if node.1.is_end {
|
self.forbidden.insert(forbidden);
|
||||||
visit(node.0)
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
return
|
Some(first_condition) => {
|
||||||
};
|
for (condition, next_node) in &mut self.nodes {
|
||||||
for (edge, rest) in self.nodes.iter() {
|
if condition == &first_condition {
|
||||||
if edge == first_edge {
|
return next_node.forbid_condition_after_prefix(prefix, forbidden);
|
||||||
return rest.final_edges_after_prefix(remaining_prefix, visit);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn contains_prefix_of_path(&self, path: &[Interned<T>]) -> bool {
|
|
||||||
if self.is_end {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
match path {
|
|
||||||
[] => false,
|
|
||||||
[first_edge, remaining_path @ ..] => {
|
|
||||||
for (edge, rest) in self.nodes.iter() {
|
|
||||||
if edge == first_edge {
|
|
||||||
return rest.contains_prefix_of_path(remaining_path);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
false
|
let mut rest = DeadEndsCache {
|
||||||
|
nodes: vec![],
|
||||||
|
forbidden: SmallBitmap::new(self.forbidden.universe_length()),
|
||||||
|
};
|
||||||
|
rest.forbid_condition_after_prefix(prefix, forbidden);
|
||||||
|
self.nodes.push((first_condition, rest));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
// /// A set of `Vec<Interned<T>>` implemented as a prefix tree.
|
||||||
|
// pub struct PathSet<T> {
|
||||||
|
// nodes: Vec<(Interned<T>, Self)>,
|
||||||
|
// is_end: bool,
|
||||||
|
// }
|
||||||
|
|
||||||
|
// impl<T> Clone for PathSet<T> {
|
||||||
|
// fn clone(&self) -> Self {
|
||||||
|
// Self { nodes: self.nodes.clone(), is_end: self.is_end }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
// impl<T> std::fmt::Debug for PathSet<T> {
|
||||||
|
// fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
|
||||||
|
// f.debug_struct("PathSet").field("nodes", &self.nodes).field("is_end", &self.is_end).finish()
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
// impl<T> Default for PathSet<T> {
|
||||||
|
// fn default() -> Self {
|
||||||
|
// Self { nodes: Default::default(), is_end: Default::default() }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
// impl<T> PathSet<T> {
|
||||||
|
// pub fn insert(&mut self, mut conditions: impl Iterator<Item = Interned<T>>) {
|
||||||
|
// match conditions.next() {
|
||||||
|
// None => {
|
||||||
|
// self.is_end = true;
|
||||||
|
// }
|
||||||
|
// Some(first_condition) => {
|
||||||
|
// for (condition, next_node) in &mut self.nodes {
|
||||||
|
// if condition == &first_condition {
|
||||||
|
// return next_node.insert(conditions);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// let mut rest = PathSet::default();
|
||||||
|
// rest.insert(conditions);
|
||||||
|
// self.nodes.push((first_condition, rest));
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
// pub fn remove_condition(&mut self, forbidden_condition: Interned<T>) {
|
||||||
|
// let mut i = 0;
|
||||||
|
// while i < self.nodes.len() {
|
||||||
|
// let should_remove = if self.nodes[i].0 == forbidden_condition {
|
||||||
|
// true
|
||||||
|
// } else if !self.nodes[i].1.nodes.is_empty() {
|
||||||
|
// self.nodes[i].1.remove_condition(forbidden_condition);
|
||||||
|
// self.nodes[i].1.nodes.is_empty()
|
||||||
|
// } else {
|
||||||
|
// false
|
||||||
|
// };
|
||||||
|
// if should_remove {
|
||||||
|
// self.nodes.remove(i);
|
||||||
|
// } else {
|
||||||
|
// i += 1;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
// pub fn final_conditions_after_prefix(
|
||||||
|
// &self,
|
||||||
|
// prefix: &[Interned<T>],
|
||||||
|
// visit: &mut impl FnMut(Interned<T>),
|
||||||
|
// ) {
|
||||||
|
// let [first_condition, remaining_prefix @ ..] = prefix else {
|
||||||
|
// for node in self.nodes.iter() {
|
||||||
|
// if node.1.is_end {
|
||||||
|
// visit(node.0)
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// return
|
||||||
|
// };
|
||||||
|
// for (condition, rest) in self.nodes.iter() {
|
||||||
|
// if condition == first_condition {
|
||||||
|
// return rest.final_conditions_after_prefix(remaining_prefix, visit);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
|
||||||
|
// pub fn contains_prefix_of_path(&self, path: &[Interned<T>]) -> bool {
|
||||||
|
// if self.is_end {
|
||||||
|
// return true;
|
||||||
|
// }
|
||||||
|
// match path {
|
||||||
|
// [] => false,
|
||||||
|
// [first_condition, remaining_path @ ..] => {
|
||||||
|
// for (condition, rest) in self.nodes.iter() {
|
||||||
|
// if condition == first_condition {
|
||||||
|
// return rest.contains_prefix_of_path(remaining_path);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// false
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
@ -6,8 +6,7 @@ use std::iter::FromIterator;
|
|||||||
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::dead_end_path_cache::DeadEndPathCache;
|
use super::{RankingRuleGraph, RankingRuleGraphTrait, DeadEndsCache};
|
||||||
use super::{RankingRuleGraph, RankingRuleGraphTrait};
|
|
||||||
use crate::search::new::interner::{DedupInterner, Interned, MappedInterner};
|
use crate::search::new::interner::{DedupInterner, Interned, MappedInterner};
|
||||||
use crate::search::new::logger::SearchLogger;
|
use crate::search::new::logger::SearchLogger;
|
||||||
use crate::search::new::query_term::{Phrase, QueryTerm};
|
use crate::search::new::query_term::{Phrase, QueryTerm};
|
||||||
@ -67,7 +66,7 @@ impl RankingRuleGraphTrait for ProximityGraph {
|
|||||||
fn log_state(
|
fn log_state(
|
||||||
graph: &RankingRuleGraph<Self>,
|
graph: &RankingRuleGraph<Self>,
|
||||||
paths: &[Vec<Interned<ProximityCondition>>],
|
paths: &[Vec<Interned<ProximityCondition>>],
|
||||||
dead_end_path_cache: &DeadEndPathCache<Self>,
|
dead_end_path_cache: &DeadEndsCache<Self::Condition>,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>,
|
distances: &MappedInterner<Vec<(u16, SmallBitmap<ProximityCondition>)>, QueryNode>,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
|
@ -1,7 +1,6 @@
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::dead_end_path_cache::DeadEndPathCache;
|
use super::{RankingRuleGraph, RankingRuleGraphTrait, DeadEndsCache};
|
||||||
use super::{RankingRuleGraph, RankingRuleGraphTrait};
|
|
||||||
use crate::search::new::interner::{DedupInterner, Interned, MappedInterner};
|
use crate::search::new::interner::{DedupInterner, Interned, MappedInterner};
|
||||||
use crate::search::new::logger::SearchLogger;
|
use crate::search::new::logger::SearchLogger;
|
||||||
use crate::search::new::query_graph::QueryNodeData;
|
use crate::search::new::query_graph::QueryNodeData;
|
||||||
@ -137,7 +136,7 @@ impl RankingRuleGraphTrait for TypoGraph {
|
|||||||
fn log_state(
|
fn log_state(
|
||||||
graph: &RankingRuleGraph<Self>,
|
graph: &RankingRuleGraph<Self>,
|
||||||
paths: &[Vec<Interned<TypoCondition>>],
|
paths: &[Vec<Interned<TypoCondition>>],
|
||||||
dead_end_path_cache: &DeadEndPathCache<Self>,
|
dead_end_path_cache: &DeadEndsCache<TypoCondition>,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoCondition>)>, QueryNode>,
|
distances: &MappedInterner<Vec<(u16, SmallBitmap<TypoCondition>)>, QueryNode>,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
|
@ -28,6 +28,12 @@ impl<T> SmallBitmap<T> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
pub fn universe_length(&self) -> u16 {
|
||||||
|
match &self.internal {
|
||||||
|
SmallBitmapInternal::Tiny(_) => 64,
|
||||||
|
SmallBitmapInternal::Small(xs) => 64 * xs.len() as u16,
|
||||||
|
}
|
||||||
|
}
|
||||||
pub fn from_iter(
|
pub fn from_iter(
|
||||||
xs: impl Iterator<Item = Interned<T>>,
|
xs: impl Iterator<Item = Interned<T>>,
|
||||||
for_interner: &FixedSizeInterner<T>,
|
for_interner: &FixedSizeInterner<T>,
|
||||||
|
Loading…
Reference in New Issue
Block a user