mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 06:44:27 +01:00
Improve performance of the cheapest path finder algorithm
This commit is contained in:
parent
59b12fca87
commit
79001b9c97
@ -6,7 +6,7 @@ use std::path::Path;
|
|||||||
use heed::EnvOpenOptions;
|
use heed::EnvOpenOptions;
|
||||||
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
|
||||||
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
|
||||||
use milli::{Criterion, Index, Object};
|
use milli::{Index, Object};
|
||||||
|
|
||||||
fn usage(error: &str, program_name: &str) -> String {
|
fn usage(error: &str, program_name: &str) -> String {
|
||||||
format!(
|
format!(
|
||||||
@ -52,18 +52,10 @@ fn main() -> Result<(), Box<dyn Error>> {
|
|||||||
let filterable_fields = filterable_fields.iter().map(|s| s.to_string()).collect();
|
let filterable_fields = filterable_fields.iter().map(|s| s.to_string()).collect();
|
||||||
builder.set_filterable_fields(filterable_fields);
|
builder.set_filterable_fields(filterable_fields);
|
||||||
|
|
||||||
builder.set_criteria(vec![
|
|
||||||
Criterion::Words,
|
|
||||||
Criterion::Typo,
|
|
||||||
Criterion::Proximity,
|
|
||||||
Criterion::Attribute,
|
|
||||||
]);
|
|
||||||
builder.execute(|_| (), || false).unwrap();
|
builder.execute(|_| (), || false).unwrap();
|
||||||
|
|
||||||
let config = IndexerConfig::default();
|
let config = IndexerConfig::default();
|
||||||
let mut indexing_config = IndexDocumentsConfig::default();
|
let indexing_config = IndexDocumentsConfig::default();
|
||||||
|
|
||||||
indexing_config.autogenerate_docids = true;
|
|
||||||
|
|
||||||
let builder =
|
let builder =
|
||||||
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| (), || false).unwrap();
|
IndexDocuments::new(&mut wtxn, &index, &config, indexing_config, |_| (), || false).unwrap();
|
||||||
|
@ -24,8 +24,8 @@ fn main() {
|
|||||||
Criterion::Typo,
|
Criterion::Typo,
|
||||||
Criterion::Proximity,
|
Criterion::Proximity,
|
||||||
Criterion::Attribute,
|
Criterion::Attribute,
|
||||||
|
Criterion::Sort,
|
||||||
Criterion::Exactness,
|
Criterion::Exactness,
|
||||||
// Criterion::Asc("release_date".to_owned()),
|
|
||||||
]);
|
]);
|
||||||
|
|
||||||
builder.execute(|_| (), || false).unwrap();
|
builder.execute(|_| (), || false).unwrap();
|
||||||
|
@ -72,7 +72,11 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
|
|||||||
/// Update the universes accordingly and inform the logger.
|
/// Update the universes accordingly and inform the logger.
|
||||||
macro_rules! back {
|
macro_rules! back {
|
||||||
() => {
|
() => {
|
||||||
assert!(ranking_rule_universes[cur_ranking_rule_index].is_empty());
|
assert!(
|
||||||
|
ranking_rule_universes[cur_ranking_rule_index].is_empty(),
|
||||||
|
"The ranking rule {} did not sort its bucket exhaustively",
|
||||||
|
ranking_rules[cur_ranking_rule_index].id()
|
||||||
|
);
|
||||||
logger.end_iteration_ranking_rule(
|
logger.end_iteration_ranking_rule(
|
||||||
cur_ranking_rule_index,
|
cur_ranking_rule_index,
|
||||||
ranking_rules[cur_ranking_rule_index].as_ref(),
|
ranking_rules[cur_ranking_rule_index].as_ref(),
|
||||||
|
@ -36,6 +36,7 @@ That is we find the documents where either:
|
|||||||
- OR: `pretty` is 2-close to `house` AND `house` is 1-close to `by`
|
- OR: `pretty` is 2-close to `house` AND `house` is 1-close to `by`
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
use std::collections::BTreeSet;
|
||||||
use std::ops::ControlFlow;
|
use std::ops::ControlFlow;
|
||||||
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
@ -99,6 +100,8 @@ impl<G: RankingRuleGraphTrait> GraphBasedRankingRule<G> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
static mut COUNT_PATHS: usize = 0;
|
||||||
|
|
||||||
/// The internal state of a graph-based ranking rule during iteration
|
/// The internal state of a graph-based ranking rule during iteration
|
||||||
pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
|
pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
|
||||||
/// The current graph
|
/// The current graph
|
||||||
@ -110,7 +113,7 @@ pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
|
|||||||
/// A structure giving the list of possible costs from each node to the end node
|
/// A structure giving the list of possible costs from each node to the end node
|
||||||
all_costs: MappedInterner<QueryNode, Vec<u64>>,
|
all_costs: MappedInterner<QueryNode, Vec<u64>>,
|
||||||
/// An index in the first element of `all_distances`, giving the cost of the next bucket
|
/// An index in the first element of `all_distances`, giving the cost of the next bucket
|
||||||
cur_distance_idx: usize,
|
cur_cost: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBasedRankingRule<G> {
|
impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBasedRankingRule<G> {
|
||||||
@ -160,7 +163,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
|||||||
conditions_cache: condition_docids_cache,
|
conditions_cache: condition_docids_cache,
|
||||||
dead_ends_cache,
|
dead_ends_cache,
|
||||||
all_costs,
|
all_costs,
|
||||||
cur_distance_idx: 0,
|
cur_cost: 0,
|
||||||
};
|
};
|
||||||
|
|
||||||
self.state = Some(state);
|
self.state = Some(state);
|
||||||
@ -181,16 +184,16 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
|||||||
// should never happen
|
// should never happen
|
||||||
let mut state = self.state.take().unwrap();
|
let mut state = self.state.take().unwrap();
|
||||||
|
|
||||||
// If the cur_distance_idx does not point to a valid cost in the `all_distances`
|
// Retrieve the cost of the paths to compute
|
||||||
// structure, then we have computed all the buckets and can return.
|
let Some(&cost) = state
|
||||||
if state.cur_distance_idx >= state.all_costs.get(state.graph.query_graph.root_node).len() {
|
.all_costs
|
||||||
|
.get(state.graph.query_graph.root_node)
|
||||||
|
.iter()
|
||||||
|
.find(|c| **c >= state.cur_cost) else {
|
||||||
self.state = None;
|
self.state = None;
|
||||||
return Ok(None);
|
return Ok(None);
|
||||||
}
|
};
|
||||||
|
state.cur_cost = cost + 1;
|
||||||
// Retrieve the cost of the paths to compute
|
|
||||||
let cost = state.all_costs.get(state.graph.query_graph.root_node)[state.cur_distance_idx];
|
|
||||||
state.cur_distance_idx += 1;
|
|
||||||
|
|
||||||
let mut bucket = RoaringBitmap::new();
|
let mut bucket = RoaringBitmap::new();
|
||||||
|
|
||||||
@ -199,7 +202,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
|||||||
conditions_cache: condition_docids_cache,
|
conditions_cache: condition_docids_cache,
|
||||||
dead_ends_cache,
|
dead_ends_cache,
|
||||||
all_costs,
|
all_costs,
|
||||||
cur_distance_idx: _,
|
cur_cost: _,
|
||||||
} = &mut state;
|
} = &mut state;
|
||||||
|
|
||||||
let mut universe = universe.clone();
|
let mut universe = universe.clone();
|
||||||
@ -216,9 +219,34 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
|||||||
// the number of future candidate paths given by that same function.
|
// the number of future candidate paths given by that same function.
|
||||||
|
|
||||||
let mut subpaths_docids: Vec<(Interned<G::Condition>, RoaringBitmap)> = vec![];
|
let mut subpaths_docids: Vec<(Interned<G::Condition>, RoaringBitmap)> = vec![];
|
||||||
|
let mut at_least_one = false;
|
||||||
|
|
||||||
|
// unsafe {
|
||||||
|
// if COUNT_PATHS >= 1489 && COUNT_PATHS < 1491 {
|
||||||
|
// println!("COUNT_PATHS {COUNT_PATHS} COST {cost}, NODES {COUNT_VISITED_NODES}, UNIVERSE {}", universe.len());
|
||||||
|
// // let all_costs = all_costs.get(graph.query_graph.root_node);
|
||||||
|
// // println!("{all_costs:?}");
|
||||||
|
// dead_ends_cache.debug_print(0);
|
||||||
|
// println!("{universe:?}");
|
||||||
|
|
||||||
|
// println!("==================");
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
let mut nodes_with_removed_outgoing_conditions = BTreeSet::new();
|
||||||
let visitor = PathVisitor::new(cost, graph, all_costs, dead_ends_cache);
|
let visitor = PathVisitor::new(cost, graph, all_costs, dead_ends_cache);
|
||||||
|
|
||||||
visitor.visit_paths(&mut |path, graph, dead_ends_cache| {
|
visitor.visit_paths(&mut |path, graph, dead_ends_cache| {
|
||||||
|
unsafe {
|
||||||
|
COUNT_PATHS += 1;
|
||||||
|
}
|
||||||
|
// if self.id == "position" {
|
||||||
|
// at_least_one = true;
|
||||||
|
// print!(".");
|
||||||
|
// }
|
||||||
|
// if self.id == "fid" {
|
||||||
|
at_least_one = true;
|
||||||
|
// print!("!");
|
||||||
|
// }
|
||||||
considered_paths.push(path.to_vec());
|
considered_paths.push(path.to_vec());
|
||||||
// If the universe is empty, stop exploring the graph, since no docids will ever be found anymore.
|
// If the universe is empty, stop exploring the graph, since no docids will ever be found anymore.
|
||||||
if universe.is_empty() {
|
if universe.is_empty() {
|
||||||
@ -243,7 +271,6 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
|||||||
};
|
};
|
||||||
// Then for the remaining of the path, we continue computing docids.
|
// Then for the remaining of the path, we continue computing docids.
|
||||||
for latest_condition in path[idx_of_first_different_condition..].iter().copied() {
|
for latest_condition in path[idx_of_first_different_condition..].iter().copied() {
|
||||||
// The visit_path_condition will stop
|
|
||||||
let success = visit_path_condition(
|
let success = visit_path_condition(
|
||||||
ctx,
|
ctx,
|
||||||
graph,
|
graph,
|
||||||
@ -251,6 +278,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
|||||||
dead_ends_cache,
|
dead_ends_cache,
|
||||||
condition_docids_cache,
|
condition_docids_cache,
|
||||||
&mut subpaths_docids,
|
&mut subpaths_docids,
|
||||||
|
&mut nodes_with_removed_outgoing_conditions,
|
||||||
latest_condition,
|
latest_condition,
|
||||||
)?;
|
)?;
|
||||||
if !success {
|
if !success {
|
||||||
@ -281,7 +309,11 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
|||||||
Ok(ControlFlow::Continue(()))
|
Ok(ControlFlow::Continue(()))
|
||||||
}
|
}
|
||||||
})?;
|
})?;
|
||||||
|
// if at_least_one {
|
||||||
|
// unsafe {
|
||||||
|
// println!("\n===== {id} COST: {cost} ==== PATHS: {COUNT_PATHS} ==== NODES: {COUNT_VISITED_NODES} ===== UNIVERSE: {universe}", id=self.id, universe=universe.len());
|
||||||
|
// }
|
||||||
|
// }
|
||||||
logger.log_internal_state(graph);
|
logger.log_internal_state(graph);
|
||||||
logger.log_internal_state(&good_paths);
|
logger.log_internal_state(&good_paths);
|
||||||
|
|
||||||
@ -305,6 +337,10 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
|||||||
|
|
||||||
let next_query_graph = QueryGraph::build_from_paths(paths);
|
let next_query_graph = QueryGraph::build_from_paths(paths);
|
||||||
|
|
||||||
|
if !nodes_with_removed_outgoing_conditions.is_empty() {
|
||||||
|
graph.update_all_costs_before_nodes(&nodes_with_removed_outgoing_conditions, all_costs);
|
||||||
|
}
|
||||||
|
|
||||||
self.state = Some(state);
|
self.state = Some(state);
|
||||||
|
|
||||||
Ok(Some(RankingRuleOutput { query: next_query_graph, candidates: bucket }))
|
Ok(Some(RankingRuleOutput { query: next_query_graph, candidates: bucket }))
|
||||||
@ -321,6 +357,7 @@ impl<'ctx, G: RankingRuleGraphTrait> RankingRule<'ctx, QueryGraph> for GraphBase
|
|||||||
|
|
||||||
/// Returns false if the intersection between the condition
|
/// Returns false if the intersection between the condition
|
||||||
/// docids and the previous path docids is empty.
|
/// docids and the previous path docids is empty.
|
||||||
|
#[allow(clippy::too_many_arguments)]
|
||||||
fn visit_path_condition<G: RankingRuleGraphTrait>(
|
fn visit_path_condition<G: RankingRuleGraphTrait>(
|
||||||
ctx: &mut SearchContext,
|
ctx: &mut SearchContext,
|
||||||
graph: &mut RankingRuleGraph<G>,
|
graph: &mut RankingRuleGraph<G>,
|
||||||
@ -328,6 +365,7 @@ fn visit_path_condition<G: RankingRuleGraphTrait>(
|
|||||||
dead_ends_cache: &mut DeadEndsCache<G::Condition>,
|
dead_ends_cache: &mut DeadEndsCache<G::Condition>,
|
||||||
condition_docids_cache: &mut ConditionDocIdsCache<G>,
|
condition_docids_cache: &mut ConditionDocIdsCache<G>,
|
||||||
subpath: &mut Vec<(Interned<G::Condition>, RoaringBitmap)>,
|
subpath: &mut Vec<(Interned<G::Condition>, RoaringBitmap)>,
|
||||||
|
nodes_with_removed_outgoing_conditions: &mut BTreeSet<Interned<QueryNode>>,
|
||||||
latest_condition: Interned<G::Condition>,
|
latest_condition: Interned<G::Condition>,
|
||||||
) -> Result<bool> {
|
) -> Result<bool> {
|
||||||
let condition_docids = &condition_docids_cache
|
let condition_docids = &condition_docids_cache
|
||||||
@ -337,7 +375,8 @@ fn visit_path_condition<G: RankingRuleGraphTrait>(
|
|||||||
// 1. Store in the cache that this edge is empty for this universe
|
// 1. Store in the cache that this edge is empty for this universe
|
||||||
dead_ends_cache.forbid_condition(latest_condition);
|
dead_ends_cache.forbid_condition(latest_condition);
|
||||||
// 2. remove all the edges with this condition from the ranking rule graph
|
// 2. remove all the edges with this condition from the ranking rule graph
|
||||||
graph.remove_edges_with_condition(latest_condition);
|
let source_nodes = graph.remove_edges_with_condition(latest_condition);
|
||||||
|
nodes_with_removed_outgoing_conditions.extend(source_nodes);
|
||||||
return Ok(false);
|
return Ok(false);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -100,16 +100,21 @@ impl<G: RankingRuleGraphTrait> VisitorState<G> {
|
|||||||
let ControlFlow::Continue(next_any_valid) = cf else {
|
let ControlFlow::Continue(next_any_valid) = cf else {
|
||||||
return Ok(ControlFlow::Break(()));
|
return Ok(ControlFlow::Break(()));
|
||||||
};
|
};
|
||||||
|
any_valid |= next_any_valid;
|
||||||
if next_any_valid {
|
if next_any_valid {
|
||||||
|
// backtrack as much as possible if a valid path was found and the dead_ends_cache
|
||||||
|
// was updated such that the current prefix is now invalid
|
||||||
self.forbidden_conditions = ctx
|
self.forbidden_conditions = ctx
|
||||||
.dead_ends_cache
|
.dead_ends_cache
|
||||||
.forbidden_conditions_for_all_prefixes_up_to(self.path.iter().copied());
|
.forbidden_conditions_for_all_prefixes_up_to(self.path.iter().copied());
|
||||||
if self.visited_conditions.intersects(&self.forbidden_conditions) {
|
if self.visited_conditions.intersects(&self.forbidden_conditions) {
|
||||||
break;
|
return Ok(ControlFlow::Continue(true));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
any_valid |= next_any_valid;
|
|
||||||
}
|
}
|
||||||
|
// if there wasn't any valid path from this node to the end node, then
|
||||||
|
// this node is a dead end **for this specific cost**.
|
||||||
|
// we could encode this in the dead-ends cache
|
||||||
|
|
||||||
Ok(ControlFlow::Continue(any_valid))
|
Ok(ControlFlow::Continue(any_valid))
|
||||||
}
|
}
|
||||||
@ -117,7 +122,7 @@ impl<G: RankingRuleGraphTrait> VisitorState<G> {
|
|||||||
fn visit_no_condition(
|
fn visit_no_condition(
|
||||||
&mut self,
|
&mut self,
|
||||||
dest_node: Interned<QueryNode>,
|
dest_node: Interned<QueryNode>,
|
||||||
edge_forbidden_nodes: &SmallBitmap<QueryNode>,
|
edge_new_nodes_to_skip: &SmallBitmap<QueryNode>,
|
||||||
visit: VisitFn<G>,
|
visit: VisitFn<G>,
|
||||||
ctx: &mut VisitorContext<G>,
|
ctx: &mut VisitorContext<G>,
|
||||||
) -> Result<ControlFlow<(), bool>> {
|
) -> Result<ControlFlow<(), bool>> {
|
||||||
@ -137,7 +142,7 @@ impl<G: RankingRuleGraphTrait> VisitorState<G> {
|
|||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
let old_fbct = self.forbidden_conditions_to_nodes.clone();
|
let old_fbct = self.forbidden_conditions_to_nodes.clone();
|
||||||
self.forbidden_conditions_to_nodes.union(edge_forbidden_nodes);
|
self.forbidden_conditions_to_nodes.union(edge_new_nodes_to_skip);
|
||||||
let cf = self.visit_node(dest_node, visit, ctx)?;
|
let cf = self.visit_node(dest_node, visit, ctx)?;
|
||||||
self.forbidden_conditions_to_nodes = old_fbct;
|
self.forbidden_conditions_to_nodes = old_fbct;
|
||||||
Ok(cf)
|
Ok(cf)
|
||||||
@ -147,14 +152,14 @@ impl<G: RankingRuleGraphTrait> VisitorState<G> {
|
|||||||
&mut self,
|
&mut self,
|
||||||
condition: Interned<G::Condition>,
|
condition: Interned<G::Condition>,
|
||||||
dest_node: Interned<QueryNode>,
|
dest_node: Interned<QueryNode>,
|
||||||
edge_forbidden_nodes: &SmallBitmap<QueryNode>,
|
edge_new_nodes_to_skip: &SmallBitmap<QueryNode>,
|
||||||
visit: VisitFn<G>,
|
visit: VisitFn<G>,
|
||||||
ctx: &mut VisitorContext<G>,
|
ctx: &mut VisitorContext<G>,
|
||||||
) -> Result<ControlFlow<(), bool>> {
|
) -> Result<ControlFlow<(), bool>> {
|
||||||
assert!(dest_node != ctx.graph.query_graph.end_node);
|
assert!(dest_node != ctx.graph.query_graph.end_node);
|
||||||
|
|
||||||
if self.forbidden_conditions_to_nodes.contains(dest_node)
|
if self.forbidden_conditions_to_nodes.contains(dest_node)
|
||||||
|| edge_forbidden_nodes.intersects(&self.visited_nodes)
|
|| edge_new_nodes_to_skip.intersects(&self.visited_nodes)
|
||||||
{
|
{
|
||||||
return Ok(ControlFlow::Continue(false));
|
return Ok(ControlFlow::Continue(false));
|
||||||
}
|
}
|
||||||
@ -162,11 +167,13 @@ impl<G: RankingRuleGraphTrait> VisitorState<G> {
|
|||||||
return Ok(ControlFlow::Continue(false));
|
return Ok(ControlFlow::Continue(false));
|
||||||
}
|
}
|
||||||
|
|
||||||
if ctx
|
// Checking that from the destination node, there is at least
|
||||||
|
// one cost that we can visit that corresponds to our remaining budget.
|
||||||
|
if !ctx
|
||||||
.all_costs_from_node
|
.all_costs_from_node
|
||||||
.get(dest_node)
|
.get(dest_node)
|
||||||
.iter()
|
.iter()
|
||||||
.all(|next_cost| *next_cost != self.remaining_cost)
|
.any(|next_cost| *next_cost == self.remaining_cost)
|
||||||
{
|
{
|
||||||
return Ok(ControlFlow::Continue(false));
|
return Ok(ControlFlow::Continue(false));
|
||||||
}
|
}
|
||||||
@ -182,7 +189,7 @@ impl<G: RankingRuleGraphTrait> VisitorState<G> {
|
|||||||
self.forbidden_conditions.union(&next_forbidden);
|
self.forbidden_conditions.union(&next_forbidden);
|
||||||
}
|
}
|
||||||
let old_fctn = self.forbidden_conditions_to_nodes.clone();
|
let old_fctn = self.forbidden_conditions_to_nodes.clone();
|
||||||
self.forbidden_conditions_to_nodes.union(edge_forbidden_nodes);
|
self.forbidden_conditions_to_nodes.union(edge_new_nodes_to_skip);
|
||||||
|
|
||||||
let cf = self.visit_node(dest_node, visit, ctx)?;
|
let cf = self.visit_node(dest_node, visit, ctx)?;
|
||||||
|
|
||||||
@ -212,22 +219,21 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
while let Some(cur_node) = node_stack.pop_front() {
|
while let Some(cur_node) = node_stack.pop_front() {
|
||||||
let mut self_costs = BTreeSet::<u64>::new();
|
let mut self_costs = Vec::<u64>::new();
|
||||||
|
|
||||||
let cur_node_edges = &self.edges_of_node.get(cur_node);
|
let cur_node_edges = &self.edges_of_node.get(cur_node);
|
||||||
for edge_idx in cur_node_edges.iter() {
|
for edge_idx in cur_node_edges.iter() {
|
||||||
let edge = self.edges_store.get(edge_idx).as_ref().unwrap();
|
let edge = self.edges_store.get(edge_idx).as_ref().unwrap();
|
||||||
let succ_node = edge.dest_node;
|
let succ_node = edge.dest_node;
|
||||||
let succ_costs = costs_to_end.get(succ_node);
|
let succ_costs = costs_to_end.get(succ_node);
|
||||||
for succ_distance in succ_costs {
|
for succ_cost in succ_costs {
|
||||||
self_costs.insert(edge.cost as u64 + succ_distance);
|
self_costs.push(edge.cost as u64 + succ_cost);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
let costs_to_end_cur_node = costs_to_end.get_mut(cur_node);
|
self_costs.sort_unstable();
|
||||||
for cost in self_costs.iter() {
|
self_costs.dedup();
|
||||||
costs_to_end_cur_node.push(*cost);
|
|
||||||
}
|
*costs_to_end.get_mut(cur_node) = self_costs;
|
||||||
*costs_to_end.get_mut(cur_node) = self_costs.into_iter().collect();
|
|
||||||
for prev_node in self.query_graph.nodes.get(cur_node).predecessors.iter() {
|
for prev_node in self.query_graph.nodes.get(cur_node).predecessors.iter() {
|
||||||
if !enqueued.contains(prev_node) {
|
if !enqueued.contains(prev_node) {
|
||||||
node_stack.push_back(prev_node);
|
node_stack.push_back(prev_node);
|
||||||
@ -237,4 +243,56 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
}
|
}
|
||||||
costs_to_end
|
costs_to_end
|
||||||
}
|
}
|
||||||
|
|
||||||
|
pub fn update_all_costs_before_nodes(
|
||||||
|
&self,
|
||||||
|
removed_nodes: &BTreeSet<Interned<QueryNode>>,
|
||||||
|
costs: &mut MappedInterner<QueryNode, Vec<u64>>,
|
||||||
|
) {
|
||||||
|
// unsafe {
|
||||||
|
// FIND_ALL_COSTS_INC_COUNT += 1;
|
||||||
|
// println!(
|
||||||
|
// "update_all_costs_after_removing_edge incrementally count: {}",
|
||||||
|
// FIND_ALL_COSTS_INC_COUNT
|
||||||
|
// );
|
||||||
|
// }
|
||||||
|
|
||||||
|
let mut enqueued = SmallBitmap::new(self.query_graph.nodes.len());
|
||||||
|
let mut node_stack = VecDeque::new();
|
||||||
|
|
||||||
|
for node in removed_nodes.iter() {
|
||||||
|
enqueued.insert(*node);
|
||||||
|
node_stack.push_back(*node);
|
||||||
|
}
|
||||||
|
|
||||||
|
while let Some(cur_node) = node_stack.pop_front() {
|
||||||
|
let mut self_costs = BTreeSet::<u64>::new();
|
||||||
|
|
||||||
|
let cur_node_edges = &self.edges_of_node.get(cur_node);
|
||||||
|
for edge_idx in cur_node_edges.iter() {
|
||||||
|
let edge = self.edges_store.get(edge_idx).as_ref().unwrap();
|
||||||
|
let succ_node = edge.dest_node;
|
||||||
|
let succ_costs = costs.get(succ_node);
|
||||||
|
for succ_distance in succ_costs {
|
||||||
|
self_costs.insert(edge.cost as u64 + succ_distance);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
let costs_to_end_cur_node = costs.get_mut(cur_node);
|
||||||
|
for cost in self_costs.iter() {
|
||||||
|
costs_to_end_cur_node.push(*cost);
|
||||||
|
}
|
||||||
|
let self_costs = self_costs.into_iter().collect::<Vec<_>>();
|
||||||
|
if &self_costs == costs.get(cur_node) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
*costs.get_mut(cur_node) = self_costs;
|
||||||
|
|
||||||
|
for prev_node in self.query_graph.nodes.get(cur_node).predecessors.iter() {
|
||||||
|
if !enqueued.contains(prev_node) {
|
||||||
|
node_stack.push_back(prev_node);
|
||||||
|
enqueued.insert(prev_node);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
@ -88,4 +88,12 @@ impl<T> DeadEndsCache<T> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// pub fn debug_print(&self, indent: usize) {
|
||||||
|
// println!("{} {:?}", " ".repeat(indent), self.forbidden.iter().collect::<Vec<_>>());
|
||||||
|
// for (condition, next) in self.conditions.iter().zip(self.next.iter()) {
|
||||||
|
// println!("{} {condition}:", " ".repeat(indent));
|
||||||
|
// next.debug_print(indent + 2);
|
||||||
|
// }
|
||||||
|
// }
|
||||||
}
|
}
|
||||||
|
@ -10,10 +10,10 @@ mod cheapest_paths;
|
|||||||
mod condition_docids_cache;
|
mod condition_docids_cache;
|
||||||
mod dead_ends_cache;
|
mod dead_ends_cache;
|
||||||
|
|
||||||
/// Implementation of the `attribute` ranking rule
|
|
||||||
mod fid;
|
|
||||||
/// Implementation of the `exactness` ranking rule
|
/// Implementation of the `exactness` ranking rule
|
||||||
mod exactness;
|
mod exactness;
|
||||||
|
/// Implementation of the `attribute` ranking rule
|
||||||
|
mod fid;
|
||||||
/// Implementation of the `position` ranking rule
|
/// Implementation of the `position` ranking rule
|
||||||
mod position;
|
mod position;
|
||||||
/// Implementation of the `proximity` ranking rule
|
/// Implementation of the `proximity` ranking rule
|
||||||
@ -21,13 +21,14 @@ mod proximity;
|
|||||||
/// Implementation of the `typo` ranking rule
|
/// Implementation of the `typo` ranking rule
|
||||||
mod typo;
|
mod typo;
|
||||||
|
|
||||||
|
use std::collections::BTreeSet;
|
||||||
use std::hash::Hash;
|
use std::hash::Hash;
|
||||||
|
|
||||||
pub use fid::{FidCondition, FidGraph};
|
|
||||||
pub use cheapest_paths::PathVisitor;
|
pub use cheapest_paths::PathVisitor;
|
||||||
pub use condition_docids_cache::ConditionDocIdsCache;
|
pub use condition_docids_cache::ConditionDocIdsCache;
|
||||||
pub use dead_ends_cache::DeadEndsCache;
|
pub use dead_ends_cache::DeadEndsCache;
|
||||||
pub use exactness::{ExactnessCondition, ExactnessGraph};
|
pub use exactness::{ExactnessCondition, ExactnessGraph};
|
||||||
|
pub use fid::{FidCondition, FidGraph};
|
||||||
pub use position::{PositionCondition, PositionGraph};
|
pub use position::{PositionCondition, PositionGraph};
|
||||||
pub use proximity::{ProximityCondition, ProximityGraph};
|
pub use proximity::{ProximityCondition, ProximityGraph};
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
@ -130,7 +131,12 @@ impl<G: RankingRuleGraphTrait> Clone for RankingRuleGraph<G> {
|
|||||||
}
|
}
|
||||||
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||||
/// Remove all edges with the given condition
|
/// Remove all edges with the given condition
|
||||||
pub fn remove_edges_with_condition(&mut self, condition_to_remove: Interned<G::Condition>) {
|
/// Return a set of all the source nodes of the removed edges
|
||||||
|
pub fn remove_edges_with_condition(
|
||||||
|
&mut self,
|
||||||
|
condition_to_remove: Interned<G::Condition>,
|
||||||
|
) -> BTreeSet<Interned<QueryNode>> {
|
||||||
|
let mut source_nodes = BTreeSet::new();
|
||||||
for (edge_id, edge_opt) in self.edges_store.iter_mut() {
|
for (edge_id, edge_opt) in self.edges_store.iter_mut() {
|
||||||
let Some(edge) = edge_opt.as_mut() else { continue };
|
let Some(edge) = edge_opt.as_mut() else { continue };
|
||||||
let Some(condition) = edge.condition else { continue };
|
let Some(condition) = edge.condition else { continue };
|
||||||
@ -139,7 +145,9 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
let (source_node, _dest_node) = (edge.source_node, edge.dest_node);
|
let (source_node, _dest_node) = (edge.source_node, edge.dest_node);
|
||||||
*edge_opt = None;
|
*edge_opt = None;
|
||||||
self.edges_of_node.get_mut(source_node).remove(edge_id);
|
self.edges_of_node.get_mut(source_node).remove(edge_id);
|
||||||
|
source_nodes.insert(source_node);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
source_nodes
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -74,7 +74,7 @@ impl RankingRuleGraphTrait for PositionGraph {
|
|||||||
|
|
||||||
let mut edges = vec![];
|
let mut edges = vec![];
|
||||||
for position in all_positions {
|
for position in all_positions {
|
||||||
let cost = {
|
let sum_positions = {
|
||||||
let mut cost = 0;
|
let mut cost = 0;
|
||||||
for i in 0..term.term_ids.len() {
|
for i in 0..term.term_ids.len() {
|
||||||
// This is actually not fully correct and slightly penalises ngrams unfairly.
|
// This is actually not fully correct and slightly penalises ngrams unfairly.
|
||||||
@ -89,7 +89,7 @@ impl RankingRuleGraphTrait for PositionGraph {
|
|||||||
// TODO: We can improve performances and relevancy by storing
|
// TODO: We can improve performances and relevancy by storing
|
||||||
// the term subsets associated to each position fetched.
|
// the term subsets associated to each position fetched.
|
||||||
edges.push((
|
edges.push((
|
||||||
cost,
|
cost_from_sum_positions(sum_positions),
|
||||||
conditions_interner.insert(PositionCondition {
|
conditions_interner.insert(PositionCondition {
|
||||||
term: term.clone(), // TODO remove this ugly clone
|
term: term.clone(), // TODO remove this ugly clone
|
||||||
position,
|
position,
|
||||||
@ -100,3 +100,26 @@ impl RankingRuleGraphTrait for PositionGraph {
|
|||||||
Ok(edges)
|
Ok(edges)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
fn cost_from_sum_positions(sum_positions: u32) -> u32 {
|
||||||
|
match sum_positions {
|
||||||
|
0 | 1 | 2 | 3 => sum_positions,
|
||||||
|
4 | 5 => 4,
|
||||||
|
6 | 7 => 5,
|
||||||
|
8 | 9 => 6,
|
||||||
|
10 | 11 => 7,
|
||||||
|
12 | 13 => 8,
|
||||||
|
14 | 15 => 9,
|
||||||
|
16 | 17..=24 => 10,
|
||||||
|
25..=32 => 11,
|
||||||
|
33..=64 => 12,
|
||||||
|
65..=128 => 13,
|
||||||
|
129..=256 => 14,
|
||||||
|
257..=512 => 15,
|
||||||
|
513..=1024 => 16,
|
||||||
|
1025..=2048 => 17,
|
||||||
|
2049..=4096 => 18,
|
||||||
|
4097..=8192 => 19,
|
||||||
|
_ => 20,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user