mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 06:44:27 +01:00
Apply a few optimisations for graph-based ranking rules
This commit is contained in:
parent
e8c76cf7bf
commit
9051065c22
@ -21,7 +21,7 @@ pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
|
|||||||
graph: RankingRuleGraph<G>,
|
graph: RankingRuleGraph<G>,
|
||||||
edge_docids_cache: EdgeDocidsCache<G>,
|
edge_docids_cache: EdgeDocidsCache<G>,
|
||||||
empty_paths_cache: EmptyPathsCache,
|
empty_paths_cache: EmptyPathsCache,
|
||||||
all_distances: Vec<Vec<u64>>,
|
all_distances: Vec<Vec<u16>>,
|
||||||
cur_distance_idx: usize,
|
cur_distance_idx: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -32,14 +32,14 @@ fn remove_empty_edges<'search, G: RankingRuleGraphTrait>(
|
|||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
empty_paths_cache: &mut EmptyPathsCache,
|
empty_paths_cache: &mut EmptyPathsCache,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
for edge_index in 0..graph.all_edges.len() as u32 {
|
for edge_index in 0..graph.all_edges.len() as u16 {
|
||||||
if graph.all_edges[edge_index as usize].is_none() {
|
if graph.all_edges[edge_index as usize].is_none() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
let docids = edge_docids_cache.get_edge_docids(ctx, edge_index, &*graph, universe)?;
|
let docids = edge_docids_cache.get_edge_docids(ctx, edge_index, &*graph, universe)?;
|
||||||
match docids {
|
match docids {
|
||||||
BitmapOrAllRef::Bitmap(bitmap) => {
|
BitmapOrAllRef::Bitmap(docids) => {
|
||||||
if bitmap.is_disjoint(universe) {
|
if docids.is_disjoint(universe) {
|
||||||
graph.remove_edge(edge_index);
|
graph.remove_edge(edge_index);
|
||||||
empty_paths_cache.forbid_edge(edge_index);
|
empty_paths_cache.forbid_edge(edge_index);
|
||||||
edge_docids_cache.cache.remove(&edge_index);
|
edge_docids_cache.cache.remove(&edge_index);
|
||||||
@ -68,7 +68,7 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
|
|||||||
// TODO: update old state instead of starting from scratch
|
// TODO: update old state instead of starting from scratch
|
||||||
let mut graph = RankingRuleGraph::build(ctx, query_graph.clone())?;
|
let mut graph = RankingRuleGraph::build(ctx, query_graph.clone())?;
|
||||||
let mut edge_docids_cache = EdgeDocidsCache::default();
|
let mut edge_docids_cache = EdgeDocidsCache::default();
|
||||||
let mut empty_paths_cache = EmptyPathsCache::new(graph.all_edges.len());
|
let mut empty_paths_cache = EmptyPathsCache::new(graph.all_edges.len() as u16);
|
||||||
|
|
||||||
remove_empty_edges(
|
remove_empty_edges(
|
||||||
ctx,
|
ctx,
|
||||||
@ -118,31 +118,82 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
|
|||||||
state.all_distances[state.graph.query_graph.root_node as usize][state.cur_distance_idx];
|
state.all_distances[state.graph.query_graph.root_node as usize][state.cur_distance_idx];
|
||||||
state.cur_distance_idx += 1;
|
state.cur_distance_idx += 1;
|
||||||
|
|
||||||
let paths = state.graph.paths_of_cost(
|
let mut bucket = RoaringBitmap::new();
|
||||||
state.graph.query_graph.root_node as usize,
|
|
||||||
|
let GraphBasedRankingRuleState {
|
||||||
|
graph,
|
||||||
|
edge_docids_cache,
|
||||||
|
empty_paths_cache,
|
||||||
|
all_distances,
|
||||||
|
cur_distance_idx: _,
|
||||||
|
} = &mut state;
|
||||||
|
|
||||||
|
let mut paths = vec![];
|
||||||
|
let original_universe = universe;
|
||||||
|
let mut universe = universe.clone();
|
||||||
|
|
||||||
|
graph.visit_paths_of_cost(
|
||||||
|
graph.query_graph.root_node as usize,
|
||||||
cost,
|
cost,
|
||||||
&state.all_distances,
|
all_distances,
|
||||||
&state.empty_paths_cache,
|
empty_paths_cache,
|
||||||
);
|
|path, graph, empty_paths_cache| {
|
||||||
|
let mut path_docids = universe.clone();
|
||||||
|
let mut visited_edges = vec![];
|
||||||
|
let mut cached_edge_docids = vec![];
|
||||||
|
for &edge_index in path {
|
||||||
|
visited_edges.push(edge_index);
|
||||||
|
let edge_docids =
|
||||||
|
edge_docids_cache.get_edge_docids(ctx, edge_index, graph, &universe)?;
|
||||||
|
let edge_docids = match edge_docids {
|
||||||
|
BitmapOrAllRef::Bitmap(b) => b,
|
||||||
|
BitmapOrAllRef::All => continue,
|
||||||
|
};
|
||||||
|
cached_edge_docids.push((edge_index, edge_docids.clone()));
|
||||||
|
if edge_docids.is_disjoint(&universe) {
|
||||||
|
// 1. Store in the cache that this edge is empty for this universe
|
||||||
|
empty_paths_cache.forbid_edge(edge_index);
|
||||||
|
// 2. remove this edge from the ranking rule graph
|
||||||
|
graph.remove_edge(edge_index);
|
||||||
|
edge_docids_cache.cache.remove(&edge_index);
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
path_docids &= edge_docids;
|
||||||
|
|
||||||
|
if path_docids.is_disjoint(&universe) {
|
||||||
|
empty_paths_cache.forbid_prefix(&visited_edges);
|
||||||
|
// if the intersection between this edge and any
|
||||||
|
// previous one is disjoint with the universe,
|
||||||
|
// then we add these two edges to the empty_path_cache
|
||||||
|
for (edge_index2, edge_docids2) in
|
||||||
|
cached_edge_docids[..cached_edge_docids.len() - 1].iter()
|
||||||
|
{
|
||||||
|
let intersection = edge_docids & edge_docids2;
|
||||||
|
if intersection.is_disjoint(&universe) {
|
||||||
|
// needs_filtering_empty_couple_edges = true;
|
||||||
|
empty_paths_cache.forbid_couple_edges(*edge_index2, edge_index);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return Ok(());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
paths.push(path.to_vec());
|
||||||
|
bucket |= &path_docids;
|
||||||
|
universe -= path_docids;
|
||||||
|
Ok(())
|
||||||
|
},
|
||||||
|
)?;
|
||||||
|
|
||||||
G::log_state(
|
G::log_state(
|
||||||
&state.graph,
|
&state.graph,
|
||||||
&paths,
|
&paths,
|
||||||
&state.empty_paths_cache,
|
&state.empty_paths_cache,
|
||||||
universe,
|
original_universe,
|
||||||
&state.all_distances,
|
&state.all_distances,
|
||||||
cost,
|
cost,
|
||||||
logger,
|
logger,
|
||||||
);
|
);
|
||||||
|
|
||||||
let bucket = state.graph.resolve_paths(
|
|
||||||
ctx,
|
|
||||||
&mut state.edge_docids_cache,
|
|
||||||
&mut state.empty_paths_cache,
|
|
||||||
universe,
|
|
||||||
paths,
|
|
||||||
)?;
|
|
||||||
|
|
||||||
let next_query_graph = state.graph.query_graph.clone();
|
let next_query_graph = state.graph.query_graph.clone();
|
||||||
|
|
||||||
self.state = Some(state);
|
self.state = Some(state);
|
||||||
|
@ -42,19 +42,19 @@ pub enum SearchEvents {
|
|||||||
},
|
},
|
||||||
ProximityState {
|
ProximityState {
|
||||||
graph: RankingRuleGraph<ProximityGraph>,
|
graph: RankingRuleGraph<ProximityGraph>,
|
||||||
paths: Vec<Vec<u32>>,
|
paths: Vec<Vec<u16>>,
|
||||||
empty_paths_cache: EmptyPathsCache,
|
empty_paths_cache: EmptyPathsCache,
|
||||||
universe: RoaringBitmap,
|
universe: RoaringBitmap,
|
||||||
distances: Vec<Vec<u64>>,
|
distances: Vec<Vec<u16>>,
|
||||||
cost: u64,
|
cost: u16,
|
||||||
},
|
},
|
||||||
TypoState {
|
TypoState {
|
||||||
graph: RankingRuleGraph<TypoGraph>,
|
graph: RankingRuleGraph<TypoGraph>,
|
||||||
paths: Vec<Vec<u32>>,
|
paths: Vec<Vec<u16>>,
|
||||||
empty_paths_cache: EmptyPathsCache,
|
empty_paths_cache: EmptyPathsCache,
|
||||||
universe: RoaringBitmap,
|
universe: RoaringBitmap,
|
||||||
distances: Vec<Vec<u64>>,
|
distances: Vec<Vec<u16>>,
|
||||||
cost: u64,
|
cost: u16,
|
||||||
},
|
},
|
||||||
RankingRuleSkipBucket { ranking_rule_idx: usize, candidates: RoaringBitmap, time: Instant },
|
RankingRuleSkipBucket { ranking_rule_idx: usize, candidates: RoaringBitmap, time: Instant },
|
||||||
}
|
}
|
||||||
@ -165,11 +165,11 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
|
|||||||
self.events.push(SearchEvents::WordsState { query_graph: query_graph.clone() });
|
self.events.push(SearchEvents::WordsState { query_graph: query_graph.clone() });
|
||||||
}
|
}
|
||||||
|
|
||||||
fn log_proximity_state(&mut self, query_graph: &RankingRuleGraph<ProximityGraph>, paths_map: &[Vec<u32>], empty_paths_cache: &EmptyPathsCache, universe: &RoaringBitmap, distances: Vec<Vec<u64>>, cost: u64,) {
|
fn log_proximity_state(&mut self, query_graph: &RankingRuleGraph<ProximityGraph>, paths_map: &[Vec<u16>], empty_paths_cache: &EmptyPathsCache, universe: &RoaringBitmap, distances: Vec<Vec<u16>>, cost: u16,) {
|
||||||
self.events.push(SearchEvents::ProximityState { graph: query_graph.clone(), paths: paths_map.to_vec(), empty_paths_cache: empty_paths_cache.clone(), universe: universe.clone(), distances, cost })
|
self.events.push(SearchEvents::ProximityState { graph: query_graph.clone(), paths: paths_map.to_vec(), empty_paths_cache: empty_paths_cache.clone(), universe: universe.clone(), distances, cost })
|
||||||
}
|
}
|
||||||
|
|
||||||
fn log_typo_state(&mut self, query_graph: &RankingRuleGraph<TypoGraph>, paths_map: &[Vec<u32>], empty_paths_cache: &EmptyPathsCache, universe: &RoaringBitmap, distances: Vec<Vec<u64>>, cost: u64,) {
|
fn log_typo_state(&mut self, query_graph: &RankingRuleGraph<TypoGraph>, paths_map: &[Vec<u16>], empty_paths_cache: &EmptyPathsCache, universe: &RoaringBitmap, distances: Vec<Vec<u16>>, cost: u16,) {
|
||||||
self.events.push(SearchEvents::TypoState { graph: query_graph.clone(), paths: paths_map.to_vec(), empty_paths_cache: empty_paths_cache.clone(), universe: universe.clone(), distances, cost })
|
self.events.push(SearchEvents::TypoState { graph: query_graph.clone(), paths: paths_map.to_vec(), empty_paths_cache: empty_paths_cache.clone(), universe: universe.clone(), distances, cost })
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -352,7 +352,7 @@ results.{random} {{
|
|||||||
writeln!(&mut file, "}}").unwrap();
|
writeln!(&mut file, "}}").unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
fn query_node_d2_desc(ctx: &mut SearchContext, node_idx: usize, node: &QueryNode, _distances: &[u64], file: &mut File) {
|
fn query_node_d2_desc(ctx: &mut SearchContext, node_idx: usize, node: &QueryNode, _distances: &[u16], file: &mut File) {
|
||||||
match &node {
|
match &node {
|
||||||
QueryNode::Term(LocatedQueryTerm { value, .. }) => {
|
QueryNode::Term(LocatedQueryTerm { value, .. }) => {
|
||||||
match value {
|
match value {
|
||||||
@ -420,7 +420,7 @@ shape: class").unwrap();
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fn ranking_rule_graph_d2_description<R: RankingRuleGraphTrait>(ctx: &mut SearchContext, graph: &RankingRuleGraph<R>, paths: &[Vec<u32>], _empty_paths_cache: &EmptyPathsCache, distances: Vec<Vec<u64>>, file: &mut File) {
|
fn ranking_rule_graph_d2_description<R: RankingRuleGraphTrait>(ctx: &mut SearchContext, graph: &RankingRuleGraph<R>, paths: &[Vec<u16>], _empty_paths_cache: &EmptyPathsCache, distances: Vec<Vec<u16>>, file: &mut File) {
|
||||||
writeln!(file,"direction: right").unwrap();
|
writeln!(file,"direction: right").unwrap();
|
||||||
|
|
||||||
writeln!(file, "Proximity Graph {{").unwrap();
|
writeln!(file, "Proximity Graph {{").unwrap();
|
||||||
@ -477,7 +477,7 @@ shape: class").unwrap();
|
|||||||
// }
|
// }
|
||||||
// writeln!(file, "}}").unwrap();
|
// writeln!(file, "}}").unwrap();
|
||||||
}
|
}
|
||||||
fn edge_d2_description<R: RankingRuleGraphTrait>(ctx: &mut SearchContext,graph: &RankingRuleGraph<R>, edge_idx: u32, file: &mut File) {
|
fn edge_d2_description<R: RankingRuleGraphTrait>(ctx: &mut SearchContext,graph: &RankingRuleGraph<R>, edge_idx: u16, file: &mut File) {
|
||||||
let Edge { from_node, to_node, cost, .. } = graph.all_edges[edge_idx as usize].as_ref().unwrap() ;
|
let Edge { from_node, to_node, cost, .. } = graph.all_edges[edge_idx as usize].as_ref().unwrap() ;
|
||||||
let from_node = &graph.query_graph.nodes[*from_node as usize];
|
let from_node = &graph.query_graph.nodes[*from_node as usize];
|
||||||
let from_node_desc = match from_node {
|
let from_node_desc = match from_node {
|
||||||
@ -511,7 +511,7 @@ shape: class").unwrap();
|
|||||||
shape: class
|
shape: class
|
||||||
}}").unwrap();
|
}}").unwrap();
|
||||||
}
|
}
|
||||||
fn paths_d2_description<R: RankingRuleGraphTrait>(ctx: &mut SearchContext, graph: &RankingRuleGraph<R>, paths: &[Vec<u32>], file: &mut File) {
|
fn paths_d2_description<R: RankingRuleGraphTrait>(ctx: &mut SearchContext, graph: &RankingRuleGraph<R>, paths: &[Vec<u16>], file: &mut File) {
|
||||||
for (path_idx, edge_indexes) in paths.iter().enumerate() {
|
for (path_idx, edge_indexes) in paths.iter().enumerate() {
|
||||||
writeln!(file, "{path_idx} {{").unwrap();
|
writeln!(file, "{path_idx} {{").unwrap();
|
||||||
for edge_idx in edge_indexes.iter() {
|
for edge_idx in edge_indexes.iter() {
|
||||||
|
@ -58,22 +58,22 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
|
|||||||
fn log_proximity_state(
|
fn log_proximity_state(
|
||||||
&mut self,
|
&mut self,
|
||||||
_query_graph: &RankingRuleGraph<ProximityGraph>,
|
_query_graph: &RankingRuleGraph<ProximityGraph>,
|
||||||
_paths_map: &[Vec<u32>],
|
_paths_map: &[Vec<u16>],
|
||||||
_empty_paths_cache: &EmptyPathsCache,
|
_empty_paths_cache: &EmptyPathsCache,
|
||||||
_universe: &RoaringBitmap,
|
_universe: &RoaringBitmap,
|
||||||
_distances: Vec<Vec<u64>>,
|
_distances: Vec<Vec<u16>>,
|
||||||
_cost: u64,
|
_cost: u16,
|
||||||
) {
|
) {
|
||||||
}
|
}
|
||||||
|
|
||||||
fn log_typo_state(
|
fn log_typo_state(
|
||||||
&mut self,
|
&mut self,
|
||||||
_query_graph: &RankingRuleGraph<TypoGraph>,
|
_query_graph: &RankingRuleGraph<TypoGraph>,
|
||||||
_paths: &[Vec<u32>],
|
_paths: &[Vec<u16>],
|
||||||
_empty_paths_cache: &EmptyPathsCache,
|
_empty_paths_cache: &EmptyPathsCache,
|
||||||
_universe: &RoaringBitmap,
|
_universe: &RoaringBitmap,
|
||||||
_distances: Vec<Vec<u64>>,
|
_distances: Vec<Vec<u16>>,
|
||||||
_cost: u64,
|
_cost: u16,
|
||||||
) {
|
) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -120,20 +120,20 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
|
|||||||
fn log_proximity_state(
|
fn log_proximity_state(
|
||||||
&mut self,
|
&mut self,
|
||||||
query_graph: &RankingRuleGraph<ProximityGraph>,
|
query_graph: &RankingRuleGraph<ProximityGraph>,
|
||||||
paths: &[Vec<u32>],
|
paths: &[Vec<u16>],
|
||||||
empty_paths_cache: &EmptyPathsCache,
|
empty_paths_cache: &EmptyPathsCache,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
_distances: Vec<Vec<u64>>,
|
distances: Vec<Vec<u16>>,
|
||||||
cost: u64,
|
cost: u16,
|
||||||
);
|
);
|
||||||
|
|
||||||
fn log_typo_state(
|
fn log_typo_state(
|
||||||
&mut self,
|
&mut self,
|
||||||
query_graph: &RankingRuleGraph<TypoGraph>,
|
query_graph: &RankingRuleGraph<TypoGraph>,
|
||||||
paths: &[Vec<u32>],
|
paths: &[Vec<u16>],
|
||||||
empty_paths_cache: &EmptyPathsCache,
|
empty_paths_cache: &EmptyPathsCache,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
_distances: Vec<Vec<u64>>,
|
distances: Vec<Vec<u16>>,
|
||||||
cost: u64,
|
cost: u16,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -7,31 +7,26 @@ mod query_term;
|
|||||||
mod ranking_rule_graph;
|
mod ranking_rule_graph;
|
||||||
mod ranking_rules;
|
mod ranking_rules;
|
||||||
mod resolve_query_graph;
|
mod resolve_query_graph;
|
||||||
|
mod small_bitmap;
|
||||||
mod sort;
|
mod sort;
|
||||||
mod words;
|
mod words;
|
||||||
|
|
||||||
use std::collections::BTreeSet;
|
use self::interner::Interner;
|
||||||
|
use self::logger::SearchLogger;
|
||||||
pub use ranking_rules::{
|
use self::query_term::Phrase;
|
||||||
apply_ranking_rules, RankingRule, RankingRuleOutput, RankingRuleOutputIter,
|
use self::resolve_query_graph::{resolve_query_graph, NodeDocIdsCache};
|
||||||
RankingRuleOutputIterWrapper, RankingRuleQueryTrait,
|
use crate::new::query_term::located_query_terms_from_string;
|
||||||
};
|
use crate::{Filter, Index, Result, TermsMatchingStrategy};
|
||||||
|
|
||||||
use crate::{
|
|
||||||
new::query_term::located_query_terms_from_string, Filter, Index, Result, TermsMatchingStrategy,
|
|
||||||
};
|
|
||||||
use charabia::Tokenize;
|
use charabia::Tokenize;
|
||||||
use db_cache::DatabaseCache;
|
use db_cache::DatabaseCache;
|
||||||
use heed::RoTxn;
|
use heed::RoTxn;
|
||||||
use query_graph::{QueryGraph, QueryNode};
|
use query_graph::{QueryGraph, QueryNode};
|
||||||
use roaring::RoaringBitmap;
|
pub use ranking_rules::{
|
||||||
|
apply_ranking_rules, RankingRule, RankingRuleOutput, RankingRuleOutputIter,
|
||||||
use self::{
|
RankingRuleOutputIterWrapper, RankingRuleQueryTrait,
|
||||||
interner::Interner,
|
|
||||||
logger::SearchLogger,
|
|
||||||
query_term::Phrase,
|
|
||||||
resolve_query_graph::{resolve_query_graph, NodeDocIdsCache},
|
|
||||||
};
|
};
|
||||||
|
use roaring::RoaringBitmap;
|
||||||
|
use std::collections::BTreeSet;
|
||||||
|
|
||||||
pub enum BitmapOrAllRef<'s> {
|
pub enum BitmapOrAllRef<'s> {
|
||||||
Bitmap(&'s RoaringBitmap),
|
Bitmap(&'s RoaringBitmap),
|
||||||
@ -109,7 +104,7 @@ pub fn execute_search<'search>(
|
|||||||
logger: &mut dyn SearchLogger<QueryGraph>,
|
logger: &mut dyn SearchLogger<QueryGraph>,
|
||||||
) -> Result<Vec<u32>> {
|
) -> Result<Vec<u32>> {
|
||||||
assert!(!query.is_empty());
|
assert!(!query.is_empty());
|
||||||
let query_terms = located_query_terms_from_string(ctx, query.tokenize(), None).unwrap();
|
let query_terms = located_query_terms_from_string(ctx, query.tokenize(), None)?;
|
||||||
let graph = QueryGraph::from_query(ctx, query_terms)?;
|
let graph = QueryGraph::from_query(ctx, query_terms)?;
|
||||||
|
|
||||||
logger.initial_query(&graph);
|
logger.initial_query(&graph);
|
||||||
@ -127,7 +122,7 @@ pub fn execute_search<'search>(
|
|||||||
TermsMatchingStrategy::Last,
|
TermsMatchingStrategy::Last,
|
||||||
logger,
|
logger,
|
||||||
)?;
|
)?;
|
||||||
// TODO: create ranking rules here, reuse the node docids cache for the words ranking rule
|
// TODO: create ranking rules here
|
||||||
|
|
||||||
logger.initial_universe(&universe);
|
logger.initial_universe(&universe);
|
||||||
|
|
||||||
|
@ -1,8 +1,7 @@
|
|||||||
use super::query_term::{self, LocatedQueryTerm, QueryTerm, WordDerivations};
|
use super::query_term::{self, LocatedQueryTerm, QueryTerm, WordDerivations};
|
||||||
|
use super::small_bitmap::SmallBitmap;
|
||||||
use super::SearchContext;
|
use super::SearchContext;
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
use roaring::RoaringBitmap;
|
|
||||||
use std::fmt::Debug;
|
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub enum QueryNode {
|
pub enum QueryNode {
|
||||||
@ -12,17 +11,17 @@ pub enum QueryNode {
|
|||||||
End,
|
End,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Clone)]
|
||||||
pub struct Edges {
|
pub struct Edges {
|
||||||
// TODO: use a tiny bitset instead, something like a simple Vec<u8> where most queries will see a vector of one element
|
// TODO: use a tiny bitset instead, something like a simple Vec<u8> where most queries will see a vector of one element
|
||||||
pub predecessors: RoaringBitmap,
|
pub predecessors: SmallBitmap,
|
||||||
pub successors: RoaringBitmap,
|
pub successors: SmallBitmap,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct QueryGraph {
|
pub struct QueryGraph {
|
||||||
pub root_node: u32,
|
pub root_node: u16,
|
||||||
pub end_node: u32,
|
pub end_node: u16,
|
||||||
pub nodes: Vec<QueryNode>,
|
pub nodes: Vec<QueryNode>,
|
||||||
pub edges: Vec<Edges>,
|
pub edges: Vec<Edges>,
|
||||||
}
|
}
|
||||||
@ -30,7 +29,7 @@ pub struct QueryGraph {
|
|||||||
fn _assert_sizes() {
|
fn _assert_sizes() {
|
||||||
// TODO: QueryNodes are too big now, 88B is a bit too big
|
// TODO: QueryNodes are too big now, 88B is a bit too big
|
||||||
let _: [u8; 88] = [0; std::mem::size_of::<QueryNode>()];
|
let _: [u8; 88] = [0; std::mem::size_of::<QueryNode>()];
|
||||||
let _: [u8; 48] = [0; std::mem::size_of::<Edges>()];
|
let _: [u8; 32] = [0; std::mem::size_of::<Edges>()];
|
||||||
}
|
}
|
||||||
|
|
||||||
impl Default for QueryGraph {
|
impl Default for QueryGraph {
|
||||||
@ -38,8 +37,8 @@ impl Default for QueryGraph {
|
|||||||
fn default() -> Self {
|
fn default() -> Self {
|
||||||
let nodes = vec![QueryNode::Start, QueryNode::End];
|
let nodes = vec![QueryNode::Start, QueryNode::End];
|
||||||
let edges = vec![
|
let edges = vec![
|
||||||
Edges { predecessors: RoaringBitmap::new(), successors: RoaringBitmap::new() },
|
Edges { predecessors: SmallBitmap::new(64), successors: SmallBitmap::new(64) },
|
||||||
Edges { predecessors: RoaringBitmap::new(), successors: RoaringBitmap::new() },
|
Edges { predecessors: SmallBitmap::new(64), successors: SmallBitmap::new(64) },
|
||||||
];
|
];
|
||||||
|
|
||||||
Self { root_node: 0, end_node: 1, nodes, edges }
|
Self { root_node: 0, end_node: 1, nodes, edges }
|
||||||
@ -47,18 +46,18 @@ impl Default for QueryGraph {
|
|||||||
}
|
}
|
||||||
|
|
||||||
impl QueryGraph {
|
impl QueryGraph {
|
||||||
fn connect_to_node(&mut self, from_nodes: &[u32], to_node: u32) {
|
fn connect_to_node(&mut self, from_nodes: &[u16], to_node: u16) {
|
||||||
for &from_node in from_nodes {
|
for &from_node in from_nodes {
|
||||||
self.edges[from_node as usize].successors.insert(to_node);
|
self.edges[from_node as usize].successors.insert(to_node);
|
||||||
self.edges[to_node as usize].predecessors.insert(from_node);
|
self.edges[to_node as usize].predecessors.insert(from_node);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fn add_node(&mut self, from_nodes: &[u32], node: QueryNode) -> u32 {
|
fn add_node(&mut self, from_nodes: &[u16], node: QueryNode) -> u16 {
|
||||||
let new_node_idx = self.nodes.len() as u32;
|
let new_node_idx = self.nodes.len() as u16;
|
||||||
self.nodes.push(node);
|
self.nodes.push(node);
|
||||||
self.edges.push(Edges {
|
self.edges.push(Edges {
|
||||||
predecessors: from_nodes.iter().collect(),
|
predecessors: SmallBitmap::from_array(from_nodes, 64),
|
||||||
successors: RoaringBitmap::new(),
|
successors: SmallBitmap::new(64),
|
||||||
});
|
});
|
||||||
for from_node in from_nodes {
|
for from_node in from_nodes {
|
||||||
self.edges[*from_node as usize].successors.insert(new_node_idx);
|
self.edges[*from_node as usize].successors.insert(new_node_idx);
|
||||||
@ -79,7 +78,7 @@ impl QueryGraph {
|
|||||||
let word_set = ctx.index.words_fst(ctx.txn)?;
|
let word_set = ctx.index.words_fst(ctx.txn)?;
|
||||||
let mut graph = QueryGraph::default();
|
let mut graph = QueryGraph::default();
|
||||||
|
|
||||||
let (mut prev2, mut prev1, mut prev0): (Vec<u32>, Vec<u32>, Vec<u32>) =
|
let (mut prev2, mut prev1, mut prev0): (Vec<u16>, Vec<u16>, Vec<u16>) =
|
||||||
(vec![], vec![], vec![graph.root_node]);
|
(vec![], vec![], vec![graph.root_node]);
|
||||||
|
|
||||||
// TODO: split words / synonyms
|
// TODO: split words / synonyms
|
||||||
@ -157,40 +156,40 @@ impl QueryGraph {
|
|||||||
|
|
||||||
Ok(graph)
|
Ok(graph)
|
||||||
}
|
}
|
||||||
pub fn remove_nodes(&mut self, nodes: &[u32]) {
|
pub fn remove_nodes(&mut self, nodes: &[u16]) {
|
||||||
for &node in nodes {
|
for &node in nodes {
|
||||||
self.nodes[node as usize] = QueryNode::Deleted;
|
self.nodes[node as usize] = QueryNode::Deleted;
|
||||||
let edges = self.edges[node as usize].clone();
|
let edges = self.edges[node as usize].clone();
|
||||||
for pred in edges.predecessors.iter() {
|
for pred in edges.predecessors.iter() {
|
||||||
self.edges[pred as usize].successors.remove(node);
|
self.edges[pred as usize].successors.remove(node);
|
||||||
}
|
}
|
||||||
for succ in edges.successors {
|
for succ in edges.successors.iter() {
|
||||||
self.edges[succ as usize].predecessors.remove(node);
|
self.edges[succ as usize].predecessors.remove(node);
|
||||||
}
|
}
|
||||||
self.edges[node as usize] =
|
self.edges[node as usize] =
|
||||||
Edges { predecessors: RoaringBitmap::new(), successors: RoaringBitmap::new() };
|
Edges { predecessors: SmallBitmap::new(64), successors: SmallBitmap::new(64) };
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub fn remove_nodes_keep_edges(&mut self, nodes: &[u32]) {
|
pub fn remove_nodes_keep_edges(&mut self, nodes: &[u16]) {
|
||||||
for &node in nodes {
|
for &node in nodes {
|
||||||
self.nodes[node as usize] = QueryNode::Deleted;
|
self.nodes[node as usize] = QueryNode::Deleted;
|
||||||
let edges = self.edges[node as usize].clone();
|
let edges = self.edges[node as usize].clone();
|
||||||
for pred in edges.predecessors.iter() {
|
for pred in edges.predecessors.iter() {
|
||||||
self.edges[pred as usize].successors.remove(node);
|
self.edges[pred as usize].successors.remove(node);
|
||||||
self.edges[pred as usize].successors |= &edges.successors;
|
self.edges[pred as usize].successors.union(&edges.successors);
|
||||||
}
|
}
|
||||||
for succ in edges.successors {
|
for succ in edges.successors.iter() {
|
||||||
self.edges[succ as usize].predecessors.remove(node);
|
self.edges[succ as usize].predecessors.remove(node);
|
||||||
self.edges[succ as usize].predecessors |= &edges.predecessors;
|
self.edges[succ as usize].predecessors.union(&edges.predecessors);
|
||||||
}
|
}
|
||||||
self.edges[node as usize] =
|
self.edges[node as usize] =
|
||||||
Edges { predecessors: RoaringBitmap::new(), successors: RoaringBitmap::new() };
|
Edges { predecessors: SmallBitmap::new(64), successors: SmallBitmap::new(64) };
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub fn remove_words_at_position(&mut self, position: i8) -> bool {
|
pub fn remove_words_at_position(&mut self, position: i8) -> bool {
|
||||||
let mut nodes_to_remove_keeping_edges = vec![];
|
let mut nodes_to_remove_keeping_edges = vec![];
|
||||||
for (node_idx, node) in self.nodes.iter().enumerate() {
|
for (node_idx, node) in self.nodes.iter().enumerate() {
|
||||||
let node_idx = node_idx as u32;
|
let node_idx = node_idx as u16;
|
||||||
let QueryNode::Term(LocatedQueryTerm { value: _, positions }) = node else { continue };
|
let QueryNode::Term(LocatedQueryTerm { value: _, positions }) = node else { continue };
|
||||||
if positions.start() == &position {
|
if positions.start() == &position {
|
||||||
nodes_to_remove_keeping_edges.push(node_idx);
|
nodes_to_remove_keeping_edges.push(node_idx);
|
||||||
@ -212,7 +211,7 @@ impl QueryGraph {
|
|||||||
|| (!matches!(node, QueryNode::Start | QueryNode::Deleted)
|
|| (!matches!(node, QueryNode::Start | QueryNode::Deleted)
|
||||||
&& self.edges[node_idx].predecessors.is_empty())
|
&& self.edges[node_idx].predecessors.is_empty())
|
||||||
{
|
{
|
||||||
nodes_to_remove.push(node_idx as u32);
|
nodes_to_remove.push(node_idx as u16);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if nodes_to_remove.is_empty() {
|
if nodes_to_remove.is_empty() {
|
||||||
|
@ -1,40 +1,54 @@
|
|||||||
|
use std::collections::HashSet;
|
||||||
|
|
||||||
use super::{Edge, RankingRuleGraph, RankingRuleGraphTrait};
|
use super::{Edge, RankingRuleGraph, RankingRuleGraphTrait};
|
||||||
|
use crate::new::small_bitmap::SmallBitmap;
|
||||||
use crate::new::{QueryGraph, SearchContext};
|
use crate::new::{QueryGraph, SearchContext};
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
use roaring::RoaringBitmap;
|
|
||||||
|
|
||||||
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||||
pub fn build(ctx: &mut SearchContext, query_graph: QueryGraph) -> Result<Self> {
|
pub fn build(ctx: &mut SearchContext, query_graph: QueryGraph) -> Result<Self> {
|
||||||
let mut ranking_rule_graph =
|
let QueryGraph { nodes: graph_nodes, edges: graph_edges, .. } = &query_graph;
|
||||||
Self { query_graph, all_edges: vec![], node_edges: vec![], successors: vec![] };
|
|
||||||
|
|
||||||
for (node_idx, node) in ranking_rule_graph.query_graph.nodes.iter().enumerate() {
|
let mut all_edges = vec![];
|
||||||
ranking_rule_graph.node_edges.push(RoaringBitmap::new());
|
let mut node_edges = vec![];
|
||||||
ranking_rule_graph.successors.push(RoaringBitmap::new());
|
let mut successors = vec![];
|
||||||
let new_edges = ranking_rule_graph.node_edges.last_mut().unwrap();
|
|
||||||
let new_successors = ranking_rule_graph.successors.last_mut().unwrap();
|
for (node_idx, node) in graph_nodes.iter().enumerate() {
|
||||||
|
node_edges.push(HashSet::new());
|
||||||
|
successors.push(HashSet::new());
|
||||||
|
let new_edges = node_edges.last_mut().unwrap();
|
||||||
|
let new_successors = successors.last_mut().unwrap();
|
||||||
|
|
||||||
let Some(from_node_data) = G::build_visit_from_node(ctx, node)? else { continue };
|
let Some(from_node_data) = G::build_visit_from_node(ctx, node)? else { continue };
|
||||||
|
|
||||||
for successor_idx in ranking_rule_graph.query_graph.edges[node_idx].successors.iter() {
|
for successor_idx in graph_edges[node_idx].successors.iter() {
|
||||||
let to_node = &ranking_rule_graph.query_graph.nodes[successor_idx as usize];
|
let to_node = &graph_nodes[successor_idx as usize];
|
||||||
let mut edges = G::build_visit_to_node(ctx, to_node, &from_node_data)?;
|
let mut edges = G::build_visit_to_node(ctx, to_node, &from_node_data)?;
|
||||||
if edges.is_empty() {
|
if edges.is_empty() {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
edges.sort_by_key(|e| e.0);
|
edges.sort_by_key(|e| e.0);
|
||||||
for (cost, details) in edges {
|
for (cost, details) in edges {
|
||||||
ranking_rule_graph.all_edges.push(Some(Edge {
|
all_edges.push(Some(Edge {
|
||||||
from_node: node_idx as u32,
|
from_node: node_idx as u16,
|
||||||
to_node: successor_idx,
|
to_node: successor_idx,
|
||||||
cost,
|
cost,
|
||||||
details,
|
details,
|
||||||
}));
|
}));
|
||||||
new_edges.insert(ranking_rule_graph.all_edges.len() as u32 - 1);
|
new_edges.insert(all_edges.len() as u16 - 1);
|
||||||
new_successors.insert(successor_idx);
|
new_successors.insert(successor_idx);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
Ok(ranking_rule_graph)
|
let node_edges = node_edges
|
||||||
|
.into_iter()
|
||||||
|
.map(|edges| SmallBitmap::from_iter(edges.into_iter(), all_edges.len() as u16))
|
||||||
|
.collect();
|
||||||
|
let successors = successors
|
||||||
|
.into_iter()
|
||||||
|
.map(|edges| SmallBitmap::from_iter(edges.into_iter(), all_edges.len() as u16))
|
||||||
|
.collect();
|
||||||
|
|
||||||
|
Ok(RankingRuleGraph { query_graph, all_edges, node_edges, successors })
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -2,124 +2,146 @@
|
|||||||
|
|
||||||
use super::empty_paths_cache::EmptyPathsCache;
|
use super::empty_paths_cache::EmptyPathsCache;
|
||||||
use super::{RankingRuleGraph, RankingRuleGraphTrait};
|
use super::{RankingRuleGraph, RankingRuleGraphTrait};
|
||||||
|
use crate::new::small_bitmap::SmallBitmap;
|
||||||
|
use crate::Result;
|
||||||
use std::collections::VecDeque;
|
use std::collections::VecDeque;
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||||
pub struct Path {
|
pub struct Path {
|
||||||
pub edges: Vec<u32>,
|
pub edges: Vec<u16>,
|
||||||
pub cost: u64,
|
pub cost: u64,
|
||||||
}
|
}
|
||||||
|
|
||||||
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||||
pub fn paths_of_cost(
|
pub fn visit_paths_of_cost(
|
||||||
&self,
|
&mut self,
|
||||||
from: usize,
|
from: usize,
|
||||||
cost: u64,
|
cost: u16,
|
||||||
all_distances: &[Vec<u64>],
|
all_distances: &[Vec<u16>],
|
||||||
empty_paths_cache: &EmptyPathsCache,
|
empty_paths_cache: &mut EmptyPathsCache,
|
||||||
) -> Vec<Vec<u32>> {
|
mut visit: impl FnMut(&[u16], &mut Self, &mut EmptyPathsCache) -> Result<()>,
|
||||||
let mut paths = vec![];
|
) -> Result<()> {
|
||||||
self.paths_of_cost_rec(
|
let _ = self.visit_paths_of_cost_rec(
|
||||||
from,
|
from,
|
||||||
all_distances,
|
|
||||||
cost,
|
cost,
|
||||||
&mut vec![],
|
all_distances,
|
||||||
&mut paths,
|
|
||||||
&vec![false; self.all_edges.len()],
|
|
||||||
empty_paths_cache,
|
empty_paths_cache,
|
||||||
);
|
&mut visit,
|
||||||
paths
|
&mut vec![],
|
||||||
|
&mut SmallBitmap::new(self.all_edges.len() as u16),
|
||||||
|
empty_paths_cache.empty_edges.clone(),
|
||||||
|
)?;
|
||||||
|
Ok(())
|
||||||
}
|
}
|
||||||
pub fn paths_of_cost_rec(
|
pub fn visit_paths_of_cost_rec(
|
||||||
&self,
|
&mut self,
|
||||||
from: usize,
|
from: usize,
|
||||||
all_distances: &[Vec<u64>],
|
cost: u16,
|
||||||
cost: u64,
|
// TODO: replace all_distances with a Vec<SmallBitmap> where the SmallBitmap contains true if the cost exists and false otherwise
|
||||||
prev_edges: &mut Vec<u32>,
|
all_distances: &[Vec<u16>],
|
||||||
paths: &mut Vec<Vec<u32>>,
|
empty_paths_cache: &mut EmptyPathsCache,
|
||||||
forbidden_edges: &[bool],
|
visit: &mut impl FnMut(&[u16], &mut Self, &mut EmptyPathsCache) -> Result<()>,
|
||||||
empty_paths_cache: &EmptyPathsCache,
|
// replace prev edges by:
|
||||||
) {
|
// (1) a small bitmap representing the path
|
||||||
let distances = &all_distances[from];
|
// (2) a pointer within the EmptyPathsCache::forbidden_prefixes structure
|
||||||
if !distances.contains(&cost) {
|
prev_edges: &mut Vec<u16>,
|
||||||
panic!();
|
cur_path: &mut SmallBitmap,
|
||||||
}
|
mut forbidden_edges: SmallBitmap,
|
||||||
let tos = &self.query_graph.edges[from].successors;
|
) -> Result<bool> {
|
||||||
let mut valid_edges = vec![];
|
let mut any_valid = false;
|
||||||
for to in tos {
|
|
||||||
self.visit_edges::<()>(from as u32, to, |edge_idx, edge| {
|
|
||||||
if cost >= edge.cost as u64
|
|
||||||
&& all_distances[to as usize].contains(&(cost - edge.cost as u64))
|
|
||||||
&& !forbidden_edges[edge_idx as usize]
|
|
||||||
{
|
|
||||||
valid_edges.push((edge_idx, edge.cost, to));
|
|
||||||
}
|
|
||||||
std::ops::ControlFlow::Continue(())
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
for (edge_idx, edge_cost, to) in valid_edges {
|
let edges = self.node_edges[from].clone();
|
||||||
prev_edges.push(edge_idx);
|
for edge_idx in edges.iter() {
|
||||||
if empty_paths_cache.empty_prefixes.contains_prefix_of_path(prev_edges) {
|
let Some(edge) = self.all_edges[edge_idx as usize].as_ref() else { continue };
|
||||||
|
if cost < edge.cost as u16
|
||||||
|
|| forbidden_edges.contains(edge_idx)
|
||||||
|
|| !all_distances[edge.to_node as usize].contains(&(cost - edge.cost as u16))
|
||||||
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
let mut new_forbidden_edges = forbidden_edges.to_vec();
|
cur_path.insert(edge_idx);
|
||||||
for edge_idx in empty_paths_cache.empty_couple_edges[edge_idx as usize].iter() {
|
prev_edges.push(edge_idx);
|
||||||
new_forbidden_edges[*edge_idx as usize] = true;
|
|
||||||
}
|
|
||||||
for edge_idx in empty_paths_cache.empty_prefixes.final_edges_ater_prefix(prev_edges) {
|
|
||||||
new_forbidden_edges[edge_idx as usize] = true;
|
|
||||||
}
|
|
||||||
|
|
||||||
if to == self.query_graph.end_node {
|
let mut new_forbidden_edges = forbidden_edges.clone();
|
||||||
paths.push(prev_edges.clone());
|
new_forbidden_edges.union(&empty_paths_cache.empty_couple_edges[edge_idx as usize]);
|
||||||
|
empty_paths_cache.empty_prefixes.final_edges_after_prefix(prev_edges, &mut |x| {
|
||||||
|
new_forbidden_edges.insert(x);
|
||||||
|
});
|
||||||
|
|
||||||
|
let next_any_valid = if edge.to_node == self.query_graph.end_node {
|
||||||
|
any_valid = true;
|
||||||
|
visit(prev_edges, self, empty_paths_cache)?;
|
||||||
|
true
|
||||||
} else {
|
} else {
|
||||||
self.paths_of_cost_rec(
|
self.visit_paths_of_cost_rec(
|
||||||
to as usize,
|
edge.to_node as usize,
|
||||||
|
cost - edge.cost as u16,
|
||||||
all_distances,
|
all_distances,
|
||||||
cost - edge_cost as u64,
|
|
||||||
prev_edges,
|
|
||||||
paths,
|
|
||||||
&new_forbidden_edges,
|
|
||||||
empty_paths_cache,
|
empty_paths_cache,
|
||||||
)
|
visit,
|
||||||
}
|
prev_edges,
|
||||||
|
cur_path,
|
||||||
|
new_forbidden_edges,
|
||||||
|
)?
|
||||||
|
};
|
||||||
|
any_valid |= next_any_valid;
|
||||||
|
cur_path.remove(edge_idx);
|
||||||
prev_edges.pop();
|
prev_edges.pop();
|
||||||
|
if next_any_valid {
|
||||||
|
if empty_paths_cache.path_is_empty(prev_edges, cur_path) {
|
||||||
|
return Ok(any_valid);
|
||||||
|
}
|
||||||
|
forbidden_edges.union(&empty_paths_cache.empty_edges);
|
||||||
|
for edge in prev_edges.iter() {
|
||||||
|
forbidden_edges.union(&empty_paths_cache.empty_couple_edges[*edge as usize]);
|
||||||
|
}
|
||||||
|
empty_paths_cache.empty_prefixes.final_edges_after_prefix(prev_edges, &mut |x| {
|
||||||
|
forbidden_edges.insert(x);
|
||||||
|
});
|
||||||
|
}
|
||||||
|
if next_any_valid && empty_paths_cache.path_is_empty(prev_edges, cur_path) {
|
||||||
|
return Ok(any_valid);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Ok(any_valid)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn initialize_distances_cheapest(&self) -> Vec<Vec<u64>> {
|
pub fn initialize_distances_cheapest(&self) -> Vec<Vec<u16>> {
|
||||||
let mut distances_to_end: Vec<Vec<u64>> = vec![vec![]; self.query_graph.nodes.len()];
|
let mut distances_to_end: Vec<Vec<u16>> = vec![vec![]; self.query_graph.nodes.len()];
|
||||||
let mut enqueued = vec![false; self.query_graph.nodes.len()];
|
let mut enqueued = SmallBitmap::new(self.query_graph.nodes.len() as u16);
|
||||||
|
|
||||||
let mut node_stack = VecDeque::new();
|
let mut node_stack = VecDeque::new();
|
||||||
|
|
||||||
distances_to_end[self.query_graph.end_node as usize] = vec![0];
|
distances_to_end[self.query_graph.end_node as usize] = vec![0];
|
||||||
|
|
||||||
for prev_node in
|
for prev_node in
|
||||||
self.query_graph.edges[self.query_graph.end_node as usize].predecessors.iter()
|
self.query_graph.edges[self.query_graph.end_node as usize].predecessors.iter()
|
||||||
{
|
{
|
||||||
node_stack.push_back(prev_node as usize);
|
node_stack.push_back(prev_node as usize);
|
||||||
enqueued[prev_node as usize] = true;
|
enqueued.insert(prev_node);
|
||||||
}
|
}
|
||||||
|
|
||||||
while let Some(cur_node) = node_stack.pop_front() {
|
while let Some(cur_node) = node_stack.pop_front() {
|
||||||
let mut self_distances = vec![];
|
let mut self_distances = vec![];
|
||||||
for succ_node in self.query_graph.edges[cur_node].successors.iter() {
|
|
||||||
|
let cur_node_edges = &self.node_edges[cur_node];
|
||||||
|
for edge_idx in cur_node_edges.iter() {
|
||||||
|
let edge = self.all_edges[edge_idx as usize].as_ref().unwrap();
|
||||||
|
let succ_node = edge.to_node;
|
||||||
let succ_distances = &distances_to_end[succ_node as usize];
|
let succ_distances = &distances_to_end[succ_node as usize];
|
||||||
let _ = self.visit_edges::<()>(cur_node as u32, succ_node, |_, edge| {
|
for succ_distance in succ_distances {
|
||||||
for succ_distance in succ_distances {
|
self_distances.push(edge.cost as u16 + succ_distance);
|
||||||
self_distances.push(edge.cost as u64 + succ_distance);
|
}
|
||||||
}
|
|
||||||
std::ops::ControlFlow::Continue(())
|
|
||||||
});
|
|
||||||
}
|
}
|
||||||
|
|
||||||
self_distances.sort_unstable();
|
self_distances.sort_unstable();
|
||||||
self_distances.dedup();
|
self_distances.dedup();
|
||||||
distances_to_end[cur_node] = self_distances;
|
distances_to_end[cur_node] = self_distances;
|
||||||
for prev_node in self.query_graph.edges[cur_node].predecessors.iter() {
|
for prev_node in self.query_graph.edges[cur_node].predecessors.iter() {
|
||||||
if !enqueued[prev_node as usize] {
|
if !enqueued.contains(prev_node) {
|
||||||
node_stack.push_back(prev_node as usize);
|
node_stack.push_back(prev_node as usize);
|
||||||
enqueued[prev_node as usize] = true;
|
enqueued.insert(prev_node);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -11,9 +11,20 @@ use roaring::RoaringBitmap;
|
|||||||
// computing their hash and comparing them
|
// computing their hash and comparing them
|
||||||
// which can be done...
|
// which can be done...
|
||||||
// by using a pointer (real, Rc, bumpalo, or in a vector)???
|
// by using a pointer (real, Rc, bumpalo, or in a vector)???
|
||||||
|
//
|
||||||
|
// But actually.... the edge details' docids are a subset of the universe at the
|
||||||
|
// moment they were computed.
|
||||||
|
// But the universes between two iterations of a ranking rule are completely different
|
||||||
|
// Thus, there is no point in doing this.
|
||||||
|
// UNLESS...
|
||||||
|
// we compute the whole docids corresponding to the edge details (potentially expensive in time and memory
|
||||||
|
// in the common case)
|
||||||
|
//
|
||||||
|
// But we could still benefit within a single iteration for requests like:
|
||||||
|
// `a a a a a a a a a` where we have many of the same edge details, repeated
|
||||||
|
|
||||||
pub struct EdgeDocidsCache<G: RankingRuleGraphTrait> {
|
pub struct EdgeDocidsCache<G: RankingRuleGraphTrait> {
|
||||||
pub cache: FxHashMap<u32, RoaringBitmap>,
|
pub cache: FxHashMap<u16, RoaringBitmap>,
|
||||||
_phantom: PhantomData<G>,
|
_phantom: PhantomData<G>,
|
||||||
}
|
}
|
||||||
impl<G: RankingRuleGraphTrait> Default for EdgeDocidsCache<G> {
|
impl<G: RankingRuleGraphTrait> Default for EdgeDocidsCache<G> {
|
||||||
@ -25,7 +36,7 @@ impl<G: RankingRuleGraphTrait> EdgeDocidsCache<G> {
|
|||||||
pub fn get_edge_docids<'s, 'search>(
|
pub fn get_edge_docids<'s, 'search>(
|
||||||
&'s mut self,
|
&'s mut self,
|
||||||
ctx: &mut SearchContext<'search>,
|
ctx: &mut SearchContext<'search>,
|
||||||
edge_index: u32,
|
edge_index: u16,
|
||||||
graph: &RankingRuleGraph<G>,
|
graph: &RankingRuleGraph<G>,
|
||||||
// TODO: maybe universe doesn't belong here
|
// TODO: maybe universe doesn't belong here
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
@ -41,7 +52,7 @@ impl<G: RankingRuleGraphTrait> EdgeDocidsCache<G> {
|
|||||||
return Ok(BitmapOrAllRef::Bitmap(&self.cache[&edge_index]));
|
return Ok(BitmapOrAllRef::Bitmap(&self.cache[&edge_index]));
|
||||||
}
|
}
|
||||||
// TODO: maybe universe doesn't belong here
|
// TODO: maybe universe doesn't belong here
|
||||||
let docids = universe & G::compute_docids(ctx, details)?;
|
let docids = universe & G::compute_docids(ctx, details, universe)?;
|
||||||
let _ = self.cache.insert(edge_index, docids);
|
let _ = self.cache.insert(edge_index, docids);
|
||||||
let docids = &self.cache[&edge_index];
|
let docids = &self.cache[&edge_index];
|
||||||
Ok(BitmapOrAllRef::Bitmap(docids))
|
Ok(BitmapOrAllRef::Bitmap(docids))
|
||||||
|
@ -1,60 +1,48 @@
|
|||||||
|
use crate::new::small_bitmap::SmallBitmap;
|
||||||
|
|
||||||
use super::paths_map::PathsMap;
|
use super::paths_map::PathsMap;
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct EmptyPathsCache {
|
pub struct EmptyPathsCache {
|
||||||
pub empty_edges: Vec<bool>,
|
pub empty_edges: SmallBitmap,
|
||||||
pub empty_prefixes: PathsMap<()>,
|
pub empty_prefixes: PathsMap<()>,
|
||||||
pub empty_couple_edges: Vec<Vec<u32>>,
|
pub empty_couple_edges: Vec<SmallBitmap>,
|
||||||
}
|
}
|
||||||
impl EmptyPathsCache {
|
impl EmptyPathsCache {
|
||||||
pub fn new(all_edges_len: usize) -> Self {
|
pub fn new(all_edges_len: u16) -> Self {
|
||||||
Self {
|
Self {
|
||||||
empty_edges: vec![false; all_edges_len],
|
empty_edges: SmallBitmap::new(all_edges_len),
|
||||||
empty_prefixes: PathsMap::default(),
|
empty_prefixes: PathsMap::default(),
|
||||||
empty_couple_edges: vec![vec![]; all_edges_len],
|
empty_couple_edges: vec![SmallBitmap::new(all_edges_len); all_edges_len as usize],
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub fn forbid_edge(&mut self, edge_idx: u32) {
|
pub fn forbid_edge(&mut self, edge_idx: u16) {
|
||||||
self.empty_edges[edge_idx as usize] = true;
|
self.empty_edges.insert(edge_idx);
|
||||||
self.empty_couple_edges[edge_idx as usize] = vec![];
|
self.empty_couple_edges[edge_idx as usize].clear();
|
||||||
self.empty_prefixes.remove_edge(&edge_idx);
|
self.empty_prefixes.remove_edge(&edge_idx);
|
||||||
for edges2 in self.empty_couple_edges.iter_mut() {
|
for edges2 in self.empty_couple_edges.iter_mut() {
|
||||||
if let Some(edge2_pos) = edges2.iter().position(|e| *e == edge_idx) {
|
edges2.remove(edge_idx);
|
||||||
edges2.swap_remove(edge2_pos);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub fn forbid_prefix(&mut self, prefix: &[u32]) {
|
pub fn forbid_prefix(&mut self, prefix: &[u16]) {
|
||||||
self.empty_prefixes.insert(prefix.iter().copied(), ());
|
self.empty_prefixes.insert(prefix.iter().copied(), ());
|
||||||
}
|
}
|
||||||
pub fn forbid_couple_edges(&mut self, edge1: u32, edge2: u32) {
|
pub fn forbid_couple_edges(&mut self, edge1: u16, edge2: u16) {
|
||||||
assert!(!self.empty_couple_edges[edge1 as usize].contains(&edge2));
|
self.empty_couple_edges[edge1 as usize].insert(edge2);
|
||||||
self.empty_couple_edges[edge1 as usize].push(edge2);
|
|
||||||
}
|
}
|
||||||
pub fn path_is_empty(&self, path: &[u32]) -> bool {
|
pub fn path_is_empty(&self, path: &[u16], path_bitmap: &SmallBitmap) -> bool {
|
||||||
for edge in path {
|
if path_bitmap.intersects(&self.empty_edges) {
|
||||||
if self.empty_edges[*edge as usize] {
|
return true;
|
||||||
|
}
|
||||||
|
for edge in path.iter() {
|
||||||
|
let forbidden_other_edges = &self.empty_couple_edges[*edge as usize];
|
||||||
|
if path_bitmap.intersects(forbidden_other_edges) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if self.empty_prefixes.contains_prefix_of_path(path) {
|
if self.empty_prefixes.contains_prefix_of_path(path) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
for (edge1, edges2) in self.empty_couple_edges.iter().enumerate() {
|
|
||||||
if let Some(pos_edge1) = path.iter().position(|e| *e == edge1 as u32) {
|
|
||||||
if path[pos_edge1..].iter().any(|e| edges2.contains(e)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// for (edge1, edge2) in self.empty_couple_edges.iter() {
|
|
||||||
// if path.contains(edge1) && path.contains(edge2) {
|
|
||||||
// return true;
|
|
||||||
// }
|
|
||||||
// }
|
|
||||||
// if self.empty_prefixes.contains_prefix_of_path(path) {
|
|
||||||
// return true;
|
|
||||||
// }
|
|
||||||
false
|
false
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -4,17 +4,16 @@ mod edge_docids_cache;
|
|||||||
mod empty_paths_cache;
|
mod empty_paths_cache;
|
||||||
mod paths_map;
|
mod paths_map;
|
||||||
mod proximity;
|
mod proximity;
|
||||||
mod resolve_paths;
|
|
||||||
mod typo;
|
mod typo;
|
||||||
|
|
||||||
use super::logger::SearchLogger;
|
use super::logger::SearchLogger;
|
||||||
|
use super::small_bitmap::SmallBitmap;
|
||||||
use super::{QueryGraph, QueryNode, SearchContext};
|
use super::{QueryGraph, QueryNode, SearchContext};
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
pub use edge_docids_cache::EdgeDocidsCache;
|
pub use edge_docids_cache::EdgeDocidsCache;
|
||||||
pub use empty_paths_cache::EmptyPathsCache;
|
pub use empty_paths_cache::EmptyPathsCache;
|
||||||
pub use proximity::ProximityGraph;
|
pub use proximity::ProximityGraph;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
use std::ops::ControlFlow;
|
|
||||||
pub use typo::TypoGraph;
|
pub use typo::TypoGraph;
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
@ -25,15 +24,15 @@ pub enum EdgeDetails<E> {
|
|||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct Edge<E> {
|
pub struct Edge<E> {
|
||||||
pub from_node: u32,
|
pub from_node: u16,
|
||||||
pub to_node: u32,
|
pub to_node: u16,
|
||||||
pub cost: u8,
|
pub cost: u8,
|
||||||
pub details: EdgeDetails<E>,
|
pub details: EdgeDetails<E>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct EdgePointer<'graph, E> {
|
pub struct EdgePointer<'graph, E> {
|
||||||
pub index: u32,
|
pub index: u16,
|
||||||
pub edge: &'graph Edge<E>,
|
pub edge: &'graph Edge<E>,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -95,6 +94,7 @@ pub trait RankingRuleGraphTrait: Sized {
|
|||||||
fn compute_docids<'search>(
|
fn compute_docids<'search>(
|
||||||
ctx: &mut SearchContext<'search>,
|
ctx: &mut SearchContext<'search>,
|
||||||
edge_details: &Self::EdgeDetails,
|
edge_details: &Self::EdgeDetails,
|
||||||
|
universe: &RoaringBitmap,
|
||||||
) -> Result<RoaringBitmap>;
|
) -> Result<RoaringBitmap>;
|
||||||
|
|
||||||
/// Prepare to build the edges outgoing from `from_node`.
|
/// Prepare to build the edges outgoing from `from_node`.
|
||||||
@ -116,11 +116,11 @@ pub trait RankingRuleGraphTrait: Sized {
|
|||||||
|
|
||||||
fn log_state(
|
fn log_state(
|
||||||
graph: &RankingRuleGraph<Self>,
|
graph: &RankingRuleGraph<Self>,
|
||||||
paths: &[Vec<u32>],
|
paths: &[Vec<u16>],
|
||||||
empty_paths_cache: &EmptyPathsCache,
|
empty_paths_cache: &EmptyPathsCache,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
distances: &[Vec<u64>],
|
distances: &[Vec<u16>],
|
||||||
cost: u64,
|
cost: u16,
|
||||||
logger: &mut dyn SearchLogger<QueryGraph>,
|
logger: &mut dyn SearchLogger<QueryGraph>,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
@ -130,9 +130,9 @@ pub struct RankingRuleGraph<G: RankingRuleGraphTrait> {
|
|||||||
// pub edges: Vec<HashMap<usize, Vec<Edge<G::EdgeDetails>>>>,
|
// pub edges: Vec<HashMap<usize, Vec<Edge<G::EdgeDetails>>>>,
|
||||||
pub all_edges: Vec<Option<Edge<G::EdgeDetails>>>,
|
pub all_edges: Vec<Option<Edge<G::EdgeDetails>>>,
|
||||||
|
|
||||||
pub node_edges: Vec<RoaringBitmap>,
|
pub node_edges: Vec<SmallBitmap>,
|
||||||
|
|
||||||
pub successors: Vec<RoaringBitmap>,
|
pub successors: Vec<SmallBitmap>,
|
||||||
// TODO: to get the edges between two nodes:
|
// TODO: to get the edges between two nodes:
|
||||||
// 1. get node_outgoing_edges[from]
|
// 1. get node_outgoing_edges[from]
|
||||||
// 2. get node_incoming_edges[to]
|
// 2. get node_incoming_edges[to]
|
||||||
@ -149,29 +149,7 @@ impl<G: RankingRuleGraphTrait> Clone for RankingRuleGraph<G> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
||||||
// Visit all edges between the two given nodes in order of increasing cost.
|
pub fn remove_edge(&mut self, edge_index: u16) {
|
||||||
pub fn visit_edges<'graph, O>(
|
|
||||||
&'graph self,
|
|
||||||
from: u32,
|
|
||||||
to: u32,
|
|
||||||
mut visit: impl FnMut(u32, &'graph Edge<G::EdgeDetails>) -> ControlFlow<O>,
|
|
||||||
) -> Option<O> {
|
|
||||||
let from_edges = &self.node_edges[from as usize];
|
|
||||||
for edge_idx in from_edges {
|
|
||||||
let edge = self.all_edges[edge_idx as usize].as_ref().unwrap();
|
|
||||||
if edge.to_node == to {
|
|
||||||
let cf = visit(edge_idx, edge);
|
|
||||||
match cf {
|
|
||||||
ControlFlow::Continue(_) => continue,
|
|
||||||
ControlFlow::Break(o) => return Some(o),
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
None
|
|
||||||
}
|
|
||||||
|
|
||||||
pub fn remove_edge(&mut self, edge_index: u32) {
|
|
||||||
let edge_opt = &mut self.all_edges[edge_index as usize];
|
let edge_opt = &mut self.all_edges[edge_index as usize];
|
||||||
let Some(edge) = &edge_opt else { return };
|
let Some(edge) = &edge_opt else { return };
|
||||||
let (from_node, _to_node) = (edge.from_node, edge.to_node);
|
let (from_node, _to_node) = (edge.from_node, edge.to_node);
|
||||||
@ -180,9 +158,10 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
let from_node_edges = &mut self.node_edges[from_node as usize];
|
let from_node_edges = &mut self.node_edges[from_node as usize];
|
||||||
from_node_edges.remove(edge_index);
|
from_node_edges.remove(edge_index);
|
||||||
|
|
||||||
let mut new_successors_from_node = RoaringBitmap::new();
|
let mut new_successors_from_node = SmallBitmap::new(self.all_edges.len() as u16);
|
||||||
|
let all_edges = &self.all_edges;
|
||||||
for from_node_edge in from_node_edges.iter() {
|
for from_node_edge in from_node_edges.iter() {
|
||||||
let Edge { to_node, .. } = &self.all_edges[from_node_edge as usize].as_ref().unwrap();
|
let Edge { to_node, .. } = &all_edges[from_node_edge as usize].as_ref().unwrap();
|
||||||
new_successors_from_node.insert(*to_node);
|
new_successors_from_node.insert(*to_node);
|
||||||
}
|
}
|
||||||
self.successors[from_node as usize] = new_successors_from_node;
|
self.successors[from_node as usize] = new_successors_from_node;
|
||||||
|
@ -1,9 +1,4 @@
|
|||||||
|
use crate::new::small_bitmap::SmallBitmap;
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
use roaring::RoaringBitmap;
|
|
||||||
|
|
||||||
use super::cheapest_paths::Path;
|
use super::cheapest_paths::Path;
|
||||||
|
|
||||||
// What is PathsMap used for?
|
// What is PathsMap used for?
|
||||||
@ -13,7 +8,7 @@ use super::cheapest_paths::Path;
|
|||||||
|
|
||||||
#[derive(Debug, Clone)]
|
#[derive(Debug, Clone)]
|
||||||
pub struct PathsMap<V> {
|
pub struct PathsMap<V> {
|
||||||
pub nodes: Vec<(u32, PathsMap<V>)>,
|
pub nodes: Vec<(u16, PathsMap<V>)>,
|
||||||
pub value: Option<V>,
|
pub value: Option<V>,
|
||||||
}
|
}
|
||||||
impl<V> Default for PathsMap<V> {
|
impl<V> Default for PathsMap<V> {
|
||||||
@ -39,7 +34,7 @@ impl<V> PathsMap<V> {
|
|||||||
self.nodes.is_empty() && self.value.is_none()
|
self.nodes.is_empty() && self.value.is_none()
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn insert(&mut self, mut edges: impl Iterator<Item = u32>, value: V) {
|
pub fn insert(&mut self, mut edges: impl Iterator<Item = u16>, value: V) {
|
||||||
match edges.next() {
|
match edges.next() {
|
||||||
None => {
|
None => {
|
||||||
self.value = Some(value);
|
self.value = Some(value);
|
||||||
@ -57,7 +52,7 @@ impl<V> PathsMap<V> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fn remove_first_rec(&mut self, cur: &mut Vec<u32>) -> (bool, V) {
|
fn remove_first_rec(&mut self, cur: &mut Vec<u16>) -> (bool, V) {
|
||||||
let Some((first_edge, rest)) = self.nodes.first_mut() else {
|
let Some((first_edge, rest)) = self.nodes.first_mut() else {
|
||||||
// The PathsMap has to be correct by construction here, otherwise
|
// The PathsMap has to be correct by construction here, otherwise
|
||||||
// the unwrap() will crash
|
// the unwrap() will crash
|
||||||
@ -72,7 +67,7 @@ impl<V> PathsMap<V> {
|
|||||||
(false, value)
|
(false, value)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub fn remove_first(&mut self) -> Option<(Vec<u32>, V)> {
|
pub fn remove_first(&mut self) -> Option<(Vec<u16>, V)> {
|
||||||
if self.is_empty() {
|
if self.is_empty() {
|
||||||
return None;
|
return None;
|
||||||
}
|
}
|
||||||
@ -81,7 +76,7 @@ impl<V> PathsMap<V> {
|
|||||||
let (_, value) = self.remove_first_rec(&mut result);
|
let (_, value) = self.remove_first_rec(&mut result);
|
||||||
Some((result, value))
|
Some((result, value))
|
||||||
}
|
}
|
||||||
pub fn iterate_rec(&self, cur: &mut Vec<u32>, visit: &mut impl FnMut(&Vec<u32>, &V)) {
|
pub fn iterate_rec(&self, cur: &mut Vec<u16>, visit: &mut impl FnMut(&Vec<u16>, &V)) {
|
||||||
if let Some(value) = &self.value {
|
if let Some(value) = &self.value {
|
||||||
visit(cur, value);
|
visit(cur, value);
|
||||||
}
|
}
|
||||||
@ -91,7 +86,7 @@ impl<V> PathsMap<V> {
|
|||||||
cur.pop();
|
cur.pop();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub fn iterate(&self, mut visit: impl FnMut(&Vec<u32>, &V)) {
|
pub fn iterate(&self, mut visit: impl FnMut(&Vec<u16>, &V)) {
|
||||||
self.iterate_rec(&mut vec![], &mut visit)
|
self.iterate_rec(&mut vec![], &mut visit)
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -100,7 +95,7 @@ impl<V> PathsMap<V> {
|
|||||||
self.remove_prefix(prefix);
|
self.remove_prefix(prefix);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
pub fn remove_edges(&mut self, forbidden_edges: &RoaringBitmap) {
|
pub fn remove_edges(&mut self, forbidden_edges: &SmallBitmap) {
|
||||||
let mut i = 0;
|
let mut i = 0;
|
||||||
while i < self.nodes.len() {
|
while i < self.nodes.len() {
|
||||||
let should_remove = if forbidden_edges.contains(self.nodes[i].0) {
|
let should_remove = if forbidden_edges.contains(self.nodes[i].0) {
|
||||||
@ -118,7 +113,7 @@ impl<V> PathsMap<V> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub fn remove_edge(&mut self, forbidden_edge: &u32) {
|
pub fn remove_edge(&mut self, forbidden_edge: &u16) {
|
||||||
let mut i = 0;
|
let mut i = 0;
|
||||||
while i < self.nodes.len() {
|
while i < self.nodes.len() {
|
||||||
let should_remove = if &self.nodes[i].0 == forbidden_edge {
|
let should_remove = if &self.nodes[i].0 == forbidden_edge {
|
||||||
@ -136,7 +131,7 @@ impl<V> PathsMap<V> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
pub fn remove_prefix(&mut self, forbidden_prefix: &[u32]) {
|
pub fn remove_prefix(&mut self, forbidden_prefix: &[u16]) {
|
||||||
let [first_edge, remaining_prefix @ ..] = forbidden_prefix else {
|
let [first_edge, remaining_prefix @ ..] = forbidden_prefix else {
|
||||||
self.nodes.clear();
|
self.nodes.clear();
|
||||||
self.value = None;
|
self.value = None;
|
||||||
@ -160,25 +155,23 @@ impl<V> PathsMap<V> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn final_edges_ater_prefix(&self, prefix: &[u32]) -> Vec<u32> {
|
pub fn final_edges_after_prefix(&self, prefix: &[u16], visit: &mut impl FnMut(u16)) {
|
||||||
let [first_edge, remaining_prefix @ ..] = prefix else {
|
let [first_edge, remaining_prefix @ ..] = prefix else {
|
||||||
return self.nodes.iter().filter_map(|n| {
|
for node in self.nodes.iter() {
|
||||||
if n.1.value.is_some() {
|
if node.1.value.is_some() {
|
||||||
Some(n.0)
|
visit(node.0)
|
||||||
} else {
|
|
||||||
None
|
|
||||||
}
|
}
|
||||||
}).collect();
|
}
|
||||||
|
return
|
||||||
};
|
};
|
||||||
for (edge, rest) in self.nodes.iter() {
|
for (edge, rest) in self.nodes.iter() {
|
||||||
if edge == first_edge {
|
if edge == first_edge {
|
||||||
return rest.final_edges_ater_prefix(remaining_prefix);
|
return rest.final_edges_after_prefix(remaining_prefix, visit);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
vec![]
|
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn edge_indices_after_prefix(&self, prefix: &[u32]) -> Vec<u32> {
|
pub fn edge_indices_after_prefix(&self, prefix: &[u16]) -> Vec<u16> {
|
||||||
let [first_edge, remaining_prefix @ ..] = prefix else {
|
let [first_edge, remaining_prefix @ ..] = prefix else {
|
||||||
return self.nodes.iter().map(|n| n.0).collect();
|
return self.nodes.iter().map(|n| n.0).collect();
|
||||||
};
|
};
|
||||||
@ -190,7 +183,7 @@ impl<V> PathsMap<V> {
|
|||||||
vec![]
|
vec![]
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn contains_prefix_of_path(&self, path: &[u32]) -> bool {
|
pub fn contains_prefix_of_path(&self, path: &[u16]) -> bool {
|
||||||
if self.value.is_some() {
|
if self.value.is_some() {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
@ -111,6 +111,8 @@ pub fn visit_to_node<'search, 'from_data>(
|
|||||||
for word1 in derivations1.clone() {
|
for word1 in derivations1.clone() {
|
||||||
for proximity in 1..=(8 - ngram_len2) {
|
for proximity in 1..=(8 - ngram_len2) {
|
||||||
let cost = (proximity + ngram_len2 - 1) as u8;
|
let cost = (proximity + ngram_len2 - 1) as u8;
|
||||||
|
// TODO: if we had access to the universe here, we could already check whether
|
||||||
|
// the bitmap corresponding to this word pair is disjoint with the universe or not
|
||||||
if ctx
|
if ctx
|
||||||
.get_word_prefix_pair_proximity_docids(
|
.get_word_prefix_pair_proximity_docids(
|
||||||
word1,
|
word1,
|
||||||
@ -183,8 +185,13 @@ pub fn visit_to_node<'search, 'from_data>(
|
|||||||
.flat_map(|(cost, proximity_word_pairs)| {
|
.flat_map(|(cost, proximity_word_pairs)| {
|
||||||
let mut edges = vec![];
|
let mut edges = vec![];
|
||||||
for (proximity, word_pairs) in proximity_word_pairs {
|
for (proximity, word_pairs) in proximity_word_pairs {
|
||||||
edges
|
edges.push((
|
||||||
.push((cost, EdgeDetails::Data(ProximityEdge { pairs: word_pairs, proximity })))
|
cost,
|
||||||
|
EdgeDetails::Data(ProximityEdge {
|
||||||
|
pairs: word_pairs.into_boxed_slice(),
|
||||||
|
proximity,
|
||||||
|
}),
|
||||||
|
))
|
||||||
}
|
}
|
||||||
edges
|
edges
|
||||||
})
|
})
|
||||||
|
@ -1,14 +1,15 @@
|
|||||||
use super::{ProximityEdge, WordPair};
|
use super::{ProximityEdge, WordPair};
|
||||||
use crate::new::SearchContext;
|
use crate::new::SearchContext;
|
||||||
use crate::{CboRoaringBitmapCodec, Result};
|
use crate::{CboRoaringBitmapCodec, Result};
|
||||||
use roaring::{MultiOps, RoaringBitmap};
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
pub fn compute_docids<'search>(
|
pub fn compute_docids<'search>(
|
||||||
ctx: &mut SearchContext<'search>,
|
ctx: &mut SearchContext<'search>,
|
||||||
edge: &ProximityEdge,
|
edge: &ProximityEdge,
|
||||||
|
universe: &RoaringBitmap,
|
||||||
) -> Result<RoaringBitmap> {
|
) -> Result<RoaringBitmap> {
|
||||||
let ProximityEdge { pairs, proximity } = edge;
|
let ProximityEdge { pairs, proximity } = edge;
|
||||||
let mut pair_docids = vec![];
|
let mut pair_docids = RoaringBitmap::new();
|
||||||
for pair in pairs.iter() {
|
for pair in pairs.iter() {
|
||||||
let bytes = match pair {
|
let bytes = match pair {
|
||||||
WordPair::Words { left, right } => {
|
WordPair::Words { left, right } => {
|
||||||
@ -21,10 +22,11 @@ pub fn compute_docids<'search>(
|
|||||||
ctx.get_prefix_word_pair_proximity_docids(*left_prefix, *right, *proximity)
|
ctx.get_prefix_word_pair_proximity_docids(*left_prefix, *right, *proximity)
|
||||||
}
|
}
|
||||||
}?;
|
}?;
|
||||||
let bitmap =
|
// TODO: deserialize bitmap within a universe, and (maybe) using a bump allocator?
|
||||||
bytes.map(CboRoaringBitmapCodec::deserialize_from).transpose()?.unwrap_or_default();
|
let bitmap = universe
|
||||||
pair_docids.push(bitmap);
|
& bytes.map(CboRoaringBitmapCodec::deserialize_from).transpose()?.unwrap_or_default();
|
||||||
|
pair_docids |= bitmap;
|
||||||
}
|
}
|
||||||
let docids = MultiOps::union(pair_docids);
|
|
||||||
Ok(docids)
|
Ok(pair_docids)
|
||||||
}
|
}
|
||||||
|
@ -10,7 +10,7 @@ use crate::new::{QueryGraph, QueryNode, SearchContext};
|
|||||||
use crate::Result;
|
use crate::Result;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
// TODO: intern the strings, refer to them by their pointer?
|
// TODO: intern the proximity edges as well?
|
||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub enum WordPair {
|
pub enum WordPair {
|
||||||
@ -21,8 +21,7 @@ pub enum WordPair {
|
|||||||
|
|
||||||
#[derive(Clone)]
|
#[derive(Clone)]
|
||||||
pub struct ProximityEdge {
|
pub struct ProximityEdge {
|
||||||
// TODO: use a list of pointers to the word pairs instead?
|
pairs: Box<[WordPair]>,
|
||||||
pairs: Vec<WordPair>,
|
|
||||||
proximity: u8,
|
proximity: u8,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -40,8 +39,9 @@ impl RankingRuleGraphTrait for ProximityGraph {
|
|||||||
fn compute_docids<'search>(
|
fn compute_docids<'search>(
|
||||||
ctx: &mut SearchContext<'search>,
|
ctx: &mut SearchContext<'search>,
|
||||||
edge: &Self::EdgeDetails,
|
edge: &Self::EdgeDetails,
|
||||||
|
universe: &RoaringBitmap,
|
||||||
) -> Result<roaring::RoaringBitmap> {
|
) -> Result<roaring::RoaringBitmap> {
|
||||||
compute_docids::compute_docids(ctx, edge)
|
compute_docids::compute_docids(ctx, edge, universe)
|
||||||
}
|
}
|
||||||
|
|
||||||
fn build_visit_from_node<'search>(
|
fn build_visit_from_node<'search>(
|
||||||
@ -61,11 +61,11 @@ impl RankingRuleGraphTrait for ProximityGraph {
|
|||||||
|
|
||||||
fn log_state(
|
fn log_state(
|
||||||
graph: &super::RankingRuleGraph<Self>,
|
graph: &super::RankingRuleGraph<Self>,
|
||||||
paths: &[Vec<u32>],
|
paths: &[Vec<u16>],
|
||||||
empty_paths_cache: &EmptyPathsCache,
|
empty_paths_cache: &EmptyPathsCache,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
distances: &[Vec<u64>],
|
distances: &[Vec<u16>],
|
||||||
cost: u64,
|
cost: u16,
|
||||||
logger: &mut dyn SearchLogger<QueryGraph>,
|
logger: &mut dyn SearchLogger<QueryGraph>,
|
||||||
) {
|
) {
|
||||||
logger.log_proximity_state(
|
logger.log_proximity_state(
|
||||||
|
@ -1,97 +0,0 @@
|
|||||||
#![allow(clippy::too_many_arguments)]
|
|
||||||
|
|
||||||
use super::edge_docids_cache::EdgeDocidsCache;
|
|
||||||
use super::empty_paths_cache::EmptyPathsCache;
|
|
||||||
use super::{RankingRuleGraph, RankingRuleGraphTrait};
|
|
||||||
use crate::new::{BitmapOrAllRef, SearchContext};
|
|
||||||
use crate::Result;
|
|
||||||
use roaring::{MultiOps, RoaringBitmap};
|
|
||||||
|
|
||||||
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|
||||||
// TODO: reduce the universe after computing each path
|
|
||||||
// TODO: deserialize roaring bitmap within a universe
|
|
||||||
pub fn resolve_paths<'search>(
|
|
||||||
&mut self,
|
|
||||||
ctx: &mut SearchContext<'search>,
|
|
||||||
edge_docids_cache: &mut EdgeDocidsCache<G>,
|
|
||||||
empty_paths_cache: &mut EmptyPathsCache,
|
|
||||||
universe: &RoaringBitmap,
|
|
||||||
mut paths: Vec<Vec<u32>>,
|
|
||||||
) -> Result<RoaringBitmap> {
|
|
||||||
paths.sort_unstable();
|
|
||||||
// let mut needs_filtering_empty_edges = false;
|
|
||||||
// let mut needs_filtering_empty_prefix = false;
|
|
||||||
// let mut needs_filtering_empty_couple_edges = false;
|
|
||||||
let mut needs_filtering = false;
|
|
||||||
let mut path_bitmaps = vec![];
|
|
||||||
'path_loop: loop {
|
|
||||||
// TODO: distinguish between empty_edges, empty_prefix, and empty_couple_edges filtering
|
|
||||||
if needs_filtering {
|
|
||||||
for path in paths.iter_mut() {
|
|
||||||
if empty_paths_cache.path_is_empty(path) {
|
|
||||||
path.clear();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
needs_filtering = false;
|
|
||||||
}
|
|
||||||
let Some(edge_indexes) = paths.pop() else {
|
|
||||||
break;
|
|
||||||
};
|
|
||||||
|
|
||||||
if edge_indexes.is_empty() {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
let mut path_bitmap = universe.clone();
|
|
||||||
let mut visited_edges = vec![];
|
|
||||||
let mut cached_edge_docids = vec![];
|
|
||||||
'edge_loop: for edge_index in edge_indexes {
|
|
||||||
visited_edges.push(edge_index);
|
|
||||||
let edge_docids =
|
|
||||||
edge_docids_cache.get_edge_docids(ctx, edge_index, self, universe)?;
|
|
||||||
match edge_docids {
|
|
||||||
BitmapOrAllRef::Bitmap(edge_docids) => {
|
|
||||||
cached_edge_docids.push((edge_index, edge_docids.clone()));
|
|
||||||
let (_, edge_docids) = cached_edge_docids.last().unwrap();
|
|
||||||
if edge_docids.is_disjoint(universe) {
|
|
||||||
// 1. Store in the cache that this edge is empty for this universe
|
|
||||||
empty_paths_cache.forbid_edge(edge_index);
|
|
||||||
// 2. remove this edge from the proximity graph
|
|
||||||
self.remove_edge(edge_index);
|
|
||||||
edge_docids_cache.cache.remove(&edge_index);
|
|
||||||
needs_filtering = true;
|
|
||||||
// needs_filtering_empty_edges = true;
|
|
||||||
// 3. continue executing this function again on the remaining paths
|
|
||||||
continue 'path_loop;
|
|
||||||
} else {
|
|
||||||
path_bitmap &= edge_docids;
|
|
||||||
if path_bitmap.is_disjoint(universe) {
|
|
||||||
// needs_filtering_empty_prefix = true;
|
|
||||||
needs_filtering = true;
|
|
||||||
empty_paths_cache.forbid_prefix(&visited_edges);
|
|
||||||
// if the intersection between this edge and any
|
|
||||||
// previous one is disjoint with the universe,
|
|
||||||
// then we add these two edges to the empty_path_cache
|
|
||||||
for (edge_index2, edge_docids2) in
|
|
||||||
cached_edge_docids[..cached_edge_docids.len() - 1].iter()
|
|
||||||
{
|
|
||||||
let intersection = edge_docids & edge_docids2;
|
|
||||||
if intersection.is_disjoint(universe) {
|
|
||||||
// needs_filtering_empty_couple_edges = true;
|
|
||||||
empty_paths_cache
|
|
||||||
.forbid_couple_edges(*edge_index2, edge_index);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
continue 'path_loop;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
BitmapOrAllRef::All => continue 'edge_loop,
|
|
||||||
}
|
|
||||||
}
|
|
||||||
path_bitmaps.push(path_bitmap);
|
|
||||||
}
|
|
||||||
|
|
||||||
Ok(MultiOps::union(path_bitmaps))
|
|
||||||
}
|
|
||||||
}
|
|
@ -31,6 +31,7 @@ impl RankingRuleGraphTrait for TypoGraph {
|
|||||||
fn compute_docids<'db_cache, 'search>(
|
fn compute_docids<'db_cache, 'search>(
|
||||||
ctx: &mut SearchContext<'search>,
|
ctx: &mut SearchContext<'search>,
|
||||||
edge: &Self::EdgeDetails,
|
edge: &Self::EdgeDetails,
|
||||||
|
universe: &RoaringBitmap,
|
||||||
) -> Result<RoaringBitmap> {
|
) -> Result<RoaringBitmap> {
|
||||||
match edge {
|
match edge {
|
||||||
TypoEdge::Phrase { phrase } => resolve_phrase(ctx, *phrase),
|
TypoEdge::Phrase { phrase } => resolve_phrase(ctx, *phrase),
|
||||||
@ -44,14 +45,17 @@ impl RankingRuleGraphTrait for TypoGraph {
|
|||||||
let mut docids = RoaringBitmap::new();
|
let mut docids = RoaringBitmap::new();
|
||||||
for word in words.iter().copied() {
|
for word in words.iter().copied() {
|
||||||
let Some(bytes) = ctx.get_word_docids(word)? else { continue };
|
let Some(bytes) = ctx.get_word_docids(word)? else { continue };
|
||||||
let bitmap =
|
// TODO: deserialize bitmap within a universe
|
||||||
RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding)?;
|
let bitmap = universe
|
||||||
|
& RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding)?;
|
||||||
docids |= bitmap;
|
docids |= bitmap;
|
||||||
}
|
}
|
||||||
if *nbr_typos == 0 {
|
if *nbr_typos == 0 {
|
||||||
if let Some(bytes) = ctx.get_prefix_docids(derivations.original)? {
|
if let Some(bytes) = ctx.get_prefix_docids(derivations.original)? {
|
||||||
let bitmap =
|
// TODO: deserialize bitmap within a universe
|
||||||
RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding)?;
|
let bitmap = universe
|
||||||
|
& RoaringBitmapCodec::bytes_decode(bytes)
|
||||||
|
.ok_or(heed::Error::Decoding)?;
|
||||||
docids |= bitmap;
|
docids |= bitmap;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -116,11 +120,11 @@ impl RankingRuleGraphTrait for TypoGraph {
|
|||||||
|
|
||||||
fn log_state(
|
fn log_state(
|
||||||
graph: &RankingRuleGraph<Self>,
|
graph: &RankingRuleGraph<Self>,
|
||||||
paths: &[Vec<u32>],
|
paths: &[Vec<u16>],
|
||||||
empty_paths_cache: &EmptyPathsCache,
|
empty_paths_cache: &EmptyPathsCache,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
distances: &[Vec<u64>],
|
distances: &[Vec<u16>],
|
||||||
cost: u64,
|
cost: u16,
|
||||||
logger: &mut dyn SearchLogger<QueryGraph>,
|
logger: &mut dyn SearchLogger<QueryGraph>,
|
||||||
) {
|
) {
|
||||||
logger.log_typo_state(graph, paths, empty_paths_cache, universe, distances.to_vec(), cost);
|
logger.log_typo_state(graph, paths, empty_paths_cache, universe, distances.to_vec(), cost);
|
||||||
|
@ -262,46 +262,48 @@ mod tests {
|
|||||||
|
|
||||||
println!("nbr docids: {}", index.documents_ids(&txn).unwrap().len());
|
println!("nbr docids: {}", index.documents_ids(&txn).unwrap().len());
|
||||||
|
|
||||||
// loop {
|
loop {
|
||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
|
|
||||||
// let mut logger = crate::new::logger::detailed::DetailedSearchLogger::new("log");
|
// let mut logger = crate::new::logger::detailed::DetailedSearchLogger::new("log");
|
||||||
|
let mut ctx = SearchContext::new(&index, &txn);
|
||||||
|
let results = execute_search(
|
||||||
|
&mut ctx,
|
||||||
|
"which a the releases from poison by the government",
|
||||||
|
None,
|
||||||
|
0,
|
||||||
|
20,
|
||||||
|
&mut DefaultSearchLogger,
|
||||||
|
// &mut logger,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
|
|
||||||
let results = execute_search(
|
// logger.write_d2_description(&mut ctx);
|
||||||
&mut SearchContext::new(&index, &txn),
|
|
||||||
"releases from poison by the government",
|
|
||||||
None,
|
|
||||||
0,
|
|
||||||
20,
|
|
||||||
&mut DefaultSearchLogger,
|
|
||||||
// &mut logger,
|
|
||||||
)
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
// logger.write_d2_description();
|
let elapsed = start.elapsed();
|
||||||
|
println!("{}us", elapsed.as_micros());
|
||||||
|
|
||||||
let elapsed = start.elapsed();
|
let _documents = index
|
||||||
|
.documents(&txn, results.iter().copied())
|
||||||
|
.unwrap()
|
||||||
|
.into_iter()
|
||||||
|
.map(|(id, obkv)| {
|
||||||
|
let mut object = serde_json::Map::default();
|
||||||
|
for (fid, fid_name) in index.fields_ids_map(&txn).unwrap().iter() {
|
||||||
|
let value = obkv.get(fid).unwrap();
|
||||||
|
let value: serde_json::Value = serde_json::from_slice(value).unwrap();
|
||||||
|
object.insert(fid_name.to_owned(), value);
|
||||||
|
}
|
||||||
|
(id, serde_json::to_string_pretty(&object).unwrap())
|
||||||
|
})
|
||||||
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
let documents = index
|
println!("{}us: {:?}", elapsed.as_micros(), results);
|
||||||
.documents(&txn, results.iter().copied())
|
|
||||||
.unwrap()
|
|
||||||
.into_iter()
|
|
||||||
.map(|(id, obkv)| {
|
|
||||||
let mut object = serde_json::Map::default();
|
|
||||||
for (fid, fid_name) in index.fields_ids_map(&txn).unwrap().iter() {
|
|
||||||
let value = obkv.get(fid).unwrap();
|
|
||||||
let value: serde_json::Value = serde_json::from_slice(value).unwrap();
|
|
||||||
object.insert(fid_name.to_owned(), value);
|
|
||||||
}
|
|
||||||
(id, serde_json::to_string_pretty(&object).unwrap())
|
|
||||||
})
|
|
||||||
.collect::<Vec<_>>();
|
|
||||||
|
|
||||||
println!("{}us: {:?}", elapsed.as_micros(), results);
|
|
||||||
for (id, document) in documents {
|
|
||||||
println!("{id}:");
|
|
||||||
println!("{document}");
|
|
||||||
}
|
}
|
||||||
|
// for (id, _document) in documents {
|
||||||
|
// println!("{id}:");
|
||||||
|
// // println!("{document}");
|
||||||
|
// }
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -342,9 +344,9 @@ mod tests {
|
|||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
println!("{}us: {:?}", elapsed.as_micros(), docs.documents_ids);
|
println!("{}us: {:?}", elapsed.as_micros(), docs.documents_ids);
|
||||||
for (id, document) in documents {
|
for (id, _document) in documents {
|
||||||
println!("{id}:");
|
println!("{id}:");
|
||||||
println!("{document}");
|
// println!("{document}");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
#[test]
|
#[test]
|
||||||
@ -360,7 +362,7 @@ mod tests {
|
|||||||
// loop {
|
// loop {
|
||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
|
|
||||||
let mut logger = crate::new::logger::detailed::DetailedSearchLogger::new("log");
|
// let mut logger = crate::new::logger::detailed::DetailedSearchLogger::new("log");
|
||||||
let mut ctx = SearchContext::new(&index, &txn);
|
let mut ctx = SearchContext::new(&index, &txn);
|
||||||
let results = execute_search(
|
let results = execute_search(
|
||||||
&mut ctx,
|
&mut ctx,
|
||||||
@ -368,12 +370,12 @@ mod tests {
|
|||||||
None,
|
None,
|
||||||
0,
|
0,
|
||||||
20,
|
20,
|
||||||
// &mut DefaultSearchLogger,
|
&mut DefaultSearchLogger,
|
||||||
&mut logger,
|
// &mut logger,
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
logger.write_d2_description(&mut ctx);
|
// logger.write_d2_description(&mut ctx);
|
||||||
|
|
||||||
let elapsed = start.elapsed();
|
let elapsed = start.elapsed();
|
||||||
|
|
||||||
|
@ -1,5 +1,6 @@
|
|||||||
use super::interner::Interned;
|
use super::interner::Interned;
|
||||||
use super::query_term::{Phrase, QueryTerm, WordDerivations};
|
use super::query_term::{Phrase, QueryTerm, WordDerivations};
|
||||||
|
use super::small_bitmap::SmallBitmap;
|
||||||
use super::{QueryGraph, QueryNode, SearchContext};
|
use super::{QueryGraph, QueryNode, SearchContext};
|
||||||
use crate::{CboRoaringBitmapCodec, Result, RoaringBitmapCodec};
|
use crate::{CboRoaringBitmapCodec, Result, RoaringBitmapCodec};
|
||||||
use fxhash::FxHashMap;
|
use fxhash::FxHashMap;
|
||||||
@ -10,13 +11,13 @@ use std::collections::VecDeque;
|
|||||||
// TODO: manual performance metrics: access to DB, bitmap deserializations/operations, etc.
|
// TODO: manual performance metrics: access to DB, bitmap deserializations/operations, etc.
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
pub struct NodeDocIdsCache {
|
pub struct NodeDocIdsCache {
|
||||||
pub cache: FxHashMap<u32, RoaringBitmap>,
|
pub cache: FxHashMap<u16, RoaringBitmap>,
|
||||||
}
|
}
|
||||||
impl<'search> SearchContext<'search> {
|
impl<'search> SearchContext<'search> {
|
||||||
fn get_node_docids<'cache>(
|
fn get_node_docids<'cache>(
|
||||||
&'cache mut self,
|
&'cache mut self,
|
||||||
term: &QueryTerm,
|
term: &QueryTerm,
|
||||||
node_idx: u32,
|
node_idx: u16,
|
||||||
) -> Result<&'cache RoaringBitmap> {
|
) -> Result<&'cache RoaringBitmap> {
|
||||||
if self.node_docids_cache.cache.contains_key(&node_idx) {
|
if self.node_docids_cache.cache.contains_key(&node_idx) {
|
||||||
return Ok(&self.node_docids_cache.cache[&node_idx]);
|
return Ok(&self.node_docids_cache.cache[&node_idx]);
|
||||||
@ -76,7 +77,7 @@ pub fn resolve_query_graph<'search>(
|
|||||||
// TODO: there is definitely a faster way to compute this big
|
// TODO: there is definitely a faster way to compute this big
|
||||||
// roaring bitmap expression
|
// roaring bitmap expression
|
||||||
|
|
||||||
let mut nodes_resolved = RoaringBitmap::new();
|
let mut nodes_resolved = SmallBitmap::new(64);
|
||||||
let mut path_nodes_docids = vec![RoaringBitmap::new(); q.nodes.len()];
|
let mut path_nodes_docids = vec![RoaringBitmap::new(); q.nodes.len()];
|
||||||
|
|
||||||
let mut next_nodes_to_visit = VecDeque::new();
|
let mut next_nodes_to_visit = VecDeque::new();
|
||||||
@ -89,8 +90,10 @@ pub fn resolve_query_graph<'search>(
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// Take union of all predecessors
|
// Take union of all predecessors
|
||||||
let predecessors_iter = predecessors.iter().map(|p| &path_nodes_docids[p as usize]);
|
let mut predecessors_docids = RoaringBitmap::new();
|
||||||
let predecessors_docids = MultiOps::union(predecessors_iter);
|
for p in predecessors.iter() {
|
||||||
|
predecessors_docids |= &path_nodes_docids[p as usize];
|
||||||
|
}
|
||||||
|
|
||||||
let n = &q.nodes[node as usize];
|
let n = &q.nodes[node as usize];
|
||||||
|
|
||||||
|
271
milli/src/search/new/small_bitmap.rs
Normal file
271
milli/src/search/new/small_bitmap.rs
Normal file
@ -0,0 +1,271 @@
|
|||||||
|
// #[macro_export]
|
||||||
|
// macro_rules! iter_bitmap {
|
||||||
|
// ($bitmap:expr, $id:lifetime, $p:pat, $body:block) => {
|
||||||
|
// match $bitmap {
|
||||||
|
// SmallBitmap::Tiny(mut set) => {
|
||||||
|
// while set > 0 {
|
||||||
|
// let $p = set.trailing_zeros() as u16;
|
||||||
|
// $body;
|
||||||
|
// set &= set - 1;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// SmallBitmap::Small(sets) => {
|
||||||
|
// let mut base = 0;
|
||||||
|
// for set in sets.iter() {
|
||||||
|
// let mut set = *set;
|
||||||
|
// while set > 0 {
|
||||||
|
// let idx = set.trailing_zeros() as u16;
|
||||||
|
// let $p = idx + base;
|
||||||
|
// set &= set - 1;
|
||||||
|
// $body;
|
||||||
|
// }
|
||||||
|
// base += 64;
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// }
|
||||||
|
// };
|
||||||
|
// }
|
||||||
|
|
||||||
|
/// A compact set of `u16` values taken from a small, fixed-size universe.
///
/// - `Tiny` stores the values `0..64` inline in a single 64-bit word.
/// - `Small` stores larger universes in a boxed slice of 64-bit words, where
///   bit `x % 64` of word `x / 64` represents the value `x`.
///
/// Binary operations (`intersection`, `union`, `subtract`, `is_subset`,
/// `intersects`, ...) require both operands to have the same variant and, for
/// `Small`, the same number of words; they panic otherwise.
#[derive(Clone)]
pub enum SmallBitmap {
    Tiny(u64),
    Small(Box<[u64]>),
}

impl SmallBitmap {
    /// Creates an empty bitmap able to hold at least the values
    /// `0..universe_length`.
    ///
    /// Universes of at most 64 values use the inline `Tiny` representation;
    /// larger ones allocate `1 + universe_length / 64` words.
    pub fn new(universe_length: u16) -> Self {
        if universe_length <= 64 {
            Self::Tiny(0)
        } else {
            Self::Small(vec![0; 1 + usize::from(universe_length) / 64].into_boxed_slice())
        }
    }

    /// Creates a bitmap for the given universe containing every value of `xs`.
    ///
    /// # Panics
    /// Panics if a value does not fit in the bitmap (see [`SmallBitmap::insert`]).
    pub fn from_iter(xs: impl IntoIterator<Item = u16>, universe_length: u16) -> Self {
        let mut s = Self::new(universe_length);
        for x in xs {
            s.insert(x);
        }
        s
    }

    /// Creates a bitmap for the given universe containing every value of `xs`.
    ///
    /// # Panics
    /// Panics if a value does not fit in the bitmap (see [`SmallBitmap::insert`]).
    pub fn from_array(xs: &[u16], universe_length: u16) -> Self {
        Self::from_iter(xs.iter().copied(), universe_length)
    }

    /// Returns `true` if the bitmap contains no value.
    pub fn is_empty(&self) -> bool {
        self.words().iter().all(|word| *word == 0)
    }

    /// Removes every value from the bitmap, keeping its capacity.
    pub fn clear(&mut self) {
        self.words_mut().fill(0);
    }

    /// Returns `true` if `x` is in the bitmap.
    ///
    /// A value that does not fit in the bitmap is never contained, so this
    /// returns `false` for it instead of overflowing the shift or indexing out
    /// of bounds.
    pub fn contains(&self, x: u16) -> bool {
        let (idx, bit) = Self::position(x);
        self.words().get(idx).map_or(false, |word| word & bit != 0)
    }

    /// Adds `x` to the bitmap.
    ///
    /// # Panics
    /// Panics if `x` does not fit in the bitmap. An unchecked shift would
    /// instead silently set bit `x % 64` in release builds.
    pub fn insert(&mut self, x: u16) {
        let (idx, bit) = Self::position(x);
        match self.words_mut().get_mut(idx) {
            Some(word) => *word |= bit,
            None => panic!("SmallBitmap::insert: value {x} is outside the bitmap's universe"),
        }
    }

    /// Removes `x` from the bitmap. Does nothing if `x` is absent, including
    /// when `x` does not fit in the bitmap at all.
    pub fn remove(&mut self, x: u16) {
        let (idx, bit) = Self::position(x);
        if let Some(word) = self.words_mut().get_mut(idx) {
            *word &= !bit;
        }
    }

    /// Keeps only the values that are also in `other`.
    pub fn intersection(&mut self, other: &SmallBitmap) {
        self.apply_op(other, |a, b| *a &= b);
    }

    /// Adds every value of `other` to `self`.
    pub fn union(&mut self, other: &SmallBitmap) {
        self.apply_op(other, |a, b| *a |= b);
    }

    /// Removes every value of `other` from `self`.
    pub fn subtract(&mut self, other: &SmallBitmap) {
        self.apply_op(other, |a, b| *a &= !b);
    }

    /// Applies `op` to each pair of corresponding words of `self` and `other`,
    /// letting it update the word of `self` in place.
    ///
    /// # Panics
    /// Panics if the two bitmaps do not have the same shape.
    pub fn apply_op(&mut self, other: &SmallBitmap, op: impl Fn(&mut u64, u64)) {
        match (self, other) {
            (SmallBitmap::Tiny(a), SmallBitmap::Tiny(b)) => op(a, *b),
            (SmallBitmap::Small(a), SmallBitmap::Small(b)) => {
                assert!(
                    a.len() == b.len(),
                    "SmallBitmap operands were created for different universe sizes"
                );
                for (a, b) in a.iter_mut().zip(b.iter()) {
                    op(a, *b);
                }
            }
            (SmallBitmap::Tiny(_), SmallBitmap::Small(_))
            | (SmallBitmap::Small(_), SmallBitmap::Tiny(_)) => {
                panic!("SmallBitmap operands were created for different universe sizes")
            }
        }
    }

    /// Returns `true` if `op` holds for every pair of corresponding words.
    ///
    /// # Panics
    /// Panics if the two bitmaps do not have the same shape.
    pub fn all_satisfy_op(&self, other: &SmallBitmap, op: impl Fn(u64, u64) -> bool) -> bool {
        match (self, other) {
            (SmallBitmap::Tiny(a), SmallBitmap::Tiny(b)) => op(*a, *b),
            (SmallBitmap::Small(a), SmallBitmap::Small(b)) => {
                assert!(
                    a.len() == b.len(),
                    "SmallBitmap operands were created for different universe sizes"
                );
                a.iter().zip(b.iter()).all(|(a, b)| op(*a, *b))
            }
            (SmallBitmap::Tiny(_), SmallBitmap::Small(_))
            | (SmallBitmap::Small(_), SmallBitmap::Tiny(_)) => {
                panic!("SmallBitmap operands were created for different universe sizes")
            }
        }
    }

    /// Returns `true` if `op` holds for at least one pair of corresponding words.
    ///
    /// # Panics
    /// Panics if the two bitmaps do not have the same shape.
    pub fn any_satisfy_op(&self, other: &SmallBitmap, op: impl Fn(u64, u64) -> bool) -> bool {
        match (self, other) {
            (SmallBitmap::Tiny(a), SmallBitmap::Tiny(b)) => op(*a, *b),
            (SmallBitmap::Small(a), SmallBitmap::Small(b)) => {
                assert!(
                    a.len() == b.len(),
                    "SmallBitmap operands were created for different universe sizes"
                );
                a.iter().zip(b.iter()).any(|(a, b)| op(*a, *b))
            }
            (SmallBitmap::Tiny(_), SmallBitmap::Small(_))
            | (SmallBitmap::Small(_), SmallBitmap::Tiny(_)) => {
                panic!("SmallBitmap operands were created for different universe sizes")
            }
        }
    }

    /// Returns `true` if every value of `self` is also in `other`.
    pub fn is_subset(&self, other: &SmallBitmap) -> bool {
        self.all_satisfy_op(other, |a, b| a & !b == 0)
    }

    /// Returns `true` if `self` and `other` share at least one value.
    pub fn intersects(&self, other: &SmallBitmap) -> bool {
        self.any_satisfy_op(other, |a, b| a & b != 0)
    }

    /// Iterates over the values of the bitmap in increasing order.
    pub fn iter(&self) -> SmallBitmapIter<'_> {
        match self {
            SmallBitmap::Tiny(x) => SmallBitmapIter::Tiny(*x),
            SmallBitmap::Small(xs) => match xs.split_first() {
                Some((first, rest)) => SmallBitmapIter::Small { cur: *first, next: rest, base: 0 },
                // A hand-built empty slice simply yields nothing.
                None => SmallBitmapIter::Small { cur: 0, next: &[], base: 0 },
            },
        }
    }

    /// Word index and single-bit mask representing `x`.
    fn position(x: u16) -> (usize, u64) {
        (usize::from(x / 64), 1u64 << (x % 64))
    }

    /// The storage words of the bitmap, whatever its representation.
    fn words(&self) -> &[u64] {
        match self {
            SmallBitmap::Tiny(set) => std::slice::from_ref(set),
            SmallBitmap::Small(sets) => &sets[..],
        }
    }

    /// Mutable access to the storage words of the bitmap.
    fn words_mut(&mut self) -> &mut [u64] {
        match self {
            SmallBitmap::Tiny(set) => std::slice::from_mut(set),
            SmallBitmap::Small(sets) => &mut sets[..],
        }
    }
}

/// Iterator over the values of a [`SmallBitmap`], in increasing order.
///
/// `Small` keeps the word being drained in `cur`, the words still to visit in
/// `next`, and the value represented by bit 0 of `cur` in `base`.
pub enum SmallBitmapIter<'b> {
    Tiny(u64),
    Small { cur: u64, next: &'b [u64], base: u16 },
}

/// Clears the lowest set bit of `set` and returns its index, or `None` when
/// `set` is zero.
fn pop_lowest_bit(set: &mut u64) -> Option<u16> {
    if *set == 0 {
        return None;
    }
    // `trailing_zeros` of a non-zero u64 is at most 63, so the cast is lossless.
    let idx = set.trailing_zeros() as u16;
    // Clears the lowest set bit.
    *set &= *set - 1;
    Some(idx)
}

impl<'b> Iterator for SmallBitmapIter<'b> {
    type Item = u16;

    fn next(&mut self) -> Option<Self::Item> {
        match self {
            SmallBitmapIter::Tiny(set) => pop_lowest_bit(set),
            SmallBitmapIter::Small { cur, next, base } => loop {
                if let Some(idx) = pop_lowest_bit(cur) {
                    return Some(idx + *base);
                }
                // The current word is exhausted: move on to the next one, or
                // stop when none is left. A loop (rather than recursion) keeps
                // the stack flat across long runs of empty words.
                let remaining: &'b [u64] = *next;
                let (first, rest) = remaining.split_first()?;
                *base += 64;
                *cur = *first;
                *next = rest;
            },
        }
    }
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::SmallBitmap;

    /// Intersecting the even numbers with the multiples of three (both below
    /// 32) must leave exactly the multiples of six.
    #[test]
    fn test_small_bitmap() {
        let mut bitmap1 = SmallBitmap::new(32);
        for x in 0..16 {
            bitmap1.insert(x * 2);
        }
        let mut bitmap2 = SmallBitmap::new(32);
        for x in 0..=10 {
            bitmap2.insert(x * 3);
        }
        bitmap1.intersection(&bitmap2);

        let common: Vec<u16> = bitmap1.iter().collect();
        assert_eq!(common, vec![0, 6, 12, 18, 24, 30]);
        assert!(bitmap1.contains(12));
        assert!(!bitmap1.contains(2));
        assert!(!bitmap1.contains(3));
    }
}
|
Loading…
Reference in New Issue
Block a user