mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-02-21 09:48:28 +01:00
Add a few more optimisations to new search algorithms
This commit is contained in:
parent
9051065c22
commit
10626dddfc
@ -6,7 +6,7 @@ use std::collections::hash_map::Entry;
|
|||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
pub struct DatabaseCache<'search> {
|
pub struct DatabaseCache<'search> {
|
||||||
// TODO: interner for all database cache keys
|
// TODO: interner for all database cache keys?
|
||||||
pub word_pair_proximity_docids:
|
pub word_pair_proximity_docids:
|
||||||
FxHashMap<(u8, Interned<String>, Interned<String>), Option<&'search [u8]>>,
|
FxHashMap<(u8, Interned<String>, Interned<String>), Option<&'search [u8]>>,
|
||||||
pub word_prefix_pair_proximity_docids:
|
pub word_prefix_pair_proximity_docids:
|
||||||
|
@ -2,6 +2,7 @@ use super::logger::SearchLogger;
|
|||||||
use super::ranking_rule_graph::EdgeDocidsCache;
|
use super::ranking_rule_graph::EdgeDocidsCache;
|
||||||
use super::ranking_rule_graph::EmptyPathsCache;
|
use super::ranking_rule_graph::EmptyPathsCache;
|
||||||
use super::ranking_rule_graph::{RankingRuleGraph, RankingRuleGraphTrait};
|
use super::ranking_rule_graph::{RankingRuleGraph, RankingRuleGraphTrait};
|
||||||
|
use super::small_bitmap::SmallBitmap;
|
||||||
use super::SearchContext;
|
use super::SearchContext;
|
||||||
use super::{BitmapOrAllRef, QueryGraph, RankingRule, RankingRuleOutput};
|
use super::{BitmapOrAllRef, QueryGraph, RankingRule, RankingRuleOutput};
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
@ -21,7 +22,7 @@ pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
|
|||||||
graph: RankingRuleGraph<G>,
|
graph: RankingRuleGraph<G>,
|
||||||
edge_docids_cache: EdgeDocidsCache<G>,
|
edge_docids_cache: EdgeDocidsCache<G>,
|
||||||
empty_paths_cache: EmptyPathsCache,
|
empty_paths_cache: EmptyPathsCache,
|
||||||
all_distances: Vec<Vec<u16>>,
|
all_distances: Vec<Vec<(u16, SmallBitmap)>>,
|
||||||
cur_distance_idx: usize,
|
cur_distance_idx: usize,
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -65,7 +66,6 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
|
|||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
query_graph: &QueryGraph,
|
query_graph: &QueryGraph,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
// TODO: update old state instead of starting from scratch
|
|
||||||
let mut graph = RankingRuleGraph::build(ctx, query_graph.clone())?;
|
let mut graph = RankingRuleGraph::build(ctx, query_graph.clone())?;
|
||||||
let mut edge_docids_cache = EdgeDocidsCache::default();
|
let mut edge_docids_cache = EdgeDocidsCache::default();
|
||||||
let mut empty_paths_cache = EmptyPathsCache::new(graph.all_edges.len() as u16);
|
let mut empty_paths_cache = EmptyPathsCache::new(graph.all_edges.len() as u16);
|
||||||
@ -77,7 +77,7 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
|
|||||||
universe,
|
universe,
|
||||||
&mut empty_paths_cache,
|
&mut empty_paths_cache,
|
||||||
)?;
|
)?;
|
||||||
let all_distances = graph.initialize_distances_cheapest();
|
let all_distances = graph.initialize_distances_with_necessary_edges();
|
||||||
|
|
||||||
let state = GraphBasedRankingRuleState {
|
let state = GraphBasedRankingRuleState {
|
||||||
graph,
|
graph,
|
||||||
@ -100,6 +100,7 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
|
|||||||
) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
|
) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
|
||||||
assert!(universe.len() > 1);
|
assert!(universe.len() > 1);
|
||||||
let mut state = self.state.take().unwrap();
|
let mut state = self.state.take().unwrap();
|
||||||
|
|
||||||
remove_empty_edges(
|
remove_empty_edges(
|
||||||
ctx,
|
ctx,
|
||||||
&mut state.graph,
|
&mut state.graph,
|
||||||
@ -114,7 +115,7 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
|
|||||||
self.state = None;
|
self.state = None;
|
||||||
return Ok(None);
|
return Ok(None);
|
||||||
}
|
}
|
||||||
let cost =
|
let (cost, _) =
|
||||||
state.all_distances[state.graph.query_graph.root_node as usize][state.cur_distance_idx];
|
state.all_distances[state.graph.query_graph.root_node as usize][state.cur_distance_idx];
|
||||||
state.cur_distance_idx += 1;
|
state.cur_distance_idx += 1;
|
||||||
|
|
||||||
@ -132,12 +133,15 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
|
|||||||
let original_universe = universe;
|
let original_universe = universe;
|
||||||
let mut universe = universe.clone();
|
let mut universe = universe.clone();
|
||||||
|
|
||||||
|
// TODO: remove this unnecessary clone
|
||||||
|
let original_graph = graph.clone();
|
||||||
graph.visit_paths_of_cost(
|
graph.visit_paths_of_cost(
|
||||||
graph.query_graph.root_node as usize,
|
graph.query_graph.root_node as usize,
|
||||||
cost,
|
cost,
|
||||||
all_distances,
|
all_distances,
|
||||||
empty_paths_cache,
|
empty_paths_cache,
|
||||||
|path, graph, empty_paths_cache| {
|
|path, graph, empty_paths_cache| {
|
||||||
|
paths.push(path.to_vec());
|
||||||
let mut path_docids = universe.clone();
|
let mut path_docids = universe.clone();
|
||||||
let mut visited_edges = vec![];
|
let mut visited_edges = vec![];
|
||||||
let mut cached_edge_docids = vec![];
|
let mut cached_edge_docids = vec![];
|
||||||
@ -161,7 +165,7 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
|
|||||||
path_docids &= edge_docids;
|
path_docids &= edge_docids;
|
||||||
|
|
||||||
if path_docids.is_disjoint(&universe) {
|
if path_docids.is_disjoint(&universe) {
|
||||||
empty_paths_cache.forbid_prefix(&visited_edges);
|
// empty_paths_cache.forbid_prefix(&visited_edges);
|
||||||
// if the intersection between this edge and any
|
// if the intersection between this edge and any
|
||||||
// previous one is disjoint with the universe,
|
// previous one is disjoint with the universe,
|
||||||
// then we add these two edges to the empty_path_cache
|
// then we add these two edges to the empty_path_cache
|
||||||
@ -170,14 +174,12 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
|
|||||||
{
|
{
|
||||||
let intersection = edge_docids & edge_docids2;
|
let intersection = edge_docids & edge_docids2;
|
||||||
if intersection.is_disjoint(&universe) {
|
if intersection.is_disjoint(&universe) {
|
||||||
// needs_filtering_empty_couple_edges = true;
|
|
||||||
empty_paths_cache.forbid_couple_edges(*edge_index2, edge_index);
|
empty_paths_cache.forbid_couple_edges(*edge_index2, edge_index);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
paths.push(path.to_vec());
|
|
||||||
bucket |= &path_docids;
|
bucket |= &path_docids;
|
||||||
universe -= path_docids;
|
universe -= path_docids;
|
||||||
Ok(())
|
Ok(())
|
||||||
@ -185,7 +187,7 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
|
|||||||
)?;
|
)?;
|
||||||
|
|
||||||
G::log_state(
|
G::log_state(
|
||||||
&state.graph,
|
&original_graph,
|
||||||
&paths,
|
&paths,
|
||||||
&state.empty_paths_cache,
|
&state.empty_paths_cache,
|
||||||
original_universe,
|
original_universe,
|
||||||
|
@ -6,6 +6,7 @@ use std::time::Instant;
|
|||||||
use std::{io::Write, path::PathBuf};
|
use std::{io::Write, path::PathBuf};
|
||||||
|
|
||||||
use crate::new::ranking_rule_graph::TypoGraph;
|
use crate::new::ranking_rule_graph::TypoGraph;
|
||||||
|
use crate::new::small_bitmap::SmallBitmap;
|
||||||
use crate::new::{QueryNode, QueryGraph, SearchContext};
|
use crate::new::{QueryNode, QueryGraph, SearchContext};
|
||||||
use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
|
use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
|
||||||
use crate::new::ranking_rule_graph::EmptyPathsCache;
|
use crate::new::ranking_rule_graph::EmptyPathsCache;
|
||||||
@ -45,7 +46,7 @@ pub enum SearchEvents {
|
|||||||
paths: Vec<Vec<u16>>,
|
paths: Vec<Vec<u16>>,
|
||||||
empty_paths_cache: EmptyPathsCache,
|
empty_paths_cache: EmptyPathsCache,
|
||||||
universe: RoaringBitmap,
|
universe: RoaringBitmap,
|
||||||
distances: Vec<Vec<u16>>,
|
distances: Vec<Vec<(u16, SmallBitmap)>>,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
},
|
},
|
||||||
TypoState {
|
TypoState {
|
||||||
@ -53,7 +54,7 @@ pub enum SearchEvents {
|
|||||||
paths: Vec<Vec<u16>>,
|
paths: Vec<Vec<u16>>,
|
||||||
empty_paths_cache: EmptyPathsCache,
|
empty_paths_cache: EmptyPathsCache,
|
||||||
universe: RoaringBitmap,
|
universe: RoaringBitmap,
|
||||||
distances: Vec<Vec<u16>>,
|
distances: Vec<Vec<(u16, SmallBitmap)>>,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
},
|
},
|
||||||
RankingRuleSkipBucket { ranking_rule_idx: usize, candidates: RoaringBitmap, time: Instant },
|
RankingRuleSkipBucket { ranking_rule_idx: usize, candidates: RoaringBitmap, time: Instant },
|
||||||
@ -165,11 +166,11 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
|
|||||||
self.events.push(SearchEvents::WordsState { query_graph: query_graph.clone() });
|
self.events.push(SearchEvents::WordsState { query_graph: query_graph.clone() });
|
||||||
}
|
}
|
||||||
|
|
||||||
fn log_proximity_state(&mut self, query_graph: &RankingRuleGraph<ProximityGraph>, paths_map: &[Vec<u16>], empty_paths_cache: &EmptyPathsCache, universe: &RoaringBitmap, distances: Vec<Vec<u16>>, cost: u16,) {
|
fn log_proximity_state(&mut self, query_graph: &RankingRuleGraph<ProximityGraph>, paths_map: &[Vec<u16>], empty_paths_cache: &EmptyPathsCache, universe: &RoaringBitmap, distances: Vec<Vec<(u16, SmallBitmap)>>, cost: u16,) {
|
||||||
self.events.push(SearchEvents::ProximityState { graph: query_graph.clone(), paths: paths_map.to_vec(), empty_paths_cache: empty_paths_cache.clone(), universe: universe.clone(), distances, cost })
|
self.events.push(SearchEvents::ProximityState { graph: query_graph.clone(), paths: paths_map.to_vec(), empty_paths_cache: empty_paths_cache.clone(), universe: universe.clone(), distances, cost })
|
||||||
}
|
}
|
||||||
|
|
||||||
fn log_typo_state(&mut self, query_graph: &RankingRuleGraph<TypoGraph>, paths_map: &[Vec<u16>], empty_paths_cache: &EmptyPathsCache, universe: &RoaringBitmap, distances: Vec<Vec<u16>>, cost: u16,) {
|
fn log_typo_state(&mut self, query_graph: &RankingRuleGraph<TypoGraph>, paths_map: &[Vec<u16>], empty_paths_cache: &EmptyPathsCache, universe: &RoaringBitmap, distances: Vec<Vec<(u16, SmallBitmap)>>, cost: u16,) {
|
||||||
self.events.push(SearchEvents::TypoState { graph: query_graph.clone(), paths: paths_map.to_vec(), empty_paths_cache: empty_paths_cache.clone(), universe: universe.clone(), distances, cost })
|
self.events.push(SearchEvents::TypoState { graph: query_graph.clone(), paths: paths_map.to_vec(), empty_paths_cache: empty_paths_cache.clone(), universe: universe.clone(), distances, cost })
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -352,7 +353,7 @@ results.{random} {{
|
|||||||
writeln!(&mut file, "}}").unwrap();
|
writeln!(&mut file, "}}").unwrap();
|
||||||
}
|
}
|
||||||
|
|
||||||
fn query_node_d2_desc(ctx: &mut SearchContext, node_idx: usize, node: &QueryNode, _distances: &[u16], file: &mut File) {
|
fn query_node_d2_desc(ctx: &mut SearchContext, node_idx: usize, node: &QueryNode, distances: &[(u16, SmallBitmap)], file: &mut File) {
|
||||||
match &node {
|
match &node {
|
||||||
QueryNode::Term(LocatedQueryTerm { value, .. }) => {
|
QueryNode::Term(LocatedQueryTerm { value, .. }) => {
|
||||||
match value {
|
match value {
|
||||||
@ -390,9 +391,9 @@ shape: class").unwrap();
|
|||||||
if *use_prefix_db {
|
if *use_prefix_db {
|
||||||
writeln!(file, "use prefix DB : true").unwrap();
|
writeln!(file, "use prefix DB : true").unwrap();
|
||||||
}
|
}
|
||||||
// for (i, d) in distances.iter().enumerate() {
|
for (d, edges) in distances.iter() {
|
||||||
// writeln!(file, "\"distances\" : {d}").unwrap();
|
writeln!(file, "\"distance {d}\" : {:?}", edges.iter().collect::<Vec<_>>() ).unwrap();
|
||||||
// }
|
}
|
||||||
|
|
||||||
writeln!(file, "}}").unwrap();
|
writeln!(file, "}}").unwrap();
|
||||||
},
|
},
|
||||||
@ -420,7 +421,7 @@ shape: class").unwrap();
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
fn ranking_rule_graph_d2_description<R: RankingRuleGraphTrait>(ctx: &mut SearchContext, graph: &RankingRuleGraph<R>, paths: &[Vec<u16>], _empty_paths_cache: &EmptyPathsCache, distances: Vec<Vec<u16>>, file: &mut File) {
|
fn ranking_rule_graph_d2_description<R: RankingRuleGraphTrait>(ctx: &mut SearchContext, graph: &RankingRuleGraph<R>, paths: &[Vec<u16>], _empty_paths_cache: &EmptyPathsCache, distances: Vec<Vec<(u16, SmallBitmap)>>, file: &mut File) {
|
||||||
writeln!(file,"direction: right").unwrap();
|
writeln!(file,"direction: right").unwrap();
|
||||||
|
|
||||||
writeln!(file, "Proximity Graph {{").unwrap();
|
writeln!(file, "Proximity Graph {{").unwrap();
|
||||||
@ -477,7 +478,7 @@ shape: class").unwrap();
|
|||||||
// }
|
// }
|
||||||
// writeln!(file, "}}").unwrap();
|
// writeln!(file, "}}").unwrap();
|
||||||
}
|
}
|
||||||
fn edge_d2_description<R: RankingRuleGraphTrait>(ctx: &mut SearchContext,graph: &RankingRuleGraph<R>, edge_idx: u16, file: &mut File) {
|
fn edge_d2_description<R: RankingRuleGraphTrait>(ctx: &mut SearchContext, graph: &RankingRuleGraph<R>, edge_idx: u16, file: &mut File) {
|
||||||
let Edge { from_node, to_node, cost, .. } = graph.all_edges[edge_idx as usize].as_ref().unwrap() ;
|
let Edge { from_node, to_node, cost, .. } = graph.all_edges[edge_idx as usize].as_ref().unwrap() ;
|
||||||
let from_node = &graph.query_graph.nodes[*from_node as usize];
|
let from_node = &graph.query_graph.nodes[*from_node as usize];
|
||||||
let from_node_desc = match from_node {
|
let from_node_desc = match from_node {
|
||||||
|
@ -5,6 +5,7 @@ use roaring::RoaringBitmap;
|
|||||||
|
|
||||||
use super::{
|
use super::{
|
||||||
ranking_rule_graph::{EmptyPathsCache, ProximityGraph, RankingRuleGraph, TypoGraph},
|
ranking_rule_graph::{EmptyPathsCache, ProximityGraph, RankingRuleGraph, TypoGraph},
|
||||||
|
small_bitmap::SmallBitmap,
|
||||||
RankingRule, RankingRuleQueryTrait,
|
RankingRule, RankingRuleQueryTrait,
|
||||||
};
|
};
|
||||||
|
|
||||||
@ -61,7 +62,7 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
|
|||||||
_paths_map: &[Vec<u16>],
|
_paths_map: &[Vec<u16>],
|
||||||
_empty_paths_cache: &EmptyPathsCache,
|
_empty_paths_cache: &EmptyPathsCache,
|
||||||
_universe: &RoaringBitmap,
|
_universe: &RoaringBitmap,
|
||||||
_distances: Vec<Vec<u16>>,
|
_distances: Vec<Vec<(u16, SmallBitmap)>>,
|
||||||
_cost: u16,
|
_cost: u16,
|
||||||
) {
|
) {
|
||||||
}
|
}
|
||||||
@ -72,7 +73,7 @@ impl<Q: RankingRuleQueryTrait> SearchLogger<Q> for DefaultSearchLogger {
|
|||||||
_paths: &[Vec<u16>],
|
_paths: &[Vec<u16>],
|
||||||
_empty_paths_cache: &EmptyPathsCache,
|
_empty_paths_cache: &EmptyPathsCache,
|
||||||
_universe: &RoaringBitmap,
|
_universe: &RoaringBitmap,
|
||||||
_distances: Vec<Vec<u16>>,
|
_distances: Vec<Vec<(u16, SmallBitmap)>>,
|
||||||
_cost: u16,
|
_cost: u16,
|
||||||
) {
|
) {
|
||||||
}
|
}
|
||||||
@ -123,7 +124,7 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
|
|||||||
paths: &[Vec<u16>],
|
paths: &[Vec<u16>],
|
||||||
empty_paths_cache: &EmptyPathsCache,
|
empty_paths_cache: &EmptyPathsCache,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
distances: Vec<Vec<u16>>,
|
distances: Vec<Vec<(u16, SmallBitmap)>>,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
);
|
);
|
||||||
|
|
||||||
@ -133,7 +134,7 @@ pub trait SearchLogger<Q: RankingRuleQueryTrait> {
|
|||||||
paths: &[Vec<u16>],
|
paths: &[Vec<u16>],
|
||||||
empty_paths_cache: &EmptyPathsCache,
|
empty_paths_cache: &EmptyPathsCache,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
distances: Vec<Vec<u16>>,
|
distances: Vec<Vec<(u16, SmallBitmap)>>,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
);
|
);
|
||||||
}
|
}
|
||||||
|
@ -4,7 +4,8 @@ use super::empty_paths_cache::EmptyPathsCache;
|
|||||||
use super::{RankingRuleGraph, RankingRuleGraphTrait};
|
use super::{RankingRuleGraph, RankingRuleGraphTrait};
|
||||||
use crate::new::small_bitmap::SmallBitmap;
|
use crate::new::small_bitmap::SmallBitmap;
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
use std::collections::VecDeque;
|
use std::collections::btree_map::Entry;
|
||||||
|
use std::collections::{BTreeMap, VecDeque};
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
|
||||||
pub struct Path {
|
pub struct Path {
|
||||||
@ -17,7 +18,7 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
&mut self,
|
&mut self,
|
||||||
from: usize,
|
from: usize,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
all_distances: &[Vec<u16>],
|
all_distances: &[Vec<(u16, SmallBitmap)>],
|
||||||
empty_paths_cache: &mut EmptyPathsCache,
|
empty_paths_cache: &mut EmptyPathsCache,
|
||||||
mut visit: impl FnMut(&[u16], &mut Self, &mut EmptyPathsCache) -> Result<()>,
|
mut visit: impl FnMut(&[u16], &mut Self, &mut EmptyPathsCache) -> Result<()>,
|
||||||
) -> Result<()> {
|
) -> Result<()> {
|
||||||
@ -37,13 +38,9 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
&mut self,
|
&mut self,
|
||||||
from: usize,
|
from: usize,
|
||||||
cost: u16,
|
cost: u16,
|
||||||
// TODO: replace all_distances with a Vec<SmallBitmap> where the SmallBitmap contains true if the cost exists and false otherwise
|
all_distances: &[Vec<(u16, SmallBitmap)>],
|
||||||
all_distances: &[Vec<u16>],
|
|
||||||
empty_paths_cache: &mut EmptyPathsCache,
|
empty_paths_cache: &mut EmptyPathsCache,
|
||||||
visit: &mut impl FnMut(&[u16], &mut Self, &mut EmptyPathsCache) -> Result<()>,
|
visit: &mut impl FnMut(&[u16], &mut Self, &mut EmptyPathsCache) -> Result<()>,
|
||||||
// replace prev edges by:
|
|
||||||
// (1) a small bitmap representing the path
|
|
||||||
// (2) a pointer within the EmptyPathsCache::forbidden_prefixes structure
|
|
||||||
prev_edges: &mut Vec<u16>,
|
prev_edges: &mut Vec<u16>,
|
||||||
cur_path: &mut SmallBitmap,
|
cur_path: &mut SmallBitmap,
|
||||||
mut forbidden_edges: SmallBitmap,
|
mut forbidden_edges: SmallBitmap,
|
||||||
@ -55,7 +52,12 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
let Some(edge) = self.all_edges[edge_idx as usize].as_ref() else { continue };
|
let Some(edge) = self.all_edges[edge_idx as usize].as_ref() else { continue };
|
||||||
if cost < edge.cost as u16
|
if cost < edge.cost as u16
|
||||||
|| forbidden_edges.contains(edge_idx)
|
|| forbidden_edges.contains(edge_idx)
|
||||||
|| !all_distances[edge.to_node as usize].contains(&(cost - edge.cost as u16))
|
|| !all_distances[edge.to_node as usize].iter().any(
|
||||||
|
|(next_cost, necessary_edges)| {
|
||||||
|
(*next_cost == cost - edge.cost as u16)
|
||||||
|
&& !forbidden_edges.intersects(necessary_edges)
|
||||||
|
},
|
||||||
|
)
|
||||||
{
|
{
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -99,21 +101,20 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
forbidden_edges.insert(x);
|
forbidden_edges.insert(x);
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
if next_any_valid && empty_paths_cache.path_is_empty(prev_edges, cur_path) {
|
|
||||||
return Ok(any_valid);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(any_valid)
|
Ok(any_valid)
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn initialize_distances_cheapest(&self) -> Vec<Vec<u16>> {
|
pub fn initialize_distances_with_necessary_edges(&self) -> Vec<Vec<(u16, SmallBitmap)>> {
|
||||||
let mut distances_to_end: Vec<Vec<u16>> = vec![vec![]; self.query_graph.nodes.len()];
|
let mut distances_to_end: Vec<Vec<(u16, SmallBitmap)>> =
|
||||||
|
vec![vec![]; self.query_graph.nodes.len()];
|
||||||
let mut enqueued = SmallBitmap::new(self.query_graph.nodes.len() as u16);
|
let mut enqueued = SmallBitmap::new(self.query_graph.nodes.len() as u16);
|
||||||
|
|
||||||
let mut node_stack = VecDeque::new();
|
let mut node_stack = VecDeque::new();
|
||||||
|
|
||||||
distances_to_end[self.query_graph.end_node as usize] = vec![0];
|
distances_to_end[self.query_graph.end_node as usize] =
|
||||||
|
vec![(0, SmallBitmap::new(self.all_edges.len() as u16))];
|
||||||
|
|
||||||
for prev_node in
|
for prev_node in
|
||||||
self.query_graph.edges[self.query_graph.end_node as usize].predecessors.iter()
|
self.query_graph.edges[self.query_graph.end_node as usize].predecessors.iter()
|
||||||
@ -123,21 +124,29 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
while let Some(cur_node) = node_stack.pop_front() {
|
while let Some(cur_node) = node_stack.pop_front() {
|
||||||
let mut self_distances = vec![];
|
let mut self_distances = BTreeMap::<u16, SmallBitmap>::new();
|
||||||
|
|
||||||
let cur_node_edges = &self.node_edges[cur_node];
|
let cur_node_edges = &self.node_edges[cur_node];
|
||||||
for edge_idx in cur_node_edges.iter() {
|
for edge_idx in cur_node_edges.iter() {
|
||||||
let edge = self.all_edges[edge_idx as usize].as_ref().unwrap();
|
let edge = self.all_edges[edge_idx as usize].as_ref().unwrap();
|
||||||
let succ_node = edge.to_node;
|
let succ_node = edge.to_node;
|
||||||
let succ_distances = &distances_to_end[succ_node as usize];
|
let succ_distances = &distances_to_end[succ_node as usize];
|
||||||
for succ_distance in succ_distances {
|
for (succ_distance, succ_necessary_edges) in succ_distances {
|
||||||
self_distances.push(edge.cost as u16 + succ_distance);
|
let potential_necessary_edges = SmallBitmap::from_iter(
|
||||||
|
std::iter::once(edge_idx).chain(succ_necessary_edges.iter()),
|
||||||
|
self.all_edges.len() as u16,
|
||||||
|
);
|
||||||
|
match self_distances.entry(edge.cost as u16 + succ_distance) {
|
||||||
|
Entry::Occupied(mut prev_necessary_edges) => {
|
||||||
|
prev_necessary_edges.get_mut().intersection(&potential_necessary_edges);
|
||||||
|
}
|
||||||
|
Entry::Vacant(entry) => {
|
||||||
|
entry.insert(potential_necessary_edges);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
self_distances.sort_unstable();
|
}
|
||||||
self_distances.dedup();
|
distances_to_end[cur_node] = self_distances.into_iter().collect();
|
||||||
distances_to_end[cur_node] = self_distances;
|
|
||||||
for prev_node in self.query_graph.edges[cur_node].predecessors.iter() {
|
for prev_node in self.query_graph.edges[cur_node].predecessors.iter() {
|
||||||
if !enqueued.contains(prev_node) {
|
if !enqueued.contains(prev_node) {
|
||||||
node_stack.push_back(prev_node as usize);
|
node_stack.push_back(prev_node as usize);
|
||||||
|
@ -49,6 +49,9 @@ impl<G: RankingRuleGraphTrait> EdgeDocidsCache<G> {
|
|||||||
if self.cache.contains_key(&edge_index) {
|
if self.cache.contains_key(&edge_index) {
|
||||||
// TODO: should we update the bitmap in the cache if the new universe
|
// TODO: should we update the bitmap in the cache if the new universe
|
||||||
// reduces it?
|
// reduces it?
|
||||||
|
// TODO: maybe have a generation: u32 to track every time the universe was
|
||||||
|
// reduced. Then only attempt to recompute the intersection when there is a chance
|
||||||
|
// that edge_docids & universe changed
|
||||||
return Ok(BitmapOrAllRef::Bitmap(&self.cache[&edge_index]));
|
return Ok(BitmapOrAllRef::Bitmap(&self.cache[&edge_index]));
|
||||||
}
|
}
|
||||||
// TODO: maybe universe doesn't belong here
|
// TODO: maybe universe doesn't belong here
|
||||||
|
@ -119,7 +119,7 @@ pub trait RankingRuleGraphTrait: Sized {
|
|||||||
paths: &[Vec<u16>],
|
paths: &[Vec<u16>],
|
||||||
empty_paths_cache: &EmptyPathsCache,
|
empty_paths_cache: &EmptyPathsCache,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
distances: &[Vec<u16>],
|
distances: &[Vec<(u16, SmallBitmap)>],
|
||||||
cost: u16,
|
cost: u16,
|
||||||
logger: &mut dyn SearchLogger<QueryGraph>,
|
logger: &mut dyn SearchLogger<QueryGraph>,
|
||||||
);
|
);
|
||||||
|
@ -6,6 +6,7 @@ use super::{EdgeDetails, RankingRuleGraphTrait};
|
|||||||
use crate::new::interner::Interned;
|
use crate::new::interner::Interned;
|
||||||
use crate::new::logger::SearchLogger;
|
use crate::new::logger::SearchLogger;
|
||||||
use crate::new::query_term::WordDerivations;
|
use crate::new::query_term::WordDerivations;
|
||||||
|
use crate::new::small_bitmap::SmallBitmap;
|
||||||
use crate::new::{QueryGraph, QueryNode, SearchContext};
|
use crate::new::{QueryGraph, QueryNode, SearchContext};
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
@ -64,7 +65,7 @@ impl RankingRuleGraphTrait for ProximityGraph {
|
|||||||
paths: &[Vec<u16>],
|
paths: &[Vec<u16>],
|
||||||
empty_paths_cache: &EmptyPathsCache,
|
empty_paths_cache: &EmptyPathsCache,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
distances: &[Vec<u16>],
|
distances: &[Vec<(u16, SmallBitmap)>],
|
||||||
cost: u16,
|
cost: u16,
|
||||||
logger: &mut dyn SearchLogger<QueryGraph>,
|
logger: &mut dyn SearchLogger<QueryGraph>,
|
||||||
) {
|
) {
|
||||||
|
@ -4,6 +4,7 @@ use crate::new::interner::Interned;
|
|||||||
use crate::new::logger::SearchLogger;
|
use crate::new::logger::SearchLogger;
|
||||||
use crate::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm, WordDerivations};
|
use crate::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm, WordDerivations};
|
||||||
use crate::new::resolve_query_graph::resolve_phrase;
|
use crate::new::resolve_query_graph::resolve_phrase;
|
||||||
|
use crate::new::small_bitmap::SmallBitmap;
|
||||||
use crate::new::{QueryGraph, QueryNode, SearchContext};
|
use crate::new::{QueryGraph, QueryNode, SearchContext};
|
||||||
use crate::{Result, RoaringBitmapCodec};
|
use crate::{Result, RoaringBitmapCodec};
|
||||||
use heed::BytesDecode;
|
use heed::BytesDecode;
|
||||||
@ -123,7 +124,7 @@ impl RankingRuleGraphTrait for TypoGraph {
|
|||||||
paths: &[Vec<u16>],
|
paths: &[Vec<u16>],
|
||||||
empty_paths_cache: &EmptyPathsCache,
|
empty_paths_cache: &EmptyPathsCache,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
distances: &[Vec<u16>],
|
distances: &[Vec<(u16, SmallBitmap)>],
|
||||||
cost: u16,
|
cost: u16,
|
||||||
logger: &mut dyn SearchLogger<QueryGraph>,
|
logger: &mut dyn SearchLogger<QueryGraph>,
|
||||||
) {
|
) {
|
||||||
|
@ -262,7 +262,7 @@ mod tests {
|
|||||||
|
|
||||||
println!("nbr docids: {}", index.documents_ids(&txn).unwrap().len());
|
println!("nbr docids: {}", index.documents_ids(&txn).unwrap().len());
|
||||||
|
|
||||||
loop {
|
// loop {
|
||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
|
|
||||||
// let mut logger = crate::new::logger::detailed::DetailedSearchLogger::new("log");
|
// let mut logger = crate::new::logger::detailed::DetailedSearchLogger::new("log");
|
||||||
@ -299,7 +299,7 @@ mod tests {
|
|||||||
.collect::<Vec<_>>();
|
.collect::<Vec<_>>();
|
||||||
|
|
||||||
println!("{}us: {:?}", elapsed.as_micros(), results);
|
println!("{}us: {:?}", elapsed.as_micros(), results);
|
||||||
}
|
// }
|
||||||
// for (id, _document) in documents {
|
// for (id, _document) in documents {
|
||||||
// println!("{id}:");
|
// println!("{id}:");
|
||||||
// // println!("{document}");
|
// // println!("{document}");
|
||||||
@ -321,7 +321,7 @@ mod tests {
|
|||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
|
|
||||||
let mut s = Search::new(&txn, &index);
|
let mut s = Search::new(&txn, &index);
|
||||||
s.query("releases from poison by the government");
|
s.query("which a the releases from poison by the government");
|
||||||
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
||||||
s.criterion_implementation_strategy(crate::CriterionImplementationStrategy::OnlySetBased);
|
s.criterion_implementation_strategy(crate::CriterionImplementationStrategy::OnlySetBased);
|
||||||
let docs = s.execute().unwrap();
|
let docs = s.execute().unwrap();
|
||||||
@ -362,7 +362,7 @@ mod tests {
|
|||||||
// loop {
|
// loop {
|
||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
|
|
||||||
// let mut logger = crate::new::logger::detailed::DetailedSearchLogger::new("log");
|
let mut logger = crate::new::logger::detailed::DetailedSearchLogger::new("log");
|
||||||
let mut ctx = SearchContext::new(&index, &txn);
|
let mut ctx = SearchContext::new(&index, &txn);
|
||||||
let results = execute_search(
|
let results = execute_search(
|
||||||
&mut ctx,
|
&mut ctx,
|
||||||
@ -370,12 +370,12 @@ mod tests {
|
|||||||
None,
|
None,
|
||||||
0,
|
0,
|
||||||
20,
|
20,
|
||||||
&mut DefaultSearchLogger,
|
// &mut DefaultSearchLogger,
|
||||||
// &mut logger,
|
&mut logger,
|
||||||
)
|
)
|
||||||
.unwrap();
|
.unwrap();
|
||||||
|
|
||||||
// logger.write_d2_description(&mut ctx);
|
logger.write_d2_description(&mut ctx);
|
||||||
|
|
||||||
let elapsed = start.elapsed();
|
let elapsed = start.elapsed();
|
||||||
|
|
||||||
@ -414,7 +414,7 @@ mod tests {
|
|||||||
let start = Instant::now();
|
let start = Instant::now();
|
||||||
|
|
||||||
let mut s = Search::new(&txn, &index);
|
let mut s = Search::new(&txn, &index);
|
||||||
s.query("releases from poison by the government");
|
s.query("which a the releases from poison by the government");
|
||||||
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
s.terms_matching_strategy(TermsMatchingStrategy::Last);
|
||||||
s.criterion_implementation_strategy(crate::CriterionImplementationStrategy::OnlySetBased);
|
s.criterion_implementation_strategy(crate::CriterionImplementationStrategy::OnlySetBased);
|
||||||
let docs = s.execute().unwrap();
|
let docs = s.execute().unwrap();
|
||||||
|
Loading…
x
Reference in New Issue
Block a user