mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-23 11:47:28 +01:00
Use a cache when resolving the query graph
This commit is contained in:
parent
dcf3f1d18a
commit
a938fbde4a
@ -4,7 +4,7 @@ use roaring::{MultiOps, RoaringBitmap};
|
|||||||
use std::collections::{HashMap, HashSet, VecDeque};
|
use std::collections::{HashMap, HashSet, VecDeque};
|
||||||
|
|
||||||
use super::db_cache::DatabaseCache;
|
use super::db_cache::DatabaseCache;
|
||||||
use super::query_term::{QueryTerm, WordDerivations};
|
use super::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
|
||||||
use super::QueryGraph;
|
use super::QueryGraph;
|
||||||
use crate::{Index, Result, RoaringBitmapCodec};
|
use crate::{Index, Result, RoaringBitmapCodec};
|
||||||
|
|
||||||
@ -13,13 +13,66 @@ use crate::{Index, Result, RoaringBitmapCodec};
|
|||||||
// TODO: reuse NodeDocidsCache in between calls to resolve_query_graph
|
// TODO: reuse NodeDocidsCache in between calls to resolve_query_graph
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
pub struct NodeDocIdsCache {
|
pub struct NodeDocIdsCache {
|
||||||
pub cache: FxHashMap<usize, RoaringBitmap>,
|
pub cache: FxHashMap<u32, RoaringBitmap>,
|
||||||
|
}
|
||||||
|
impl NodeDocIdsCache {
|
||||||
|
fn get_docids<'cache, 'transaction>(
|
||||||
|
&'cache mut self,
|
||||||
|
index: &Index,
|
||||||
|
txn: &'transaction RoTxn,
|
||||||
|
db_cache: &mut DatabaseCache<'transaction>,
|
||||||
|
term: &QueryTerm,
|
||||||
|
node_idx: u32,
|
||||||
|
) -> Result<&'cache RoaringBitmap> {
|
||||||
|
if self.cache.contains_key(&node_idx) {
|
||||||
|
return Ok(&self.cache[&node_idx]);
|
||||||
|
};
|
||||||
|
let docids = match term {
|
||||||
|
QueryTerm::Phrase(_) => {
|
||||||
|
todo!("resolve phrase")
|
||||||
|
}
|
||||||
|
QueryTerm::Word {
|
||||||
|
derivations:
|
||||||
|
WordDerivations { original, zero_typo, one_typo, two_typos, use_prefix_db },
|
||||||
|
} => {
|
||||||
|
let derivations_docids = {
|
||||||
|
let mut or_docids = vec![];
|
||||||
|
for word in zero_typo.iter().chain(one_typo.iter()).chain(two_typos.iter()) {
|
||||||
|
if let Some(word_docids) = db_cache.get_word_docids(index, txn, word)? {
|
||||||
|
or_docids.push(word_docids);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if *use_prefix_db {
|
||||||
|
if let Some(prefix_docids) =
|
||||||
|
db_cache.get_prefix_docids(index, txn, original.as_str())?
|
||||||
|
{
|
||||||
|
or_docids.push(prefix_docids);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
or_docids
|
||||||
|
};
|
||||||
|
let derivations_iter = derivations_docids
|
||||||
|
.into_iter()
|
||||||
|
.map(|slice| RoaringBitmapCodec::bytes_decode(slice).unwrap());
|
||||||
|
MultiOps::union(derivations_iter)
|
||||||
|
// TODO: if `or` is empty, register that somewhere, and immediately return an empty bitmap
|
||||||
|
// On the other hand, `or` *cannot* be empty, only its intersection with the universe can
|
||||||
|
//
|
||||||
|
// TODO: Or we don't do anything and accumulate all these operations in a tree of operations
|
||||||
|
// between frozen roaring bitmap that is resolved only at the very end
|
||||||
|
}
|
||||||
|
};
|
||||||
|
let _ = self.cache.insert(node_idx, docids);
|
||||||
|
let docids = &self.cache[&node_idx];
|
||||||
|
Ok(docids)
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
pub fn resolve_query_graph<'transaction>(
|
pub fn resolve_query_graph<'transaction>(
|
||||||
index: &Index,
|
index: &Index,
|
||||||
txn: &'transaction RoTxn,
|
txn: &'transaction RoTxn,
|
||||||
db_cache: &mut DatabaseCache<'transaction>,
|
db_cache: &mut DatabaseCache<'transaction>,
|
||||||
|
node_docids_cache: &mut NodeDocIdsCache,
|
||||||
q: &QueryGraph,
|
q: &QueryGraph,
|
||||||
universe: &RoaringBitmap,
|
universe: &RoaringBitmap,
|
||||||
) -> Result<RoaringBitmap> {
|
) -> Result<RoaringBitmap> {
|
||||||
@ -30,7 +83,7 @@ pub fn resolve_query_graph<'transaction>(
|
|||||||
|
|
||||||
let mut nodes_resolved = RoaringBitmap::new();
|
let mut nodes_resolved = RoaringBitmap::new();
|
||||||
// TODO: should be given as an argument and kept between invocations of resolve query graph
|
// TODO: should be given as an argument and kept between invocations of resolve query graph
|
||||||
let mut nodes_docids = vec![RoaringBitmap::new(); q.nodes.len()];
|
let mut path_nodes_docids = vec![RoaringBitmap::new(); q.nodes.len()];
|
||||||
|
|
||||||
let mut next_nodes_to_visit = VecDeque::new();
|
let mut next_nodes_to_visit = VecDeque::new();
|
||||||
next_nodes_to_visit.push_front(q.root_node);
|
next_nodes_to_visit.push_front(q.root_node);
|
||||||
@ -42,7 +95,7 @@ pub fn resolve_query_graph<'transaction>(
|
|||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
// Take union of all predecessors
|
// Take union of all predecessors
|
||||||
let predecessors_iter = predecessors.iter().map(|p| &nodes_docids[p as usize]);
|
let predecessors_iter = predecessors.iter().map(|p| &path_nodes_docids[p as usize]);
|
||||||
let predecessors_docids = MultiOps::union(predecessors_iter);
|
let predecessors_docids = MultiOps::union(predecessors_iter);
|
||||||
|
|
||||||
let n = &q.nodes[node as usize];
|
let n = &q.nodes[node as usize];
|
||||||
@ -50,47 +103,12 @@ pub fn resolve_query_graph<'transaction>(
|
|||||||
let node_docids = match n {
|
let node_docids = match n {
|
||||||
super::QueryNode::Term(located_term) => {
|
super::QueryNode::Term(located_term) => {
|
||||||
let term = &located_term.value;
|
let term = &located_term.value;
|
||||||
match term {
|
let derivations_docids =
|
||||||
QueryTerm::Phrase(_) => todo!("resolve phrase"),
|
node_docids_cache.get_docids(index, txn, db_cache, term, node)?;
|
||||||
QueryTerm::Word {
|
predecessors_docids & derivations_docids
|
||||||
derivations:
|
|
||||||
WordDerivations { original, zero_typo, one_typo, two_typos, use_prefix_db },
|
|
||||||
} => {
|
|
||||||
let derivations_docids = {
|
|
||||||
let mut or_docids = vec![];
|
|
||||||
for word in
|
|
||||||
zero_typo.iter().chain(one_typo.iter()).chain(two_typos.iter())
|
|
||||||
{
|
|
||||||
if let Some(word_docids) =
|
|
||||||
db_cache.get_word_docids(index, txn, word)?
|
|
||||||
{
|
|
||||||
or_docids.push(word_docids);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
if *use_prefix_db {
|
|
||||||
if let Some(prefix_docids) =
|
|
||||||
db_cache.get_prefix_docids(index, txn, original.as_str())?
|
|
||||||
{
|
|
||||||
or_docids.push(prefix_docids);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
or_docids
|
|
||||||
};
|
|
||||||
let derivations_iter = derivations_docids
|
|
||||||
.into_iter()
|
|
||||||
.map(|slice| RoaringBitmapCodec::bytes_decode(slice).unwrap());
|
|
||||||
let derivations_docids = MultiOps::union(derivations_iter);
|
|
||||||
// TODO: if `or` is empty, register that somewhere, and immediately return an empty bitmap
|
|
||||||
// On the other hand, `or` *cannot* be empty, only its intersection with the universe can
|
|
||||||
//
|
|
||||||
// TODO: Or we don't do anything and accumulate all these operations in a tree of operations
|
|
||||||
// between frozen roaring bitmap that is resolved only at the very end
|
|
||||||
predecessors_docids & derivations_docids
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
super::QueryNode::Deleted => {
|
super::QueryNode::Deleted => {
|
||||||
todo!()
|
panic!()
|
||||||
}
|
}
|
||||||
super::QueryNode::Start => universe.clone(),
|
super::QueryNode::Start => universe.clone(),
|
||||||
super::QueryNode::End => {
|
super::QueryNode::End => {
|
||||||
@ -98,7 +116,7 @@ pub fn resolve_query_graph<'transaction>(
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
nodes_resolved.insert(node);
|
nodes_resolved.insert(node);
|
||||||
nodes_docids[node as usize] = node_docids;
|
path_nodes_docids[node as usize] = node_docids;
|
||||||
|
|
||||||
for succ in q.edges[node as usize].successors.iter() {
|
for succ in q.edges[node as usize].successors.iter() {
|
||||||
if !next_nodes_to_visit.contains(&succ) && !nodes_resolved.contains(succ) {
|
if !next_nodes_to_visit.contains(&succ) && !nodes_resolved.contains(succ) {
|
||||||
@ -108,7 +126,7 @@ pub fn resolve_query_graph<'transaction>(
|
|||||||
// This is currently slow but could easily be implemented very efficiently
|
// This is currently slow but could easily be implemented very efficiently
|
||||||
for prec in q.edges[node as usize].predecessors.iter() {
|
for prec in q.edges[node as usize].predecessors.iter() {
|
||||||
if q.edges[prec as usize].successors.is_subset(&nodes_resolved) {
|
if q.edges[prec as usize].successors.is_subset(&nodes_resolved) {
|
||||||
nodes_docids[prec as usize].clear();
|
path_nodes_docids[prec as usize].clear();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
// println!("cached docids: {nodes_docids:?}");
|
// println!("cached docids: {nodes_docids:?}");
|
||||||
@ -125,6 +143,7 @@ mod tests {
|
|||||||
use crate::db_snap;
|
use crate::db_snap;
|
||||||
use crate::index::tests::TempIndex;
|
use crate::index::tests::TempIndex;
|
||||||
use crate::new::db_cache::DatabaseCache;
|
use crate::new::db_cache::DatabaseCache;
|
||||||
|
use crate::new::resolve_query_graph::NodeDocIdsCache;
|
||||||
use crate::search::new::query_term::{word_derivations, LocatedQueryTerm};
|
use crate::search::new::query_term::{word_derivations, LocatedQueryTerm};
|
||||||
use crate::search::new::QueryGraph;
|
use crate::search::new::QueryGraph;
|
||||||
|
|
||||||
@ -184,10 +203,18 @@ mod tests {
|
|||||||
.unwrap();
|
.unwrap();
|
||||||
let graph = QueryGraph::from_query(&index, &txn, &mut db_cache, query).unwrap();
|
let graph = QueryGraph::from_query(&index, &txn, &mut db_cache, query).unwrap();
|
||||||
println!("{}", graph.graphviz());
|
println!("{}", graph.graphviz());
|
||||||
|
let mut node_docids_cache = NodeDocIdsCache::default();
|
||||||
let universe = index.documents_ids(&txn).unwrap();
|
let universe = index.documents_ids(&txn).unwrap();
|
||||||
insta::assert_debug_snapshot!(universe, @"RoaringBitmap<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]>");
|
insta::assert_debug_snapshot!(universe, @"RoaringBitmap<[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13]>");
|
||||||
let docids = resolve_query_graph(&index, &txn, &mut db_cache, &graph, &universe).unwrap();
|
let docids = resolve_query_graph(
|
||||||
|
&index,
|
||||||
|
&txn,
|
||||||
|
&mut db_cache,
|
||||||
|
&mut node_docids_cache,
|
||||||
|
&graph,
|
||||||
|
&universe,
|
||||||
|
)
|
||||||
|
.unwrap();
|
||||||
insta::assert_debug_snapshot!(docids, @"RoaringBitmap<[8, 9, 11]>");
|
insta::assert_debug_snapshot!(docids, @"RoaringBitmap<[8, 9, 11]>");
|
||||||
|
|
||||||
// TODO: test with a reduced universe
|
// TODO: test with a reduced universe
|
||||||
|
@ -4,7 +4,7 @@ use heed::RoTxn;
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::db_cache::DatabaseCache;
|
use super::db_cache::DatabaseCache;
|
||||||
use super::resolve_query_graph::resolve_query_graph;
|
use super::resolve_query_graph::{resolve_query_graph, NodeDocIdsCache};
|
||||||
use super::{QueryGraph, QueryNode, RankingRule, RankingRuleOutput};
|
use super::{QueryGraph, QueryNode, RankingRule, RankingRuleOutput};
|
||||||
use crate::{Index, Result, TermsMatchingStrategy};
|
use crate::{Index, Result, TermsMatchingStrategy};
|
||||||
|
|
||||||
@ -14,6 +14,7 @@ pub struct Words {
|
|||||||
iterating: bool,
|
iterating: bool,
|
||||||
positions_to_remove: Vec<i8>,
|
positions_to_remove: Vec<i8>,
|
||||||
terms_matching_strategy: TermsMatchingStrategy,
|
terms_matching_strategy: TermsMatchingStrategy,
|
||||||
|
node_docids_cache: NodeDocIdsCache,
|
||||||
}
|
}
|
||||||
impl Words {
|
impl Words {
|
||||||
pub fn new(terms_matching_strategy: TermsMatchingStrategy) -> Self {
|
pub fn new(terms_matching_strategy: TermsMatchingStrategy) -> Self {
|
||||||
@ -23,6 +24,7 @@ impl Words {
|
|||||||
iterating: false,
|
iterating: false,
|
||||||
positions_to_remove: vec![],
|
positions_to_remove: vec![],
|
||||||
terms_matching_strategy,
|
terms_matching_strategy,
|
||||||
|
node_docids_cache: <_>::default(),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -79,7 +81,14 @@ impl<'transaction> RankingRule<'transaction, QueryGraph> for Words {
|
|||||||
let Some(query_graph) = &mut self.query_graph else { panic!() };
|
let Some(query_graph) = &mut self.query_graph else { panic!() };
|
||||||
// let graphviz = query_graph.graphviz();
|
// let graphviz = query_graph.graphviz();
|
||||||
// println!("\n===={graphviz}\n====");
|
// println!("\n===={graphviz}\n====");
|
||||||
let this_bucket = resolve_query_graph(index, txn, db_cache, query_graph, universe)?;
|
let this_bucket = resolve_query_graph(
|
||||||
|
index,
|
||||||
|
txn,
|
||||||
|
db_cache,
|
||||||
|
&mut self.node_docids_cache,
|
||||||
|
query_graph,
|
||||||
|
universe,
|
||||||
|
)?;
|
||||||
// println!("WORDS: this bucket: {this_bucket:?}");
|
// println!("WORDS: this bucket: {this_bucket:?}");
|
||||||
let child_query_graph = query_graph.clone();
|
let child_query_graph = query_graph.clone();
|
||||||
// this_bucket is the one that must be returned now
|
// this_bucket is the one that must be returned now
|
||||||
|
Loading…
x
Reference in New Issue
Block a user