Intern all strings and phrases in the search logic

Loïc Lecrenier 2023-03-06 19:21:55 +01:00
parent 3f1729a17f
commit e8c76cf7bf
19 changed files with 635 additions and 654 deletions
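
In short: search-time words and phrases, previously passed around as owned `String`s and `Vec<Option<String>>`s, are now stored once in arena-style interners on a new `SearchContext`, and the rest of the search code handles small `Interned<T>` handles instead. A minimal sketch of the idea (not part of the commit), assuming the `Interner`/`Interned` types added in the new interner.rs below are in scope:

fn interner_demo(words: &mut Interner<String>) {
    let a: Interned<String> = words.insert("sunflower".to_owned());
    let b = words.insert("sunflower".to_owned());
    // The same string always maps to the same Copy-able u32 handle,
    // so equality and hashing cost one integer comparison.
    assert!(a == b);
    // The interner resolves a handle back to its string when needed.
    assert_eq!(words.get(a), "sunflower");
}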

View File (new/db_cache.rs)

@@ -1,51 +1,48 @@
-use std::collections::hash_map::Entry;
+use super::{interner::Interned, SearchContext};
+use crate::Result;
 use fxhash::FxHashMap;
 use heed::types::ByteSlice;
-use heed::RoTxn;
-
-use crate::{Index, Result};
+use std::collections::hash_map::Entry;

 #[derive(Default)]
-pub struct DatabaseCache<'transaction> {
-    pub word_pair_proximity_docids: FxHashMap<(u8, String, String), Option<&'transaction [u8]>>,
+pub struct DatabaseCache<'search> {
+    // TODO: interner for all database cache keys
+    pub word_pair_proximity_docids:
+        FxHashMap<(u8, Interned<String>, Interned<String>), Option<&'search [u8]>>,
     pub word_prefix_pair_proximity_docids:
-        FxHashMap<(u8, String, String), Option<&'transaction [u8]>>,
+        FxHashMap<(u8, Interned<String>, Interned<String>), Option<&'search [u8]>>,
     pub prefix_word_pair_proximity_docids:
-        FxHashMap<(u8, String, String), Option<&'transaction [u8]>>,
-    pub word_docids: FxHashMap<String, Option<&'transaction [u8]>>,
-    pub exact_word_docids: FxHashMap<String, Option<&'transaction [u8]>>,
-    pub word_prefix_docids: FxHashMap<String, Option<&'transaction [u8]>>,
+        FxHashMap<(u8, Interned<String>, Interned<String>), Option<&'search [u8]>>,
+    pub word_docids: FxHashMap<Interned<String>, Option<&'search [u8]>>,
+    pub exact_word_docids: FxHashMap<Interned<String>, Option<&'search [u8]>>,
+    pub word_prefix_docids: FxHashMap<Interned<String>, Option<&'search [u8]>>,
 }
-impl<'transaction> DatabaseCache<'transaction> {
-    pub fn get_word_docids(
-        &mut self,
-        index: &Index,
-        txn: &'transaction RoTxn,
-        word: &str,
-    ) -> Result<Option<&'transaction [u8]>> {
-        let bitmap_ptr = match self.word_docids.entry(word.to_owned()) {
+impl<'search> SearchContext<'search> {
+    pub fn get_word_docids(&mut self, word: Interned<String>) -> Result<Option<&'search [u8]>> {
+        let bitmap_ptr = match self.db_cache.word_docids.entry(word) {
             Entry::Occupied(bitmap_ptr) => *bitmap_ptr.get(),
             Entry::Vacant(entry) => {
-                let bitmap_ptr = index.word_docids.remap_data_type::<ByteSlice>().get(txn, word)?;
+                let bitmap_ptr = self
+                    .index
+                    .word_docids
+                    .remap_data_type::<ByteSlice>()
+                    .get(self.txn, self.word_interner.get(word))?;
                 entry.insert(bitmap_ptr);
                 bitmap_ptr
             }
         };
         Ok(bitmap_ptr)
     }
-    pub fn get_prefix_docids(
-        &mut self,
-        index: &Index,
-        txn: &'transaction RoTxn,
-        prefix: &str,
-    ) -> Result<Option<&'transaction [u8]>> {
+    pub fn get_prefix_docids(&mut self, prefix: Interned<String>) -> Result<Option<&'search [u8]>> {
         // In the future, this will be a frozen roaring bitmap
-        let bitmap_ptr = match self.word_prefix_docids.entry(prefix.to_owned()) {
+        let bitmap_ptr = match self.db_cache.word_prefix_docids.entry(prefix) {
             Entry::Occupied(bitmap_ptr) => *bitmap_ptr.get(),
             Entry::Vacant(entry) => {
-                let bitmap_ptr =
-                    index.word_prefix_docids.remap_data_type::<ByteSlice>().get(txn, prefix)?;
+                let bitmap_ptr = self
+                    .index
+                    .word_prefix_docids
+                    .remap_data_type::<ByteSlice>()
+                    .get(self.txn, self.word_interner.get(prefix))?;
                 entry.insert(bitmap_ptr);
                 bitmap_ptr
             }
@@ -55,14 +52,12 @@ impl<'search> SearchContext<'search> {
     pub fn get_word_pair_proximity_docids(
         &mut self,
-        index: &Index,
-        txn: &'transaction RoTxn,
-        word1: &str,
-        word2: &str,
+        word1: Interned<String>,
+        word2: Interned<String>,
         proximity: u8,
-    ) -> Result<Option<&'transaction [u8]>> {
-        let key = (proximity, word1.to_owned(), word2.to_owned());
-        match self.word_pair_proximity_docids.entry(key.clone()) {
+    ) -> Result<Option<&'search [u8]>> {
+        let key = (proximity, word1, word2);
+        match self.db_cache.word_pair_proximity_docids.entry(key) {
             Entry::Occupied(bitmap_ptr) => Ok(*bitmap_ptr.get()),
             Entry::Vacant(entry) => {
                 // We shouldn't greedily access this DB at all
@@ -86,10 +81,11 @@ impl<'search> SearchContext<'search> {
                 //     output.push(word1, word2, proximities);
                 //   }
                 // }
-                let bitmap_ptr = index
-                    .word_pair_proximity_docids
-                    .remap_data_type::<ByteSlice>()
-                    .get(txn, &(key.0, key.1.as_str(), key.2.as_str()))?;
+                let bitmap_ptr =
+                    self.index.word_pair_proximity_docids.remap_data_type::<ByteSlice>().get(
+                        self.txn,
+                        &(key.0, self.word_interner.get(key.1), self.word_interner.get(key.2)),
+                    )?;
                 entry.insert(bitmap_ptr);
                 Ok(bitmap_ptr)
             }
@@ -98,20 +94,22 @@ impl<'search> SearchContext<'search> {
     pub fn get_word_prefix_pair_proximity_docids(
         &mut self,
-        index: &Index,
-        txn: &'transaction RoTxn,
-        word1: &str,
-        prefix2: &str,
+        word1: Interned<String>,
+        prefix2: Interned<String>,
         proximity: u8,
-    ) -> Result<Option<&'transaction [u8]>> {
-        let key = (proximity, word1.to_owned(), prefix2.to_owned());
-        match self.word_prefix_pair_proximity_docids.entry(key.clone()) {
+    ) -> Result<Option<&'search [u8]>> {
+        let key = (proximity, word1, prefix2);
+        match self.db_cache.word_prefix_pair_proximity_docids.entry(key) {
             Entry::Occupied(bitmap_ptr) => Ok(*bitmap_ptr.get()),
             Entry::Vacant(entry) => {
-                let bitmap_ptr = index
+                let bitmap_ptr = self
+                    .index
                     .word_prefix_pair_proximity_docids
                     .remap_data_type::<ByteSlice>()
-                    .get(txn, &(key.0, key.1.as_str(), key.2.as_str()))?;
+                    .get(
+                        self.txn,
+                        &(key.0, self.word_interner.get(key.1), self.word_interner.get(key.2)),
+                    )?;
                 entry.insert(bitmap_ptr);
                 Ok(bitmap_ptr)
             }
@@ -119,20 +117,26 @@ impl<'search> SearchContext<'search> {
     }
     pub fn get_prefix_word_pair_proximity_docids(
         &mut self,
-        index: &Index,
-        txn: &'transaction RoTxn,
-        left_prefix: &str,
-        right: &str,
+        left_prefix: Interned<String>,
+        right: Interned<String>,
         proximity: u8,
-    ) -> Result<Option<&'transaction [u8]>> {
-        let key = (proximity, left_prefix.to_owned(), right.to_owned());
-        match self.prefix_word_pair_proximity_docids.entry(key) {
+    ) -> Result<Option<&'search [u8]>> {
+        let key = (proximity, left_prefix, right);
+        match self.db_cache.prefix_word_pair_proximity_docids.entry(key) {
             Entry::Occupied(bitmap_ptr) => Ok(*bitmap_ptr.get()),
             Entry::Vacant(entry) => {
-                let bitmap_ptr = index
+                let bitmap_ptr = self
+                    .index
                     .prefix_word_pair_proximity_docids
                     .remap_data_type::<ByteSlice>()
-                    .get(txn, &(proximity, left_prefix, right))?;
+                    .get(
+                        self.txn,
+                        &(
+                            proximity,
+                            self.word_interner.get(left_prefix),
+                            self.word_interner.get(right),
+                        ),
+                    )?;
                 entry.insert(bitmap_ptr);
                 Ok(bitmap_ptr)
             }
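
The practical effect on the cache API: callers no longer thread `index`, `txn`, and the cache through every call, and the keys are copied handles rather than cloned `String`s. A hypothetical call site, sketched under the assumption that a `SearchContext` is already built:

fn word_docids_bytes<'search>(
    ctx: &mut SearchContext<'search>,
) -> Result<Option<&'search [u8]>> {
    // The key is a Copy-able handle, so nothing is cloned on a cache probe,
    // and the index and transaction come from the context itself.
    let word = ctx.word_interner.insert("sunflower".to_owned());
    ctx.get_word_docids(word)
}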

View File (new/graph_based_ranking_rule.rs)

@@ -1,15 +1,11 @@
-use heed::RoTxn;
-use roaring::RoaringBitmap;
-
-use super::db_cache::DatabaseCache;
 use super::logger::SearchLogger;
 use super::ranking_rule_graph::EdgeDocidsCache;
 use super::ranking_rule_graph::EmptyPathsCache;
 use super::ranking_rule_graph::{RankingRuleGraph, RankingRuleGraphTrait};
+use super::SearchContext;
 use super::{BitmapOrAllRef, QueryGraph, RankingRule, RankingRuleOutput};
-use crate::{Index, Result};
+use crate::Result;
+use roaring::RoaringBitmap;

 pub struct GraphBasedRankingRule<G: RankingRuleGraphTrait> {
     id: String,
@@ -29,12 +25,10 @@ pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
     cur_distance_idx: usize,
 }

-fn remove_empty_edges<'transaction, G: RankingRuleGraphTrait>(
+fn remove_empty_edges<'search, G: RankingRuleGraphTrait>(
+    ctx: &mut SearchContext<'search>,
     graph: &mut RankingRuleGraph<G>,
     edge_docids_cache: &mut EdgeDocidsCache<G>,
-    index: &Index,
-    txn: &'transaction RoTxn,
-    db_cache: &mut DatabaseCache<'transaction>,
     universe: &RoaringBitmap,
     empty_paths_cache: &mut EmptyPathsCache,
 ) -> Result<()> {
@@ -42,8 +36,7 @@ fn remove_empty_edges<'search, G: RankingRuleGraphTrait>(
         if graph.all_edges[edge_index as usize].is_none() {
             continue;
         }
-        let docids = edge_docids_cache
-            .get_edge_docids(index, txn, db_cache, edge_index, &*graph, universe)?;
+        let docids = edge_docids_cache.get_edge_docids(ctx, edge_index, &*graph, universe)?;
        match docids {
            BitmapOrAllRef::Bitmap(bitmap) => {
                if bitmap.is_disjoint(universe) {
@@ -59,7 +52,7 @@ fn remove_empty_edges<'search, G: RankingRuleGraphTrait>(
     Ok(())
 }

-impl<'transaction, G: RankingRuleGraphTrait> RankingRule<'transaction, QueryGraph>
+impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
     for GraphBasedRankingRule<G>
 {
     fn id(&self) -> String {
@@ -67,24 +60,20 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
     }
     fn start_iteration(
         &mut self,
-        index: &Index,
-        txn: &'transaction RoTxn,
-        db_cache: &mut DatabaseCache<'transaction>,
+        ctx: &mut SearchContext<'search>,
         _logger: &mut dyn SearchLogger<QueryGraph>,
         universe: &RoaringBitmap,
         query_graph: &QueryGraph,
     ) -> Result<()> {
         // TODO: update old state instead of starting from scratch
-        let mut graph = RankingRuleGraph::build(index, txn, db_cache, query_graph.clone())?;
+        let mut graph = RankingRuleGraph::build(ctx, query_graph.clone())?;
         let mut edge_docids_cache = EdgeDocidsCache::default();
         let mut empty_paths_cache = EmptyPathsCache::new(graph.all_edges.len());

         remove_empty_edges(
+            ctx,
             &mut graph,
             &mut edge_docids_cache,
-            index,
-            txn,
-            db_cache,
             universe,
             &mut empty_paths_cache,
         )?;
@@ -105,20 +94,16 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
     fn next_bucket(
         &mut self,
-        index: &Index,
-        txn: &'transaction RoTxn,
-        db_cache: &mut DatabaseCache<'transaction>,
+        ctx: &mut SearchContext<'search>,
         logger: &mut dyn SearchLogger<QueryGraph>,
         universe: &RoaringBitmap,
     ) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
         assert!(universe.len() > 1);
         let mut state = self.state.take().unwrap();
         remove_empty_edges(
+            ctx,
             &mut state.graph,
             &mut state.edge_docids_cache,
-            index,
-            txn,
-            db_cache,
             universe,
             &mut state.empty_paths_cache,
         )?;
@@ -151,9 +136,7 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
         );

         let bucket = state.graph.resolve_paths(
-            index,
-            txn,
-            db_cache,
+            ctx,
             &mut state.edge_docids_cache,
             &mut state.empty_paths_cache,
             universe,
@@ -169,9 +152,7 @@ impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
     fn end_iteration(
         &mut self,
-        _index: &Index,
-        _txn: &'transaction RoTxn,
-        _db_cache: &mut DatabaseCache<'transaction>,
+        _ctx: &mut SearchContext<'search>,
         _logger: &mut dyn SearchLogger<QueryGraph>,
     ) {
         self.state = None;

View File (new/interner.rs, new file)

@@ -0,0 +1,78 @@
use fxhash::FxHashMap;
use std::hash::Hash;
use std::marker::PhantomData;

pub struct Interned<T> {
    idx: u32,
    _phantom: PhantomData<T>,
}

impl<T> Interned<T> {
    fn new(idx: u32) -> Self {
        Self { idx, _phantom: PhantomData }
    }
}

pub struct Interner<T> {
    stable_store: Vec<T>,
    lookup: FxHashMap<T, Interned<T>>,
}
impl<T> Default for Interner<T> {
    fn default() -> Self {
        Self { stable_store: Default::default(), lookup: Default::default() }
    }
}

impl<T> Interner<T>
where
    T: Clone + Eq + Hash,
{
    pub fn insert(&mut self, s: T) -> Interned<T> {
        if let Some(interned) = self.lookup.get(&s) {
            *interned
        } else {
            self.stable_store.push(s.clone());
            let interned = Interned::new(self.stable_store.len() as u32 - 1);
            self.lookup.insert(s, interned);
            interned
        }
    }
    pub fn get(&self, interned: Interned<T>) -> &T {
        &self.stable_store[interned.idx as usize]
    }
}

// Interned<T> boilerplate implementations

impl<T> Hash for Interned<T> {
    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
        self.idx.hash(state);
    }
}

impl<T: Ord> Ord for Interned<T> {
    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
        self.idx.cmp(&other.idx)
    }
}

impl<T> PartialOrd for Interned<T> {
    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
        self.idx.partial_cmp(&other.idx)
    }
}

impl<T> Eq for Interned<T> {}

impl<T> PartialEq for Interned<T> {
    fn eq(&self, other: &Self) -> bool {
        self.idx == other.idx
    }
}

impl<T> Clone for Interned<T> {
    fn clone(&self) -> Self {
        Self { idx: self.idx, _phantom: PhantomData }
    }
}

impl<T> Copy for Interned<T> {}
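
The `Clone`/`Copy`/`Eq`/`Hash` impls are written by hand rather than derived because a derive would also bound `T`, even though only the `u32` index participates. A small illustration of the difference, using a hypothetical type that is not in the commit:

use std::marker::PhantomData;

// A derived Clone demands T: Clone, because the standard derive adds that
// bound for every type parameter, even one that only appears in PhantomData:
#[derive(Clone)]
struct DerivedHandle<T> {
    idx: u32,
    _phantom: PhantomData<T>,
}
struct NotClone;
// `DerivedHandle<NotClone>` is *not* Clone, while the manual impls above make
// `Interned<T>` Copy, Eq and Hash for every T; e.g. `Interned<Phrase>` is
// Copy even though `Phrase` itself is not.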

View File (new/logger/detailed.rs)

@@ -6,7 +6,7 @@ use std::time::Instant;
 use std::{io::Write, path::PathBuf};

 use crate::new::ranking_rule_graph::TypoGraph;
-use crate::new::{QueryNode, QueryGraph};
+use crate::new::{QueryNode, QueryGraph, SearchContext};
 use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
 use crate::new::ranking_rule_graph::EmptyPathsCache;
 use crate::new::ranking_rule_graph::{Edge, EdgeDetails, RankingRuleGraphTrait};
@@ -176,7 +176,7 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
 }

 impl DetailedSearchLogger {
-    pub fn write_d2_description(&self) {
+    pub fn write_d2_description(&self, ctx: &mut SearchContext) {
         let mut prev_time = self.initial_query_time.unwrap();
         let mut timestamp = vec![];
         fn activated_id(timestamp: &[usize]) -> String {
@@ -193,12 +193,12 @@ impl DetailedSearchLogger {
         writeln!(&mut file, "direction: right").unwrap();
         writeln!(&mut file, "Initial Query Graph: {{").unwrap();
         let initial_query_graph = self.initial_query.as_ref().unwrap();
-        Self::query_graph_d2_description(initial_query_graph, &mut file);
+        Self::query_graph_d2_description(ctx, initial_query_graph, &mut file);
         writeln!(&mut file, "}}").unwrap();

         writeln!(&mut file, "Query Graph Used To Compute Universe: {{").unwrap();
         let query_graph_for_universe = self.query_for_universe.as_ref().unwrap();
-        Self::query_graph_d2_description(query_graph_for_universe, &mut file);
+        Self::query_graph_d2_description(ctx, query_graph_for_universe, &mut file);
         writeln!(&mut file, "}}").unwrap();

         let initial_universe = self.initial_universe.as_ref().unwrap();
@@ -308,7 +308,7 @@ results.{random} {{
                     let id = format!("{cur_ranking_rule}.{cur_activated_id}");
                     let new_file_path = self.folder_path.join(format!("{id}.d2"));
                     let mut new_file = std::fs::File::create(new_file_path).unwrap();
-                    Self::query_graph_d2_description(query_graph, &mut new_file);
+                    Self::query_graph_d2_description(ctx, query_graph, &mut new_file);
                     writeln!(
                         &mut file,
                         "{id} {{
@@ -323,7 +323,7 @@ results.{random} {{
                     let id = format!("{cur_ranking_rule}.{cur_activated_id}");
                     let new_file_path = self.folder_path.join(format!("{id}.d2"));
                     let mut new_file = std::fs::File::create(new_file_path).unwrap();
-                    Self::ranking_rule_graph_d2_description(graph, paths, empty_paths_cache, distances.clone(), &mut new_file);
+                    Self::ranking_rule_graph_d2_description(ctx, graph, paths, empty_paths_cache, distances.clone(), &mut new_file);
                     writeln!(
                         &mut file,
                         "{id} {{
@@ -339,7 +339,7 @@ results.{random} {{
                     let id = format!("{cur_ranking_rule}.{cur_activated_id}");
                     let new_file_path = self.folder_path.join(format!("{id}.d2"));
                     let mut new_file = std::fs::File::create(new_file_path).unwrap();
-                    Self::ranking_rule_graph_d2_description(graph, paths, empty_paths_cache, distances.clone(), &mut new_file);
+                    Self::ranking_rule_graph_d2_description(ctx, graph, paths, empty_paths_cache, distances.clone(), &mut new_file);
                     writeln!(
                         &mut file,
                         "{id} {{
@@ -352,31 +352,40 @@ results.{random} {{
         writeln!(&mut file, "}}").unwrap();
     }

-    fn query_node_d2_desc(node_idx: usize, node: &QueryNode, _distances: &[u64], file: &mut File) {
+    fn query_node_d2_desc(ctx: &mut SearchContext, node_idx: usize, node: &QueryNode, _distances: &[u64], file: &mut File) {
         match &node {
             QueryNode::Term(LocatedQueryTerm { value, .. }) => {
                 match value {
                     QueryTerm::Phrase { phrase } => {
-                        let phrase_str = phrase.description();
+                        let phrase = ctx.phrase_interner.get(*phrase);
+                        let phrase_str = phrase.description(&ctx.word_interner);
                         writeln!(file,"{node_idx} : \"{phrase_str}\"").unwrap();
                     },
                     QueryTerm::Word { derivations: WordDerivations { original, zero_typo, one_typo, two_typos, use_prefix_db, synonyms, split_words } } => {
+                        let original = ctx.word_interner.get(*original);
                         writeln!(file,"{node_idx} : \"{original}\" {{
 shape: class").unwrap();
-                        for w in zero_typo {
+                        for w in zero_typo.iter().copied() {
+                            let w = ctx.word_interner.get(w);
                             writeln!(file, "\"{w}\" : 0").unwrap();
                         }
-                        for w in one_typo {
+                        for w in one_typo.iter().copied() {
+                            let w = ctx.word_interner.get(w);
                             writeln!(file, "\"{w}\" : 1").unwrap();
                         }
-                        for w in two_typos {
+                        for w in two_typos.iter().copied() {
+                            let w = ctx.word_interner.get(w);
                             writeln!(file, "\"{w}\" : 2").unwrap();
                         }
-                        if let Some((left, right)) = split_words {
-                            writeln!(file, "\"{left} {right}\" : split_words").unwrap();
+                        if let Some(split_words) = split_words {
+                            let phrase = ctx.phrase_interner.get(*split_words);
+                            let phrase_str = phrase.description(&ctx.word_interner);
+                            writeln!(file, "\"{phrase_str}\" : split_words").unwrap();
                         }
-                        for synonym in synonyms {
-                            writeln!(file, "\"{}\" : synonym", synonym.description()).unwrap();
+                        for synonym in synonyms.iter().copied() {
+                            let phrase = ctx.phrase_interner.get(synonym);
+                            let phrase_str = phrase.description(&ctx.word_interner);
+                            writeln!(file, "\"{phrase_str}\" : synonym").unwrap();
                         }
                         if *use_prefix_db {
                             writeln!(file, "use prefix DB : true").unwrap();
@@ -398,20 +407,20 @@ shape: class").unwrap();
             },
         }
     }

-    fn query_graph_d2_description(query_graph: &QueryGraph, file: &mut File) {
+    fn query_graph_d2_description(ctx: &mut SearchContext, query_graph: &QueryGraph, file: &mut File) {
         writeln!(file,"direction: right").unwrap();
         for node in 0..query_graph.nodes.len() {
             if matches!(query_graph.nodes[node], QueryNode::Deleted) {
                 continue;
             }
-            Self::query_node_d2_desc(node, &query_graph.nodes[node], &[], file);
+            Self::query_node_d2_desc(ctx, node, &query_graph.nodes[node], &[], file);
             for edge in query_graph.edges[node].successors.iter() {
                 writeln!(file, "{node} -> {edge};\n").unwrap();
             }
         }
     }

-    fn ranking_rule_graph_d2_description<R: RankingRuleGraphTrait>(graph: &RankingRuleGraph<R>, paths: &[Vec<u32>], _empty_paths_cache: &EmptyPathsCache, distances: Vec<Vec<u64>>, file: &mut File) {
+    fn ranking_rule_graph_d2_description<R: RankingRuleGraphTrait>(ctx: &mut SearchContext, graph: &RankingRuleGraph<R>, paths: &[Vec<u32>], _empty_paths_cache: &EmptyPathsCache, distances: Vec<Vec<u64>>, file: &mut File) {
         writeln!(file,"direction: right").unwrap();

         writeln!(file, "Proximity Graph {{").unwrap();
@@ -420,7 +429,7 @@ shape: class").unwrap();
                 continue;
             }
             let distances = &distances[node_idx];
-            Self::query_node_d2_desc(node_idx, node, distances.as_slice(), file);
+            Self::query_node_d2_desc(ctx, node_idx, node, distances.as_slice(), file);
         }
         for edge in graph.all_edges.iter().flatten() {
             let Edge { from_node, to_node, details, .. } = edge;
@@ -449,7 +458,7 @@ shape: class").unwrap();

         writeln!(file, "Shortest Paths {{").unwrap();
-        Self::paths_d2_description(graph, paths, file);
+        Self::paths_d2_description(ctx, graph, paths, file);
         writeln!(file, "}}").unwrap();

         // writeln!(file, "Empty Edge Couples {{").unwrap();
@@ -468,15 +477,18 @@ shape: class").unwrap();
         // }
         // writeln!(file, "}}").unwrap();
     }

-    fn edge_d2_description<R: RankingRuleGraphTrait>(graph: &RankingRuleGraph<R>, edge_idx: u32, file: &mut File) {
+    fn edge_d2_description<R: RankingRuleGraphTrait>(ctx: &mut SearchContext, graph: &RankingRuleGraph<R>, edge_idx: u32, file: &mut File) {
         let Edge { from_node, to_node, cost, .. } = graph.all_edges[edge_idx as usize].as_ref().unwrap();
         let from_node = &graph.query_graph.nodes[*from_node as usize];
         let from_node_desc = match from_node {
             QueryNode::Term(term) => match &term.value {
                 QueryTerm::Phrase { phrase } => {
-                    phrase.description()
+                    let phrase = ctx.phrase_interner.get(*phrase);
+                    phrase.description(&ctx.word_interner)
+                },
+                QueryTerm::Word { derivations } => {
+                    ctx.word_interner.get(derivations.original).to_owned()
                 },
-                QueryTerm::Word { derivations } => derivations.original.clone(),
             },
             QueryNode::Deleted => panic!(),
             QueryNode::Start => "START".to_owned(),
@@ -485,8 +497,11 @@ shape: class").unwrap();
         let to_node = &graph.query_graph.nodes[*to_node as usize];
         let to_node_desc = match to_node {
             QueryNode::Term(term) => match &term.value {
-                QueryTerm::Phrase { phrase } => phrase.description(),
-                QueryTerm::Word { derivations } => derivations.original.clone(),
+                QueryTerm::Phrase { phrase } => {
+                    let phrase = ctx.phrase_interner.get(*phrase);
+                    phrase.description(&ctx.word_interner)
+                },
+                QueryTerm::Word { derivations } => ctx.word_interner.get(derivations.original).to_owned(),
             },
             QueryNode::Deleted => panic!(),
             QueryNode::Start => "START".to_owned(),
@@ -496,11 +511,11 @@ shape: class").unwrap();
 shape: class
 }}").unwrap();
     }

-    fn paths_d2_description<R: RankingRuleGraphTrait>(graph: &RankingRuleGraph<R>, paths: &[Vec<u32>], file: &mut File) {
+    fn paths_d2_description<R: RankingRuleGraphTrait>(ctx: &mut SearchContext, graph: &RankingRuleGraph<R>, paths: &[Vec<u32>], file: &mut File) {
         for (path_idx, edge_indexes) in paths.iter().enumerate() {
             writeln!(file, "{path_idx} {{").unwrap();
             for edge_idx in edge_indexes.iter() {
-                Self::edge_d2_description(graph, *edge_idx, file);
+                Self::edge_d2_description(ctx, graph, *edge_idx, file);
             }
             for couple_edges in edge_indexes.windows(2) {
                 let [src_edge_idx, dest_edge_idx] = couple_edges else { panic!() };

View File (new/mod.rs)

@@ -1,5 +1,6 @@
 mod db_cache;
 mod graph_based_ranking_rule;
+mod interner;
 mod logger;
 mod query_graph;
 mod query_term;
@@ -26,7 +27,9 @@ use query_graph::{QueryGraph, QueryNode};
 use roaring::RoaringBitmap;

 use self::{
+    interner::Interner,
     logger::SearchLogger,
+    query_term::Phrase,
     resolve_query_graph::{resolve_query_graph, NodeDocIdsCache},
 };

@@ -35,14 +38,32 @@ pub enum BitmapOrAllRef<'s> {
     All,
 }

+pub struct SearchContext<'search> {
+    pub index: &'search Index,
+    pub txn: &'search RoTxn<'search>,
+    pub db_cache: DatabaseCache<'search>,
+    pub word_interner: Interner<String>,
+    pub phrase_interner: Interner<Phrase>,
+    pub node_docids_cache: NodeDocIdsCache,
+}
+impl<'search> SearchContext<'search> {
+    pub fn new(index: &'search Index, txn: &'search RoTxn<'search>) -> Self {
+        Self {
+            index,
+            txn,
+            db_cache: <_>::default(),
+            word_interner: <_>::default(),
+            phrase_interner: <_>::default(),
+            node_docids_cache: <_>::default(),
+        }
+    }
+}

 #[allow(clippy::too_many_arguments)]
-pub fn resolve_maximally_reduced_query_graph<'transaction>(
-    index: &Index,
-    txn: &'transaction heed::RoTxn,
-    db_cache: &mut DatabaseCache<'transaction>,
+pub fn resolve_maximally_reduced_query_graph<'search>(
+    ctx: &mut SearchContext<'search>,
     universe: &RoaringBitmap,
     query_graph: &QueryGraph,
-    node_docids_cache: &mut NodeDocIdsCache,
     matching_strategy: TermsMatchingStrategy,
     logger: &mut dyn SearchLogger<QueryGraph>,
 ) -> Result<RoaringBitmap> {
@@ -73,16 +94,14 @@ pub fn resolve_maximally_reduced_query_graph<'search>(
         }
     }
     logger.query_for_universe(&graph);
-    let docids = resolve_query_graph(index, txn, db_cache, node_docids_cache, &graph, universe)?;
+    let docids = resolve_query_graph(ctx, &graph, universe)?;
     Ok(docids)
 }

 #[allow(clippy::too_many_arguments)]
-pub fn execute_search<'transaction>(
-    index: &Index,
-    txn: &'transaction RoTxn,
-    db_cache: &mut DatabaseCache<'transaction>,
+pub fn execute_search<'search>(
+    ctx: &mut SearchContext<'search>,
     query: &str,
     filters: Option<Filter>,
     from: usize,
@@ -90,26 +109,21 @@ pub fn execute_search<'search>(
     logger: &mut dyn SearchLogger<QueryGraph>,
 ) -> Result<Vec<u32>> {
     assert!(!query.is_empty());
-    let query_terms = located_query_terms_from_string(index, txn, query.tokenize(), None).unwrap();
-    let graph = QueryGraph::from_query(index, txn, db_cache, query_terms)?;
+    let query_terms = located_query_terms_from_string(ctx, query.tokenize(), None).unwrap();
+    let graph = QueryGraph::from_query(ctx, query_terms)?;

     logger.initial_query(&graph);

     let universe = if let Some(filters) = filters {
-        filters.evaluate(txn, index)?
+        filters.evaluate(ctx.txn, ctx.index)?
     } else {
-        index.documents_ids(txn)?
+        ctx.index.documents_ids(ctx.txn)?
     };

-    let mut node_docids_cache = NodeDocIdsCache::default();
-
     let universe = resolve_maximally_reduced_query_graph(
-        index,
-        txn,
-        db_cache,
+        ctx,
         &universe,
         &graph,
-        &mut node_docids_cache,
         TermsMatchingStrategy::Last,
         logger,
     )?;
@@ -117,5 +131,5 @@ pub fn execute_search<'search>(
     logger.initial_universe(&universe);

-    apply_ranking_rules(index, txn, db_cache, &graph, &universe, from, length, logger)
+    apply_ranking_rules(ctx, &graph, &universe, from, length, logger)
 }
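
How a caller might now drive a search, sketched under the assumption that an `Index` and a logger are already available (the real call sites in the crate may differ):

fn search_sunflowers(
    index: &Index,
    logger: &mut dyn SearchLogger<QueryGraph>,
) -> Result<Vec<u32>> {
    let txn = index.read_txn()?;
    // One &mut SearchContext replaces the old bundle of
    // (index, txn, db_cache, node_docids_cache) parameters.
    let mut ctx = SearchContext::new(index, &txn);
    execute_search(&mut ctx, "sunflower fields", None, 0, 20, logger)
}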

View File (new/query_graph.rs)

@@ -1,13 +1,10 @@
-use std::fmt::Debug;
-
-use heed::RoTxn;
-use roaring::RoaringBitmap;
-
-use super::db_cache::DatabaseCache;
-use super::query_term::{self, LocatedQueryTerm, QueryTerm, WordDerivations};
-use crate::{Index, Result};
+use super::query_term::{self, LocatedQueryTerm, QueryTerm, WordDerivations};
+use super::SearchContext;
+use crate::Result;
+use roaring::RoaringBitmap;
+use std::fmt::Debug;

-#[derive(Debug, Clone)]
+#[derive(Clone)]
 pub enum QueryNode {
     Term(LocatedQueryTerm),
     Deleted,
@@ -22,7 +19,7 @@ pub struct Edges {
     pub successors: RoaringBitmap,
 }

-#[derive(Debug, Clone)]
+#[derive(Clone)]
 pub struct QueryGraph {
     pub root_node: u32,
     pub end_node: u32,
@@ -31,8 +28,8 @@ pub struct QueryGraph {
 }

 fn _assert_sizes() {
-    // TODO: QueryNodes are too big now, 184B is an unreasonable size
-    let _: [u8; 184] = [0; std::mem::size_of::<QueryNode>()];
+    // TODO: QueryNodes are too big now, 88B is a bit too big
+    let _: [u8; 88] = [0; std::mem::size_of::<QueryNode>()];
     let _: [u8; 48] = [0; std::mem::size_of::<Edges>()];
 }

@@ -72,19 +69,14 @@ impl QueryGraph {
 impl QueryGraph {
     // TODO: return the list of all matching words here as well
-    pub fn from_query<'transaction>(
-        index: &Index,
-        txn: &RoTxn,
-        _db_cache: &mut DatabaseCache<'transaction>,
-        terms: Vec<LocatedQueryTerm>,
-    ) -> Result<QueryGraph> {
+    pub fn from_query(ctx: &mut SearchContext, terms: Vec<LocatedQueryTerm>) -> Result<QueryGraph> {
         // TODO: maybe empty nodes should not be removed here, to compute
         // the score of the `words` ranking rule correctly
         // it is very easy to traverse the graph and remove afterwards anyway
         // Still, I'm keeping this here as a demo
         let mut empty_nodes = vec![];

-        let word_set = index.words_fst(txn)?;
+        let word_set = ctx.index.words_fst(ctx.txn)?;
         let mut graph = QueryGraph::default();

         let (mut prev2, mut prev1, mut prev0): (Vec<u32>, Vec<u32>, Vec<u32>) =
@@ -105,20 +97,20 @@ impl QueryGraph {
             if !prev1.is_empty() {
                 if let Some((ngram2_str, ngram2_pos)) =
-                    query_term::ngram2(&query[length - 2], &query[length - 1])
+                    query_term::ngram2(ctx, &query[length - 2], &query[length - 1])
                 {
-                    if word_set.contains(ngram2_str.as_bytes()) {
+                    if word_set.contains(ctx.word_interner.get(ngram2_str)) {
                         let ngram2 = LocatedQueryTerm {
                             value: QueryTerm::Word {
                                 derivations: WordDerivations {
-                                    original: ngram2_str.clone(),
+                                    original: ngram2_str,
                                     // TODO: could add a typo if it's an ngram?
-                                    zero_typo: vec![ngram2_str],
-                                    one_typo: vec![],
-                                    two_typos: vec![],
+                                    zero_typo: Box::new([ngram2_str]),
+                                    one_typo: Box::new([]),
+                                    two_typos: Box::new([]),
                                     use_prefix_db: false,
-                                    synonyms: vec![], // TODO: ngram synonyms
+                                    synonyms: Box::new([]), // TODO: ngram synonyms
                                     split_words: None, // TODO: maybe ngram split words?
                                 },
                             },
                             positions: ngram2_pos,
@@ -129,22 +121,25 @@ impl QueryGraph {
                 }
             }
             if !prev2.is_empty() {
-                if let Some((ngram3_str, ngram3_pos)) =
-                    query_term::ngram3(&query[length - 3], &query[length - 2], &query[length - 1])
-                {
-                    if word_set.contains(ngram3_str.as_bytes()) {
+                if let Some((ngram3_str, ngram3_pos)) = query_term::ngram3(
+                    ctx,
+                    &query[length - 3],
+                    &query[length - 2],
+                    &query[length - 1],
+                ) {
+                    if word_set.contains(ctx.word_interner.get(ngram3_str)) {
                         let ngram3 = LocatedQueryTerm {
                             value: QueryTerm::Word {
                                 derivations: WordDerivations {
-                                    original: ngram3_str.clone(),
+                                    original: ngram3_str,
                                     // TODO: could add a typo if it's an ngram?
-                                    zero_typo: vec![ngram3_str],
-                                    one_typo: vec![],
-                                    two_typos: vec![],
+                                    zero_typo: Box::new([ngram3_str]),
+                                    one_typo: Box::new([]),
+                                    two_typos: Box::new([]),
                                     use_prefix_db: false,
-                                    synonyms: vec![], // TODO: ngram synonyms
+                                    synonyms: Box::new([]), // TODO: ngram synonyms
                                     split_words: None, // TODO: maybe ngram split words?
                                     // would be nice for typos like su nflower
                                 },
                             },
                             positions: ngram3_pos,
View File

@@ -16,30 +16,35 @@ use crate::search::fst_utils::{Complement, Intersection, StartsWith, Union};
 use crate::search::{build_dfa, get_first};
 use crate::{CboRoaringBitmapLenCodec, Index, Result};

-#[derive(Debug, Default, Clone)]
+use super::interner::{Interned, Interner};
+use super::SearchContext;
+
+#[derive(Default, Clone, PartialEq, Eq, Hash)]
 pub struct Phrase {
-    pub words: Vec<Option<String>>,
+    pub words: Vec<Option<Interned<String>>>,
 }
 impl Phrase {
-    pub fn description(&self) -> String {
-        self.words.iter().flatten().join(" ")
+    pub fn description(&self, interner: &Interner<String>) -> String {
+        self.words.iter().flatten().map(|w| interner.get(*w)).join(" ")
     }
 }

-#[derive(Debug, Clone)]
+#[derive(Clone)]
 pub struct WordDerivations {
-    pub original: String,
+    pub original: Interned<String>,
     // TODO: pub prefix_of: Vec<String>,
-    pub synonyms: Vec<Phrase>,
-    pub split_words: Option<(String, String)>,
-    pub zero_typo: Vec<String>,
-    pub one_typo: Vec<String>,
-    pub two_typos: Vec<String>,
+    pub synonyms: Box<[Interned<Phrase>]>,
+    pub split_words: Option<Interned<Phrase>>,
+    pub zero_typo: Box<[Interned<String>]>,
+    pub one_typo: Box<[Interned<String>]>,
+    pub two_typos: Box<[Interned<String>]>,
     pub use_prefix_db: bool,
 }
 impl WordDerivations {
-    pub fn all_derivations_except_prefix_db(&self) -> impl Iterator<Item = &String> + Clone {
-        self.zero_typo.iter().chain(self.one_typo.iter()).chain(self.two_typos.iter())
+    pub fn all_derivations_except_prefix_db(
+        &'_ self,
+    ) -> impl Iterator<Item = Interned<String>> + Clone + '_ {
+        self.zero_typo.iter().chain(self.one_typo.iter()).chain(self.two_typos.iter()).copied()
     }
     fn is_empty(&self) -> bool {
         self.zero_typo.is_empty()
@@ -50,15 +55,21 @@ impl WordDerivations {
 }

 pub fn word_derivations(
-    index: &Index,
-    txn: &RoTxn,
+    ctx: &mut SearchContext,
     word: &str,
     max_typo: u8,
     is_prefix: bool,
     fst: &fst::Set<Cow<[u8]>>,
 ) -> Result<WordDerivations> {
+    let word_interned = ctx.word_interner.insert(word.to_owned());
+
     let use_prefix_db = is_prefix
-        && index.word_prefix_docids.remap_data_type::<DecodeIgnore>().get(txn, word)?.is_some();
+        && ctx
+            .index
+            .word_prefix_docids
+            .remap_data_type::<DecodeIgnore>()
+            .get(ctx.txn, word)?
+            .is_some();

     let mut zero_typo = vec![];
     let mut one_typo = vec![];
@@ -70,11 +81,12 @@ pub fn word_derivations(
             let mut stream = fst.search(prefix).into_stream();

             while let Some(word) = stream.next() {
-                let word = std::str::from_utf8(word)?;
-                zero_typo.push(word.to_string());
+                let word = std::str::from_utf8(word)?.to_owned();
+                let word_interned = ctx.word_interner.insert(word);
+                zero_typo.push(word_interned);
             }
         } else if fst.contains(word) {
-            zero_typo.push(word.to_string());
+            zero_typo.push(word_interned);
         }
     } else if max_typo == 1 {
         let dfa = build_dfa(word, 1, is_prefix);
@@ -83,13 +95,14 @@ pub fn word_derivations(
         while let Some((word, state)) = stream.next() {
             let word = std::str::from_utf8(word)?;
+            let word_interned = ctx.word_interner.insert(word.to_owned());
             let d = dfa.distance(state.1);
             match d.to_u8() {
                 0 => {
-                    zero_typo.push(word.to_string());
+                    zero_typo.push(word_interned);
                 }
                 1 => {
-                    one_typo.push(word.to_string());
+                    one_typo.push(word_interned);
                 }
                 _ => panic!(),
             }
@@ -105,47 +118,56 @@ pub fn word_derivations(
         while let Some((found_word, state)) = stream.next() {
             let found_word = std::str::from_utf8(found_word)?;
+            let found_word_interned = ctx.word_interner.insert(found_word.to_owned());
             // in the case the typo is on the first letter, we know the number of typo
             // is two
             if get_first(found_word) != get_first(word) {
-                two_typos.push(found_word.to_string());
+                two_typos.push(found_word_interned);
             } else {
                 // Else, we know that it is the second dfa that matched and compute the
                 // correct distance
                 let d = second_dfa.distance((state.1).0);
                 match d.to_u8() {
                     0 => {
-                        zero_typo.push(found_word.to_string());
+                        zero_typo.push(found_word_interned);
                     }
                     1 => {
-                        one_typo.push(found_word.to_string());
+                        one_typo.push(found_word_interned);
                     }
                     2 => {
-                        two_typos.push(found_word.to_string());
+                        two_typos.push(found_word_interned);
                     }
                     _ => panic!(),
                 }
             }
         }
     }

-    let split_words = split_best_frequency(index, txn, word)?;
+    let split_words = split_best_frequency(ctx.index, ctx.txn, word)?.map(|(l, r)| {
+        ctx.phrase_interner.insert(Phrase {
+            words: vec![Some(ctx.word_interner.insert(l)), Some(ctx.word_interner.insert(r))],
+        })
+    });

-    let synonyms = index.synonyms(txn)?;
+    let synonyms = ctx.index.synonyms(ctx.txn)?;
     let synonyms = synonyms
         .get(&vec![word.to_owned()])
         .cloned()
         .unwrap_or_default()
         .into_iter()
-        .map(|words| Phrase { words: words.into_iter().map(Some).collect() })
+        .map(|words| {
+            let words = words.into_iter().map(|w| Some(ctx.word_interner.insert(w))).collect();
+            ctx.phrase_interner.insert(Phrase { words })
+        })
         .collect();

     Ok(WordDerivations {
-        original: word.to_owned(),
+        original: ctx.word_interner.insert(word.to_owned()),
         synonyms,
         split_words,
-        zero_typo,
-        one_typo,
-        two_typos,
+        zero_typo: zero_typo.into_boxed_slice(),
+        one_typo: one_typo.into_boxed_slice(),
+        two_typos: two_typos.into_boxed_slice(),
         use_prefix_db,
     })
 }
@@ -176,33 +198,36 @@ fn split_best_frequency(
     Ok(best.map(|(_, left, right)| (left.to_owned(), right.to_owned())))
 }

-#[derive(Debug, Clone)]
+#[derive(Clone)]
 pub enum QueryTerm {
     // TODO: should there be SplitWord, NGram2, and NGram3 variants?
     // NGram2 can have 1 typo and synonyms
     // NGram3 cannot have typos but can have synonyms
     // SplitWords are a phrase
     // Can NGrams be prefixes?
-    Phrase { phrase: Phrase },
+    Phrase { phrase: Interned<Phrase> },
     Word { derivations: WordDerivations },
 }
 impl QueryTerm {
-    pub fn original_single_word(&self) -> Option<&str> {
+    pub fn original_single_word<'interner>(
+        &self,
+        word_interner: &'interner Interner<String>,
+    ) -> Option<&'interner str> {
         match self {
             QueryTerm::Phrase { phrase: _ } => None,
             QueryTerm::Word { derivations } => {
                 if derivations.is_empty() {
                     None
                 } else {
-                    Some(derivations.original.as_str())
+                    Some(word_interner.get(derivations.original))
                 }
             }
         }
     }
 }

-#[derive(Debug, Clone)]
+#[derive(Clone)]
 pub struct LocatedQueryTerm {
     pub value: QueryTerm,
     pub positions: RangeInclusive<i8>,
@@ -217,18 +242,17 @@ impl LocatedQueryTerm {
     }
 }

-pub fn located_query_terms_from_string<'transaction>(
-    index: &Index,
-    txn: &'transaction RoTxn,
+pub fn located_query_terms_from_string<'search>(
+    ctx: &mut SearchContext<'search>,
     query: NormalizedTokenIter<Vec<u8>>,
     words_limit: Option<usize>,
 ) -> Result<Vec<LocatedQueryTerm>> {
-    let authorize_typos = index.authorize_typos(txn)?;
-    let min_len_one_typo = index.min_word_len_one_typo(txn)?;
-    let min_len_two_typos = index.min_word_len_two_typos(txn)?;
+    let authorize_typos = ctx.index.authorize_typos(ctx.txn)?;
+    let min_len_one_typo = ctx.index.min_word_len_one_typo(ctx.txn)?;
+    let min_len_two_typos = ctx.index.min_word_len_two_typos(ctx.txn)?;

-    let exact_words = index.exact_words(txn)?;
-    let fst = index.words_fst(txn)?;
+    let exact_words = ctx.index.exact_words(ctx.txn)?;
+    let fst = ctx.index.words_fst(ctx.txn)?;

     let nbr_typos = |word: &str| {
         if !authorize_typos
@@ -243,10 +267,6 @@ pub fn located_query_terms_from_string<'search>(
         }
     };

-    let derivations = |word: &str, is_prefix: bool| {
-        word_derivations(index, txn, word, nbr_typos(word), is_prefix, &fst)
-    };
-
     let mut primitive_query = Vec::new();
     let mut phrase = Vec::new();
@@ -279,14 +299,17 @@ pub fn located_query_terms_from_string<'search>(
                 if let TokenKind::StopWord = token.kind {
                     phrase.push(None);
                 } else {
+                    let word = ctx.word_interner.insert(token.lemma().to_string());
                     // TODO: in a phrase, check that every word exists
                     // otherwise return WordDerivations::Empty
-                    phrase.push(Some(token.lemma().to_string()));
+                    phrase.push(Some(word));
                 }
             } else if peekable.peek().is_some() {
                 match token.kind {
                     TokenKind::Word => {
-                        let derivations = derivations(token.lemma(), false)?;
+                        let word = token.lemma();
+                        let derivations =
+                            word_derivations(ctx, word, nbr_typos(word), false, &fst)?;
                         let located_term = LocatedQueryTerm {
                             value: QueryTerm::Word { derivations },
                             positions: position..=position,
@@ -296,7 +319,8 @@ pub fn located_query_terms_from_string<'search>(
                     TokenKind::StopWord | TokenKind::Separator(_) | TokenKind::Unknown => {}
                 }
             } else {
-                let derivations = derivations(token.lemma(), true)?;
+                let word = token.lemma();
+                let derivations = word_derivations(ctx, word, nbr_typos(word), true, &fst)?;
                 let located_term = LocatedQueryTerm {
                     value: QueryTerm::Word { derivations },
                     positions: position..=position,
@@ -323,7 +347,9 @@ pub fn located_query_terms_from_string<'search>(
                 {
                     let located_query_term = LocatedQueryTerm {
                         value: QueryTerm::Phrase {
-                            phrase: Phrase { words: mem::take(&mut phrase) },
+                            phrase: ctx
+                                .phrase_interner
+                                .insert(Phrase { words: mem::take(&mut phrase) }),
                         },
                         positions: phrase_start..=phrase_end,
                     };
@@ -337,7 +363,9 @@ pub fn located_query_terms_from_string<'search>(
     // If a quote is never closed, we consider all of the end of the query as a phrase.
     if !phrase.is_empty() {
         let located_query_term = LocatedQueryTerm {
-            value: QueryTerm::Phrase { phrase: Phrase { words: mem::take(&mut phrase) } },
+            value: QueryTerm::Phrase {
+                phrase: ctx.phrase_interner.insert(Phrase { words: mem::take(&mut phrase) }),
+            },
            positions: phrase_start..=phrase_end,
        };
        primitive_query.push(located_query_term);
@@ -347,35 +375,49 @@ pub fn located_query_terms_from_string<'search>(
 }

 // TODO: return a word derivations instead?
-pub fn ngram2(x: &LocatedQueryTerm, y: &LocatedQueryTerm) -> Option<(String, RangeInclusive<i8>)> {
+pub fn ngram2(
+    ctx: &mut SearchContext,
+    x: &LocatedQueryTerm,
+    y: &LocatedQueryTerm,
+) -> Option<(Interned<String>, RangeInclusive<i8>)> {
     if *x.positions.end() != y.positions.start() - 1 {
         return None;
     }
-    match (&x.value.original_single_word(), &y.value.original_single_word()) {
+    match (
+        &x.value.original_single_word(&ctx.word_interner),
+        &y.value.original_single_word(&ctx.word_interner),
+    ) {
         (Some(w1), Some(w2)) => {
-            let term = (format!("{w1}{w2}"), *x.positions.start()..=*y.positions.end());
+            let term = (
+                ctx.word_interner.insert(format!("{w1}{w2}")),
+                *x.positions.start()..=*y.positions.end(),
+            );
             Some(term)
         }
         _ => None,
     }
 }

 pub fn ngram3(
+    ctx: &mut SearchContext,
     x: &LocatedQueryTerm,
     y: &LocatedQueryTerm,
     z: &LocatedQueryTerm,
-) -> Option<(String, RangeInclusive<i8>)> {
+) -> Option<(Interned<String>, RangeInclusive<i8>)> {
     if *x.positions.end() != y.positions.start() - 1
         || *y.positions.end() != z.positions.start() - 1
     {
         return None;
     }
     match (
-        &x.value.original_single_word(),
-        &y.value.original_single_word(),
-        &z.value.original_single_word(),
+        &x.value.original_single_word(&ctx.word_interner),
+        &y.value.original_single_word(&ctx.word_interner),
+        &z.value.original_single_word(&ctx.word_interner),
     ) {
         (Some(w1), Some(w2), Some(w3)) => {
-            let term = (format!("{w1}{w2}{w3}"), *x.positions.start()..=*z.positions.end());
+            let term = (
+                ctx.word_interner.insert(format!("{w1}{w2}{w3}")),
+                *x.positions.start()..=*z.positions.end(),
+            );
             Some(term)
         }
         _ => None,
View File

@@ -1,18 +1,10 @@
-use heed::RoTxn;
+use super::{Edge, RankingRuleGraph, RankingRuleGraphTrait};
+use crate::new::{QueryGraph, SearchContext};
+use crate::Result;
 use roaring::RoaringBitmap;

-use super::{Edge, RankingRuleGraph, RankingRuleGraphTrait};
-use crate::new::db_cache::DatabaseCache;
-use crate::new::QueryGraph;
-use crate::{Index, Result};

 impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
-    pub fn build<'db_cache, 'transaction: 'db_cache>(
-        index: &Index,
-        txn: &'transaction RoTxn,
-        db_cache: &mut DatabaseCache<'transaction>,
-        query_graph: QueryGraph,
-    ) -> Result<Self> {
+    pub fn build(ctx: &mut SearchContext, query_graph: QueryGraph) -> Result<Self> {
         let mut ranking_rule_graph =
             Self { query_graph, all_edges: vec![], node_edges: vec![], successors: vec![] };
@@ -22,12 +14,11 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
             let new_edges = ranking_rule_graph.node_edges.last_mut().unwrap();
             let new_successors = ranking_rule_graph.successors.last_mut().unwrap();

-            let Some(from_node_data) = G::build_visit_from_node(index, txn, db_cache, node)? else { continue };
+            let Some(from_node_data) = G::build_visit_from_node(ctx, node)? else { continue };

             for successor_idx in ranking_rule_graph.query_graph.edges[node_idx].successors.iter() {
                 let to_node = &ranking_rule_graph.query_graph.nodes[successor_idx as usize];
-                let mut edges =
-                    G::build_visit_to_node(index, txn, db_cache, to_node, &from_node_data)?;
+                let mut edges = G::build_visit_to_node(ctx, to_node, &from_node_data)?;
                 if edges.is_empty() {
                     continue;
                 }

View File (new/ranking_rule_graph/edge_docids_cache.rs)

@@ -1,13 +1,10 @@
 use std::marker::PhantomData;

-use fxhash::FxHashMap;
-use heed::RoTxn;
-use roaring::RoaringBitmap;
-
 use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait};
-use crate::new::db_cache::DatabaseCache;
-use crate::new::BitmapOrAllRef;
-use crate::{Index, Result};
+use crate::new::{BitmapOrAllRef, SearchContext};
+use crate::Result;
+use fxhash::FxHashMap;
+use roaring::RoaringBitmap;

 // TODO: the cache should have a G::EdgeDetails as key
 // but then it means that we should have a quick way of
@@ -25,11 +22,9 @@ impl<G: RankingRuleGraphTrait> Default for EdgeDocidsCache<G> {
     }
 }
 impl<G: RankingRuleGraphTrait> EdgeDocidsCache<G> {
-    pub fn get_edge_docids<'s, 'transaction>(
+    pub fn get_edge_docids<'s, 'search>(
         &'s mut self,
-        index: &Index,
-        txn: &'transaction RoTxn,
-        db_cache: &mut DatabaseCache<'transaction>,
+        ctx: &mut SearchContext<'search>,
         edge_index: u32,
         graph: &RankingRuleGraph<G>,
         // TODO: maybe universe doesn't belong here
@@ -46,7 +41,7 @@ impl<G: RankingRuleGraphTrait> EdgeDocidsCache<G> {
             return Ok(BitmapOrAllRef::Bitmap(&self.cache[&edge_index]));
         }
         // TODO: maybe universe doesn't belong here
-        let docids = universe & G::compute_docids(index, txn, db_cache, details)?;
+        let docids = universe & G::compute_docids(ctx, details)?;
         let _ = self.cache.insert(edge_index, docids);
         let docids = &self.cache[&edge_index];
         Ok(BitmapOrAllRef::Bitmap(docids))

View File (new/ranking_rule_graph/mod.rs)

@ -7,20 +7,15 @@ mod proximity;
mod resolve_paths; mod resolve_paths;
mod typo; mod typo;
use super::logger::SearchLogger;
use super::{QueryGraph, QueryNode, SearchContext};
use crate::Result;
pub use edge_docids_cache::EdgeDocidsCache; pub use edge_docids_cache::EdgeDocidsCache;
pub use empty_paths_cache::EmptyPathsCache; pub use empty_paths_cache::EmptyPathsCache;
pub use proximity::ProximityGraph; pub use proximity::ProximityGraph;
pub use typo::TypoGraph;
use std::ops::ControlFlow;
use heed::RoTxn;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use std::ops::ControlFlow;
use super::db_cache::DatabaseCache; pub use typo::TypoGraph;
use super::logger::SearchLogger;
use super::{QueryGraph, QueryNode};
use crate::{Index, Result};
#[derive(Debug, Clone)] #[derive(Debug, Clone)]
pub enum EdgeDetails<E> { pub enum EdgeDetails<E> {
@ -42,6 +37,48 @@ pub struct EdgePointer<'graph, E> {
pub edge: &'graph Edge<E>, pub edge: &'graph Edge<E>,
} }
// pub struct SubWordDerivations {
// words: FxHashSet<Interned<String>>,
// synonyms: FxHashSet<Interned<Phrase>>, // NO! they're phrases, not strings
// split_words: bool,
// use_prefix_db: bool,
// }
// pub struct EdgeWordDerivations {
// // TODO: not Option, instead: Any | All | Subset(SubWordDerivations)
// from_words: Option<SubWordDerivations>, // ???
// to_words: Option<SubWordDerivations>, // + use prefix db?
// }
// fn aggregate_edge_word_derivations(
// graph: (),
// edges: Vec<usize>,
// ) -> BTreeMap<usize, SubWordDerivations> {
// todo!()
// }
// fn reduce_word_term_to_sub_word_derivations(
// term: &mut WordDerivations,
// derivations: &SubWordDerivations,
// ) {
// let mut new_one_typo = vec![];
// for w in term.one_typo {
// if derivations.words.contains(w) {
// new_one_typo.push(w);
// }
// }
// if term.use_prefix_db && !derivations.use_prefix_db {
// term.use_prefix_db = false;
// }
// // etc.
// }
// fn word_derivations_used_by_edge<G: RankingRuleGraphTrait>(
// edge: G::EdgeDetails,
// ) -> SubWordDerivations {
// todo!()
// }
pub trait RankingRuleGraphTrait: Sized { pub trait RankingRuleGraphTrait: Sized {
/// The details of an edge connecting two query nodes. These details /// The details of an edge connecting two query nodes. These details
/// should be sufficient to compute the edge's cost and associated document ids /// should be sufficient to compute the edge's cost and associated document ids
@ -55,10 +92,8 @@ pub trait RankingRuleGraphTrait: Sized {
fn graphviz_edge_details_label(edge: &Self::EdgeDetails) -> String; fn graphviz_edge_details_label(edge: &Self::EdgeDetails) -> String;
/// Compute the document ids associated with the given edge. /// Compute the document ids associated with the given edge.
fn compute_docids<'transaction>( fn compute_docids<'search>(
index: &Index, ctx: &mut SearchContext<'search>,
txn: &'transaction RoTxn,
db_cache: &mut DatabaseCache<'transaction>,
edge_details: &Self::EdgeDetails, edge_details: &Self::EdgeDetails,
) -> Result<RoaringBitmap>; ) -> Result<RoaringBitmap>;
@ -66,19 +101,15 @@ pub trait RankingRuleGraphTrait: Sized {
/// ///
/// This call is followed by zero, one or more calls to [`build_visit_to_node`](RankingRuleGraphTrait::build_visit_to_node), /// This call is followed by zero, one or more calls to [`build_visit_to_node`](RankingRuleGraphTrait::build_visit_to_node),
/// which builds the actual edges. /// which builds the actual edges.
fn build_visit_from_node<'transaction>( fn build_visit_from_node<'search>(
index: &Index, ctx: &mut SearchContext<'search>,
txn: &'transaction RoTxn,
db_cache: &mut DatabaseCache<'transaction>,
from_node: &QueryNode, from_node: &QueryNode,
) -> Result<Option<Self::BuildVisitedFromNode>>; ) -> Result<Option<Self::BuildVisitedFromNode>>;
/// Return the cost and details of the edges going from the previously visited node /// Return the cost and details of the edges going from the previously visited node
/// (with [`build_visit_from_node`](RankingRuleGraphTrait::build_visit_from_node)) to `to_node`. /// (with [`build_visit_from_node`](RankingRuleGraphTrait::build_visit_from_node)) to `to_node`.
fn build_visit_to_node<'from_data, 'transaction: 'from_data>( fn build_visit_to_node<'from_data, 'search: 'from_data>(
index: &Index, ctx: &mut SearchContext<'search>,
txn: &'transaction RoTxn,
db_cache: &mut DatabaseCache<'transaction>,
to_node: &QueryNode, to_node: &QueryNode,
from_node_data: &'from_data Self::BuildVisitedFromNode, from_node_data: &'from_data Self::BuildVisitedFromNode,
) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>>; ) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>>;
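Reviewer note: the doc comments describe a two-phase build protocol, one `build_visit_from_node` call per source node followed by zero or more `build_visit_to_node` calls reusing the returned data. A sketch of the driving loop this implies (the function and its `successors` parameter are assumptions; the real builder lives elsewhere in the crate):

```rust
fn build_edges_from<'search, G: RankingRuleGraphTrait>(
    ctx: &mut SearchContext<'search>,
    from_node: &QueryNode,
    successors: &[QueryNode],
) -> Result<Vec<(u8, EdgeDetails<G::EdgeDetails>)>> {
    // Phase 1: visit the source node once; `None` means no outgoing edges.
    let Some(from_data) = G::build_visit_from_node(ctx, from_node)? else {
        return Ok(vec![]);
    };
    // Phase 2: reuse `from_data` for every destination node.
    let mut edges = vec![];
    for to_node in successors {
        edges.extend(G::build_visit_to_node(ctx, to_node, &from_data)?);
    }
    Ok(edges)
}
```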

View File

@ -1,30 +1,30 @@
use std::collections::BTreeMap;
use heed::RoTxn;
use itertools::Itertools;
use super::ProximityEdge; use super::ProximityEdge;
use crate::new::db_cache::DatabaseCache;
use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations}; use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
use crate::new::ranking_rule_graph::proximity::WordPair; use crate::new::ranking_rule_graph::proximity::WordPair;
use crate::new::ranking_rule_graph::EdgeDetails; use crate::new::ranking_rule_graph::EdgeDetails;
use crate::new::QueryNode; use crate::new::{QueryNode, SearchContext};
use crate::{Index, Result}; use crate::Result;
use itertools::Itertools;
use std::collections::BTreeMap;
pub fn visit_from_node(from_node: &QueryNode) -> Result<Option<(WordDerivations, i8)>> { pub fn visit_from_node(
ctx: &mut SearchContext,
from_node: &QueryNode,
) -> Result<Option<(WordDerivations, i8)>> {
Ok(Some(match from_node { Ok(Some(match from_node {
QueryNode::Term(LocatedQueryTerm { value: value1, positions: pos1 }) => match value1 { QueryNode::Term(LocatedQueryTerm { value: value1, positions: pos1 }) => match value1 {
QueryTerm::Word { derivations } => (derivations.clone(), *pos1.end()), QueryTerm::Word { derivations } => (derivations.clone(), *pos1.end()),
QueryTerm::Phrase { phrase: phrase1 } => { QueryTerm::Phrase { phrase: phrase1 } => {
if let Some(original) = phrase1.words.last().unwrap().as_ref() { let phrase1 = ctx.phrase_interner.get(*phrase1);
if let Some(original) = *phrase1.words.last().unwrap() {
( (
WordDerivations { WordDerivations {
original: original.clone(), original,
zero_typo: vec![original.to_owned()], zero_typo: Box::new([original]),
one_typo: vec![], one_typo: Box::new([]),
two_typos: vec![], two_typos: Box::new([]),
use_prefix_db: false, use_prefix_db: false,
synonyms: vec![], synonyms: Box::new([]),
split_words: None, split_words: None,
}, },
*pos1.end(), *pos1.end(),
@ -37,12 +37,12 @@ pub fn visit_from_node(from_node: &QueryNode) -> Result<Option<(WordDerivations,
}, },
QueryNode::Start => ( QueryNode::Start => (
WordDerivations { WordDerivations {
original: String::new(), original: ctx.word_interner.insert(String::new()),
zero_typo: vec![], zero_typo: Box::new([]),
one_typo: vec![], one_typo: Box::new([]),
two_typos: vec![], two_typos: Box::new([]),
use_prefix_db: false, use_prefix_db: false,
synonyms: vec![], synonyms: Box::new([]),
split_words: None, split_words: None,
}, },
-100, -100,
@ -51,10 +51,8 @@ pub fn visit_from_node(from_node: &QueryNode) -> Result<Option<(WordDerivations,
})) }))
} }
pub fn visit_to_node<'transaction, 'from_data>( pub fn visit_to_node<'search, 'from_data>(
index: &Index, ctx: &mut SearchContext<'search>,
txn: &'transaction RoTxn,
db_cache: &mut DatabaseCache<'transaction>,
to_node: &QueryNode, to_node: &QueryNode,
from_node_data: &'from_data (WordDerivations, i8), from_node_data: &'from_data (WordDerivations, i8),
) -> Result<Vec<(u8, EdgeDetails<ProximityEdge>)>> { ) -> Result<Vec<(u8, EdgeDetails<ProximityEdge>)>> {
@ -69,15 +67,16 @@ pub fn visit_to_node<'transaction, 'from_data>(
let (derivations2, pos2, ngram_len2) = match value2 { let (derivations2, pos2, ngram_len2) = match value2 {
QueryTerm::Word { derivations } => (derivations.clone(), *pos2.start(), pos2.len()), QueryTerm::Word { derivations } => (derivations.clone(), *pos2.start(), pos2.len()),
QueryTerm::Phrase { phrase: phrase2 } => { QueryTerm::Phrase { phrase: phrase2 } => {
if let Some(original) = phrase2.words.first().unwrap().as_ref() { let phrase2 = ctx.phrase_interner.get(*phrase2);
if let Some(original) = *phrase2.words.first().unwrap() {
( (
WordDerivations { WordDerivations {
original: original.clone(), original,
zero_typo: vec![original.to_owned()], zero_typo: Box::new([original]),
one_typo: vec![], one_typo: Box::new([]),
two_typos: vec![], two_typos: Box::new([]),
use_prefix_db: false, use_prefix_db: false,
synonyms: vec![], synonyms: Box::new([]),
split_words: None, split_words: None,
}, },
*pos2.start(), *pos2.start(),
@ -106,19 +105,16 @@ pub fn visit_to_node<'transaction, 'from_data>(
let derivations1 = derivations1.all_derivations_except_prefix_db(); let derivations1 = derivations1.all_derivations_except_prefix_db();
// TODO: eventually, we want to get rid of the uses of `original`
let original_word_2 = derivations2.original.clone();
let mut cost_proximity_word_pairs = BTreeMap::<u8, BTreeMap<u8, Vec<WordPair>>>::new(); let mut cost_proximity_word_pairs = BTreeMap::<u8, BTreeMap<u8, Vec<WordPair>>>::new();
if updb2 { if updb2 {
for word1 in derivations1.clone() { for word1 in derivations1.clone() {
for proximity in 1..=(8 - ngram_len2) { for proximity in 1..=(8 - ngram_len2) {
let cost = (proximity + ngram_len2 - 1) as u8; let cost = (proximity + ngram_len2 - 1) as u8;
if db_cache if ctx
.get_word_prefix_pair_proximity_docids( .get_word_prefix_pair_proximity_docids(
index,
txn,
word1, word1,
original_word_2.as_str(), derivations2.original,
proximity as u8, proximity as u8,
)? )?
.is_some() .is_some()
@ -129,16 +125,14 @@ pub fn visit_to_node<'transaction, 'from_data>(
.entry(proximity as u8) .entry(proximity as u8)
.or_default() .or_default()
.push(WordPair::WordPrefix { .push(WordPair::WordPrefix {
left: word1.to_owned(), left: word1,
right_prefix: original_word_2.to_owned(), right_prefix: derivations2.original,
}); });
} }
if db_cache if ctx
.get_prefix_word_pair_proximity_docids( .get_prefix_word_pair_proximity_docids(
index, derivations2.original,
txn, word1,
original_word_2.as_str(),
word1.as_str(),
proximity as u8 - 1, proximity as u8 - 1,
)? )?
.is_some() .is_some()
@ -149,8 +143,8 @@ pub fn visit_to_node<'transaction, 'from_data>(
.entry(proximity as u8) .entry(proximity as u8)
.or_default() .or_default()
.push(WordPair::WordPrefixSwapped { .push(WordPair::WordPrefixSwapped {
left_prefix: original_word_2.to_owned(), left_prefix: derivations2.original,
right: word1.to_owned(), right: word1,
}); });
} }
} }
@ -164,28 +158,23 @@ pub fn visit_to_node<'transaction, 'from_data>(
for (word1, word2) in product_derivations { for (word1, word2) in product_derivations {
for proximity in 1..=(8 - ngram_len2) { for proximity in 1..=(8 - ngram_len2) {
let cost = (proximity + ngram_len2 - 1) as u8; let cost = (proximity + ngram_len2 - 1) as u8;
if db_cache if ctx.get_word_pair_proximity_docids(word1, word2, proximity as u8)?.is_some() {
.get_word_pair_proximity_docids(index, txn, word1, word2, proximity as u8)?
.is_some()
{
cost_proximity_word_pairs cost_proximity_word_pairs
.entry(cost) .entry(cost)
.or_default() .or_default()
.entry(proximity as u8) .entry(proximity as u8)
.or_default() .or_default()
.push(WordPair::Words { left: word1.to_owned(), right: word2.to_owned() }); .push(WordPair::Words { left: word1, right: word2 });
} }
if proximity > 1 if proximity > 1
&& db_cache && ctx.get_word_pair_proximity_docids(word2, word1, proximity as u8 - 1)?.is_some()
.get_word_pair_proximity_docids(index, txn, word2, word1, proximity as u8 - 1)?
.is_some()
{ {
cost_proximity_word_pairs cost_proximity_word_pairs
.entry(cost) .entry(cost)
.or_default() .or_default()
.entry(proximity as u8 - 1) .entry(proximity as u8 - 1)
.or_default() .or_default()
.push(WordPair::Words { left: word2.to_owned(), right: word1.to_owned() }); .push(WordPair::Words { left: word2, right: word1 });
} }
} }
} }

View File

@ -1,14 +1,10 @@
use heed::RoTxn; use super::{ProximityEdge, WordPair};
use crate::new::SearchContext;
use crate::{CboRoaringBitmapCodec, Result};
use roaring::{MultiOps, RoaringBitmap}; use roaring::{MultiOps, RoaringBitmap};
use super::{ProximityEdge, WordPair}; pub fn compute_docids<'search>(
use crate::new::db_cache::DatabaseCache; ctx: &mut SearchContext<'search>,
use crate::{CboRoaringBitmapCodec, Result};
pub fn compute_docids<'transaction>(
index: &crate::Index,
txn: &'transaction RoTxn,
db_cache: &mut DatabaseCache<'transaction>,
edge: &ProximityEdge, edge: &ProximityEdge,
) -> Result<RoaringBitmap> { ) -> Result<RoaringBitmap> {
let ProximityEdge { pairs, proximity } = edge; let ProximityEdge { pairs, proximity } = edge;
@ -16,12 +12,14 @@ pub fn compute_docids<'transaction>(
for pair in pairs.iter() { for pair in pairs.iter() {
let bytes = match pair { let bytes = match pair {
WordPair::Words { left, right } => { WordPair::Words { left, right } => {
db_cache.get_word_pair_proximity_docids(index, txn, left, right, *proximity) ctx.get_word_pair_proximity_docids(*left, *right, *proximity)
}
WordPair::WordPrefix { left, right_prefix } => {
ctx.get_word_prefix_pair_proximity_docids(*left, *right_prefix, *proximity)
}
WordPair::WordPrefixSwapped { left_prefix, right } => {
ctx.get_prefix_word_pair_proximity_docids(*left_prefix, *right, *proximity)
} }
WordPair::WordPrefix { left, right_prefix } => db_cache
.get_word_prefix_pair_proximity_docids(index, txn, left, right_prefix, *proximity),
WordPair::WordPrefixSwapped { left_prefix, right } => db_cache
.get_prefix_word_pair_proximity_docids(index, txn, left_prefix, right, *proximity),
}?; }?;
let bitmap = let bitmap =
bytes.map(CboRoaringBitmapCodec::deserialize_from).transpose()?.unwrap_or_default(); bytes.map(CboRoaringBitmapCodec::deserialize_from).transpose()?.unwrap_or_default();

View File

@ -1,25 +1,22 @@
pub mod build; pub mod build;
pub mod compute_docids; pub mod compute_docids;
use heed::RoTxn;
use roaring::RoaringBitmap;
use super::empty_paths_cache::EmptyPathsCache; use super::empty_paths_cache::EmptyPathsCache;
use super::{EdgeDetails, RankingRuleGraphTrait}; use super::{EdgeDetails, RankingRuleGraphTrait};
use crate::new::db_cache::DatabaseCache; use crate::new::interner::Interned;
use crate::new::logger::SearchLogger; use crate::new::logger::SearchLogger;
use crate::new::query_term::WordDerivations; use crate::new::query_term::WordDerivations;
use crate::new::{QueryGraph, QueryNode}; use crate::new::{QueryGraph, QueryNode, SearchContext};
use crate::{Index, Result}; use crate::Result;
use roaring::RoaringBitmap;
// TODO: intern the strings, refer to them by their pointer? // TODO: intern the strings, refer to them by their pointer?
#[derive(Debug, Clone)] #[derive(Clone)]
pub enum WordPair { pub enum WordPair {
Words { left: String, right: String }, Words { left: Interned<String>, right: Interned<String> },
WordPrefix { left: String, right_prefix: String }, WordPrefix { left: Interned<String>, right_prefix: Interned<String> },
WordPrefixSwapped { left_prefix: String, right: String }, WordPrefixSwapped { left_prefix: Interned<String>, right: Interned<String> },
} }
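Reviewer note: `WordPair` now stores `Interned<String>` handles instead of owned `String`s, which is what makes the by-value moves in `build.rs` above possible. For reference, a minimal interner exposing the `insert`/`get` surface used throughout this diff could look like the sketch below (names assumed; the real type lives in `crate::new::interner` and is not shown in this commit):

```rust
use fxhash::FxHashMap;
use std::hash::Hash;
use std::marker::PhantomData;

// A stable, copyable handle to an interned value of type T.
pub struct Interned<T> {
    idx: u32,
    _phantom: PhantomData<T>,
}
impl<T> Clone for Interned<T> {
    fn clone(&self) -> Self {
        *self
    }
}
impl<T> Copy for Interned<T> {}
// The real type would also implement PartialEq/Eq/Hash over `idx`, so that
// handles can key hash maps.

#[derive(Default)]
pub struct Interner<T> {
    stable_store: Vec<T>,
    lookup: FxHashMap<T, u32>,
}
impl<T: Clone + Eq + Hash> Interner<T> {
    // Return the existing handle for `value`, or store it and mint a new one.
    pub fn insert(&mut self, value: T) -> Interned<T> {
        let idx = match self.lookup.get(&value) {
            Some(&idx) => idx,
            None => {
                let idx = self.stable_store.len() as u32;
                self.stable_store.push(value.clone());
                self.lookup.insert(value, idx);
                idx
            }
        };
        Interned { idx, _phantom: PhantomData }
    }
    pub fn get(&self, interned: Interned<T>) -> &T {
        &self.stable_store[interned.idx as usize]
    }
}
```

Interning makes word and phrase handles cheap to copy, compare and hash, at the cost of keeping every distinct value alive for the duration of the search.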
#[derive(Clone)] #[derive(Clone)]
@ -40,32 +37,26 @@ impl RankingRuleGraphTrait for ProximityGraph {
format!(", prox {proximity}, {} pairs", pairs.len()) format!(", prox {proximity}, {} pairs", pairs.len())
} }
fn compute_docids<'db_cache, 'transaction>( fn compute_docids<'search>(
index: &Index, ctx: &mut SearchContext<'search>,
txn: &'transaction RoTxn,
db_cache: &mut DatabaseCache<'transaction>,
edge: &Self::EdgeDetails, edge: &Self::EdgeDetails,
) -> Result<roaring::RoaringBitmap> { ) -> Result<roaring::RoaringBitmap> {
compute_docids::compute_docids(index, txn, db_cache, edge) compute_docids::compute_docids(ctx, edge)
} }
fn build_visit_from_node<'transaction>( fn build_visit_from_node<'search>(
_index: &Index, ctx: &mut SearchContext<'search>,
_txn: &'transaction RoTxn,
_db_cache: &mut DatabaseCache<'transaction>,
from_node: &QueryNode, from_node: &QueryNode,
) -> Result<Option<Self::BuildVisitedFromNode>> { ) -> Result<Option<Self::BuildVisitedFromNode>> {
build::visit_from_node(from_node) build::visit_from_node(ctx, from_node)
} }
fn build_visit_to_node<'from_data, 'transaction: 'from_data>( fn build_visit_to_node<'from_data, 'search: 'from_data>(
index: &Index, ctx: &mut SearchContext<'search>,
txn: &'transaction RoTxn,
db_cache: &mut DatabaseCache<'transaction>,
to_node: &QueryNode, to_node: &QueryNode,
from_node_data: &'from_data Self::BuildVisitedFromNode, from_node_data: &'from_data Self::BuildVisitedFromNode,
) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>> { ) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>> {
build::visit_to_node(index, txn, db_cache, to_node, from_node_data) build::visit_to_node(ctx, to_node, from_node_data)
} }
fn log_state( fn log_state(

View File

@ -1,23 +1,18 @@
#![allow(clippy::too_many_arguments)] #![allow(clippy::too_many_arguments)]
use heed::RoTxn;
use roaring::{MultiOps, RoaringBitmap};
use super::edge_docids_cache::EdgeDocidsCache; use super::edge_docids_cache::EdgeDocidsCache;
use super::empty_paths_cache::EmptyPathsCache; use super::empty_paths_cache::EmptyPathsCache;
use super::{RankingRuleGraph, RankingRuleGraphTrait}; use super::{RankingRuleGraph, RankingRuleGraphTrait};
use crate::new::db_cache::DatabaseCache; use crate::new::{BitmapOrAllRef, SearchContext};
use crate::Result;
use crate::new::BitmapOrAllRef; use roaring::{MultiOps, RoaringBitmap};
use crate::{Index, Result};
impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> { impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
pub fn resolve_paths<'transaction>( // TODO: reduce the universe after computing each path
// TODO: deserialize roaring bitmap within a universe
pub fn resolve_paths<'search>(
&mut self, &mut self,
index: &Index, ctx: &mut SearchContext<'search>,
txn: &'transaction RoTxn,
db_cache: &mut DatabaseCache<'transaction>,
edge_docids_cache: &mut EdgeDocidsCache<G>, edge_docids_cache: &mut EdgeDocidsCache<G>,
empty_paths_cache: &mut EmptyPathsCache, empty_paths_cache: &mut EmptyPathsCache,
universe: &RoaringBitmap, universe: &RoaringBitmap,
@ -52,8 +47,8 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
let mut cached_edge_docids = vec![]; let mut cached_edge_docids = vec![];
'edge_loop: for edge_index in edge_indexes { 'edge_loop: for edge_index in edge_indexes {
visited_edges.push(edge_index); visited_edges.push(edge_index);
let edge_docids = edge_docids_cache let edge_docids =
.get_edge_docids(index, txn, db_cache, edge_index, self, universe)?; edge_docids_cache.get_edge_docids(ctx, edge_index, self, universe)?;
match edge_docids { match edge_docids {
BitmapOrAllRef::Bitmap(edge_docids) => { BitmapOrAllRef::Bitmap(edge_docids) => {
cached_edge_docids.push((edge_index, edge_docids.clone())); cached_edge_docids.push((edge_index, edge_docids.clone()));

View File

@ -1,19 +1,17 @@
use heed::{BytesDecode, RoTxn};
use roaring::RoaringBitmap;
use super::empty_paths_cache::EmptyPathsCache; use super::empty_paths_cache::EmptyPathsCache;
use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait}; use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait};
use crate::new::db_cache::DatabaseCache; use crate::new::interner::Interned;
use crate::new::logger::SearchLogger; use crate::new::logger::SearchLogger;
use crate::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm, WordDerivations}; use crate::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm, WordDerivations};
use crate::new::resolve_query_graph::resolve_phrase; use crate::new::resolve_query_graph::resolve_phrase;
use crate::new::{QueryGraph, QueryNode}; use crate::new::{QueryGraph, QueryNode, SearchContext};
use crate::{Index, Result, RoaringBitmapCodec}; use crate::{Result, RoaringBitmapCodec};
use heed::BytesDecode;
use roaring::RoaringBitmap;
#[derive(Clone)] #[derive(Clone)]
pub enum TypoEdge { pub enum TypoEdge {
Phrase { phrase: Phrase }, Phrase { phrase: Interned<Phrase> },
Word { derivations: WordDerivations, nbr_typos: u8 }, Word { derivations: WordDerivations, nbr_typos: u8 },
} }
@ -30,14 +28,12 @@ impl RankingRuleGraphTrait for TypoGraph {
} }
} }
fn compute_docids<'db_cache, 'transaction>( fn compute_docids<'search>(
index: &Index, ctx: &mut SearchContext<'search>,
txn: &'transaction RoTxn,
db_cache: &mut DatabaseCache<'transaction>,
edge: &Self::EdgeDetails, edge: &Self::EdgeDetails,
) -> Result<RoaringBitmap> { ) -> Result<RoaringBitmap> {
match edge { match edge {
TypoEdge::Phrase { phrase } => resolve_phrase(index, txn, db_cache, phrase), TypoEdge::Phrase { phrase } => resolve_phrase(ctx, *phrase),
TypoEdge::Word { derivations, nbr_typos } => { TypoEdge::Word { derivations, nbr_typos } => {
let words = match nbr_typos { let words = match nbr_typos {
0 => &derivations.zero_typo, 0 => &derivations.zero_typo,
@ -46,16 +42,14 @@ impl RankingRuleGraphTrait for TypoGraph {
_ => panic!(), _ => panic!(),
}; };
let mut docids = RoaringBitmap::new(); let mut docids = RoaringBitmap::new();
for word in words.iter() { for word in words.iter().copied() {
let Some(bytes) = db_cache.get_word_docids(index, txn, word)? else { continue }; let Some(bytes) = ctx.get_word_docids(word)? else { continue };
let bitmap = let bitmap =
RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding)?; RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding)?;
docids |= bitmap; docids |= bitmap;
} }
if *nbr_typos == 0 { if *nbr_typos == 0 {
if let Some(bytes) = if let Some(bytes) = ctx.get_prefix_docids(derivations.original)? {
db_cache.get_prefix_docids(index, txn, &derivations.original)?
{
let bitmap = let bitmap =
RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding)?; RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding)?;
docids |= bitmap; docids |= bitmap;
@ -66,26 +60,22 @@ impl RankingRuleGraphTrait for TypoGraph {
} }
} }
fn build_visit_from_node<'transaction>( fn build_visit_from_node<'search>(
_index: &Index, _ctx: &mut SearchContext<'search>,
_txn: &'transaction RoTxn,
_db_cache: &mut DatabaseCache<'transaction>,
_from_node: &QueryNode, _from_node: &QueryNode,
) -> Result<Option<Self::BuildVisitedFromNode>> { ) -> Result<Option<Self::BuildVisitedFromNode>> {
Ok(Some(())) Ok(Some(()))
} }
fn build_visit_to_node<'from_data, 'transaction: 'from_data>( fn build_visit_to_node<'from_data, 'search: 'from_data>(
_index: &Index, _ctx: &mut SearchContext<'search>,
_txn: &'transaction RoTxn,
_db_cache: &mut DatabaseCache<'transaction>,
to_node: &QueryNode, to_node: &QueryNode,
_from_node_data: &'from_data Self::BuildVisitedFromNode, _from_node_data: &'from_data Self::BuildVisitedFromNode,
) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>> { ) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>> {
match to_node { match to_node {
QueryNode::Term(LocatedQueryTerm { value, .. }) => match value { QueryNode::Term(LocatedQueryTerm { value, .. }) => match value {
QueryTerm::Phrase { phrase } => { &QueryTerm::Phrase { phrase } => {
Ok(vec![(0, EdgeDetails::Data(TypoEdge::Phrase { phrase: phrase.clone() }))]) Ok(vec![(0, EdgeDetails::Data(TypoEdge::Phrase { phrase }))])
} }
QueryTerm::Word { derivations } => { QueryTerm::Word { derivations } => {
let mut edges = vec![]; let mut edges = vec![];

View File

@ -1,33 +1,28 @@
use heed::RoTxn;
use roaring::RoaringBitmap;
use super::db_cache::DatabaseCache;
use super::logger::SearchLogger; use super::logger::SearchLogger;
use super::QueryGraph; use super::QueryGraph;
use super::SearchContext;
use crate::new::graph_based_ranking_rule::GraphBasedRankingRule; use crate::new::graph_based_ranking_rule::GraphBasedRankingRule;
use crate::new::ranking_rule_graph::ProximityGraph; use crate::new::ranking_rule_graph::ProximityGraph;
use crate::new::ranking_rule_graph::TypoGraph; use crate::new::ranking_rule_graph::TypoGraph;
use crate::new::words::Words; use crate::new::words::Words;
use roaring::RoaringBitmap;
// use crate::search::new::sort::Sort; // use crate::search::new::sort::Sort;
use crate::{Index, Result, TermsMatchingStrategy}; use crate::{Result, TermsMatchingStrategy};
pub trait RankingRuleOutputIter<'transaction, Query> { pub trait RankingRuleOutputIter<'search, Query> {
fn next_bucket(&mut self) -> Result<Option<RankingRuleOutput<Query>>>; fn next_bucket(&mut self) -> Result<Option<RankingRuleOutput<Query>>>;
} }
pub struct RankingRuleOutputIterWrapper<'transaction, Query> { pub struct RankingRuleOutputIterWrapper<'search, Query> {
iter: Box<dyn Iterator<Item = Result<RankingRuleOutput<Query>>> + 'transaction>, iter: Box<dyn Iterator<Item = Result<RankingRuleOutput<Query>>> + 'search>,
} }
impl<'transaction, Query> RankingRuleOutputIterWrapper<'transaction, Query> { impl<'search, Query> RankingRuleOutputIterWrapper<'search, Query> {
pub fn new( pub fn new(iter: Box<dyn Iterator<Item = Result<RankingRuleOutput<Query>>> + 'search>) -> Self {
iter: Box<dyn Iterator<Item = Result<RankingRuleOutput<Query>>> + 'transaction>,
) -> Self {
Self { iter } Self { iter }
} }
} }
impl<'transaction, Query> RankingRuleOutputIter<'transaction, Query> impl<'search, Query> RankingRuleOutputIter<'search, Query>
for RankingRuleOutputIterWrapper<'transaction, Query> for RankingRuleOutputIterWrapper<'search, Query>
{ {
fn next_bucket(&mut self) -> Result<Option<RankingRuleOutput<Query>>> { fn next_bucket(&mut self) -> Result<Option<RankingRuleOutput<Query>>> {
match self.iter.next() { match self.iter.next() {
@ -44,7 +39,7 @@ pub struct PlaceholderQuery;
impl RankingRuleQueryTrait for PlaceholderQuery {} impl RankingRuleQueryTrait for PlaceholderQuery {}
impl RankingRuleQueryTrait for QueryGraph {} impl RankingRuleQueryTrait for QueryGraph {}
pub trait RankingRule<'transaction, Query: RankingRuleQueryTrait> { pub trait RankingRule<'search, Query: RankingRuleQueryTrait> {
fn id(&self) -> String; fn id(&self) -> String;
/// Prepare the ranking rule such that it can start iterating over its /// Prepare the ranking rule such that it can start iterating over its
@ -53,9 +48,7 @@ pub trait RankingRule<'transaction, Query: RankingRuleQueryTrait> {
/// The given universe is the universe that will be given to [`next_bucket`](RankingRule::next_bucket). /// The given universe is the universe that will be given to [`next_bucket`](RankingRule::next_bucket).
fn start_iteration( fn start_iteration(
&mut self, &mut self,
index: &Index, ctx: &mut SearchContext<'search>,
txn: &'transaction RoTxn,
db_cache: &mut DatabaseCache<'transaction>,
logger: &mut dyn SearchLogger<Query>, logger: &mut dyn SearchLogger<Query>,
universe: &RoaringBitmap, universe: &RoaringBitmap,
query: &Query, query: &Query,
@ -70,9 +63,7 @@ pub trait RankingRule<'transaction, Query: RankingRuleQueryTrait> {
/// - the universe given to [`start_iteration`](RankingRule::start_iteration) /// - the universe given to [`start_iteration`](RankingRule::start_iteration)
fn next_bucket( fn next_bucket(
&mut self, &mut self,
index: &Index, ctx: &mut SearchContext<'search>,
txn: &'transaction RoTxn,
db_cache: &mut DatabaseCache<'transaction>,
logger: &mut dyn SearchLogger<Query>, logger: &mut dyn SearchLogger<Query>,
universe: &RoaringBitmap, universe: &RoaringBitmap,
) -> Result<Option<RankingRuleOutput<Query>>>; ) -> Result<Option<RankingRuleOutput<Query>>>;
@ -81,9 +72,7 @@ pub trait RankingRule<'transaction, Query: RankingRuleQueryTrait> {
/// The next call to this ranking rule, if any, will be [`start_iteration`](RankingRule::start_iteration). /// The next call to this ranking rule, if any, will be [`start_iteration`](RankingRule::start_iteration).
fn end_iteration( fn end_iteration(
&mut self, &mut self,
index: &Index, ctx: &mut SearchContext<'search>,
txn: &'transaction RoTxn,
db_cache: &mut DatabaseCache<'transaction>,
logger: &mut dyn SearchLogger<Query>, logger: &mut dyn SearchLogger<Query>,
); );
} }
@ -98,11 +87,9 @@ pub struct RankingRuleOutput<Q> {
// TODO: can make it generic over the query type (either query graph or placeholder) fairly easily // TODO: can make it generic over the query type (either query graph or placeholder) fairly easily
#[allow(clippy::too_many_arguments)] #[allow(clippy::too_many_arguments)]
pub fn apply_ranking_rules<'transaction>( pub fn apply_ranking_rules<'search>(
index: &Index, ctx: &mut SearchContext<'search>,
txn: &'transaction heed::RoTxn,
// TODO: ranking rules parameter // TODO: ranking rules parameter
db_cache: &mut DatabaseCache<'transaction>,
query_graph: &QueryGraph, query_graph: &QueryGraph,
universe: &RoaringBitmap, universe: &RoaringBitmap,
from: usize, from: usize,
@ -115,7 +102,7 @@ pub fn apply_ranking_rules<'transaction>(
let proximity = &mut GraphBasedRankingRule::<ProximityGraph>::new("proximity".to_owned()); let proximity = &mut GraphBasedRankingRule::<ProximityGraph>::new("proximity".to_owned());
let typo = &mut GraphBasedRankingRule::<TypoGraph>::new("typo".to_owned()); let typo = &mut GraphBasedRankingRule::<TypoGraph>::new("typo".to_owned());
// TODO: ranking rules given as argument // TODO: ranking rules given as argument
let mut ranking_rules: Vec<&mut dyn RankingRule<'transaction, QueryGraph>> = let mut ranking_rules: Vec<&mut dyn RankingRule<'search, QueryGraph>> =
vec![words, typo, proximity /*sort*/]; vec![words, typo, proximity /*sort*/];
logger.ranking_rules(&ranking_rules); logger.ranking_rules(&ranking_rules);
@ -126,7 +113,7 @@ pub fn apply_ranking_rules<'transaction>(
let ranking_rules_len = ranking_rules.len(); let ranking_rules_len = ranking_rules.len();
logger.start_iteration_ranking_rule(0, ranking_rules[0], query_graph, universe); logger.start_iteration_ranking_rule(0, ranking_rules[0], query_graph, universe);
ranking_rules[0].start_iteration(index, txn, db_cache, logger, universe, query_graph)?; ranking_rules[0].start_iteration(ctx, logger, universe, query_graph)?;
let mut candidates = vec![RoaringBitmap::default(); ranking_rules_len]; let mut candidates = vec![RoaringBitmap::default(); ranking_rules_len];
candidates[0] = universe.clone(); candidates[0] = universe.clone();
@ -142,7 +129,7 @@ pub fn apply_ranking_rules<'transaction>(
&candidates[cur_ranking_rule_index], &candidates[cur_ranking_rule_index],
); );
candidates[cur_ranking_rule_index].clear(); candidates[cur_ranking_rule_index].clear();
ranking_rules[cur_ranking_rule_index].end_iteration(index, txn, db_cache, logger); ranking_rules[cur_ranking_rule_index].end_iteration(ctx, logger);
if cur_ranking_rule_index == 0 { if cur_ranking_rule_index == 0 {
break; break;
} else { } else {
@ -206,7 +193,7 @@ pub fn apply_ranking_rules<'transaction>(
continue; continue;
} }
let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket(index, txn, db_cache, logger, &candidates[cur_ranking_rule_index])? else { let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket(ctx, logger, &candidates[cur_ranking_rule_index])? else {
// TODO: add remaining candidates automatically here? // TODO: add remaining candidates automatically here?
back!(); back!();
continue; continue;
@ -239,9 +226,7 @@ pub fn apply_ranking_rules<'transaction>(
&candidates[cur_ranking_rule_index], &candidates[cur_ranking_rule_index],
); );
ranking_rules[cur_ranking_rule_index].start_iteration( ranking_rules[cur_ranking_rule_index].start_iteration(
index, ctx,
txn,
db_cache,
logger, logger,
&next_bucket.candidates, &next_bucket.candidates,
&next_bucket.query, &next_bucket.query,
@ -255,9 +240,7 @@ pub fn apply_ranking_rules<'transaction>(
mod tests { mod tests {
// use crate::allocator::ALLOC; // use crate::allocator::ALLOC;
use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use crate::index::tests::TempIndex; use crate::new::{execute_search, SearchContext};
use crate::new::db_cache::DatabaseCache;
use crate::new::execute_search;
use big_s::S; use big_s::S;
use heed::EnvOpenOptions; use heed::EnvOpenOptions;
use maplit::hashset; use maplit::hashset;
@ -269,55 +252,6 @@ mod tests {
use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings}; use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use crate::{Criterion, Index, Object, Search, TermsMatchingStrategy}; use crate::{Criterion, Index, Object, Search, TermsMatchingStrategy};
#[test]
fn execute_new_search() {
let index = TempIndex::new();
index
.add_documents(documents!([
{
"id": 7,
"text": "the super quick super brown fox jumps over",
},
{
"id": 8,
"text": "the super quick brown fox jumps over",
},
{
"id": 9,
"text": "the quick super brown fox jumps over",
},
{
"id": 10,
"text": "the quick brown fox jumps over",
},
{
"id": 11,
"text": "the quick brown fox jumps over the lazy dog",
},
{
"id": 12,
"text": "the quick brown cat jumps over the lazy dog",
},
]))
.unwrap();
let txn = index.read_txn().unwrap();
let mut db_cache = DatabaseCache::default();
let results = execute_search(
&index,
&txn,
&mut db_cache,
"releases from poison by the government",
None,
0,
50,
&mut DefaultSearchLogger,
)
.unwrap();
println!("{results:?}")
}
#[test] #[test]
fn search_wiki_new() { fn search_wiki_new() {
let mut options = EnvOpenOptions::new(); let mut options = EnvOpenOptions::new();
@ -331,24 +265,20 @@ mod tests {
// loop { // loop {
let start = Instant::now(); let start = Instant::now();
let mut db_cache = DatabaseCache::default(); // let mut logger = crate::new::logger::detailed::DetailedSearchLogger::new("log");
let mut logger = crate::new::logger::detailed::DetailedSearchLogger::new("log");
let results = execute_search( let results = execute_search(
&index, &mut SearchContext::new(&index, &txn),
&txn,
&mut db_cache,
"releases from poison by the government", "releases from poison by the government",
None, None,
0, 0,
20, 20,
// &mut DefaultSearchLogger, &mut DefaultSearchLogger,
&mut logger, // &mut logger,
) )
.unwrap(); .unwrap();
logger.write_d2_description(); // logger.write_d2_description();
let elapsed = start.elapsed(); let elapsed = start.elapsed();
@ -425,19 +355,15 @@ mod tests {
let index = Index::new(options, "data_movies").unwrap(); let index = Index::new(options, "data_movies").unwrap();
let txn = index.read_txn().unwrap(); let txn = index.read_txn().unwrap();
let primary_key = index.primary_key(&txn).unwrap().unwrap(); // let primary_key = index.primary_key(&txn).unwrap().unwrap();
let primary_key = index.fields_ids_map(&txn).unwrap().id(primary_key).unwrap(); // let primary_key = index.fields_ids_map(&txn).unwrap().id(primary_key).unwrap();
// loop { // loop {
let start = Instant::now(); let start = Instant::now();
let mut db_cache = DatabaseCache::default();
let mut logger = crate::new::logger::detailed::DetailedSearchLogger::new("log"); let mut logger = crate::new::logger::detailed::DetailedSearchLogger::new("log");
let mut ctx = SearchContext::new(&index, &txn);
let results = execute_search( let results = execute_search(
&index, &mut ctx,
&txn,
&mut db_cache,
"releases from poison by the government", "releases from poison by the government",
None, None,
0, 0,
@ -447,24 +373,24 @@ mod tests {
) )
.unwrap(); .unwrap();
logger.write_d2_description(); logger.write_d2_description(&mut ctx);
let elapsed = start.elapsed(); let elapsed = start.elapsed();
let ids = index // let ids = index
.documents(&txn, results.iter().copied()) // .documents(&txn, results.iter().copied())
.unwrap() // .unwrap()
.into_iter() // .into_iter()
.map(|x| { // .map(|x| {
let obkv = &x.1; // let obkv = &x.1;
let id = obkv.get(primary_key).unwrap(); // let id = obkv.get(primary_key).unwrap();
let id: serde_json::Value = serde_json::from_slice(id).unwrap(); // let id: serde_json::Value = serde_json::from_slice(id).unwrap();
id.as_str().unwrap().to_owned() // id.as_str().unwrap().to_owned()
}) // })
.collect::<Vec<_>>(); // .collect::<Vec<_>>();
println!("{}us: {results:?}", elapsed.as_micros()); println!("{}us: {results:?}", elapsed.as_micros());
println!("external ids: {ids:?}"); // println!("external ids: {ids:?}");
// } // }
} }

View File

@ -1,34 +1,28 @@
use std::collections::VecDeque; use super::interner::Interned;
use fxhash::FxHashMap;
use heed::{BytesDecode, RoTxn};
use roaring::{MultiOps, RoaringBitmap};
use super::db_cache::DatabaseCache;
use super::query_term::{Phrase, QueryTerm, WordDerivations}; use super::query_term::{Phrase, QueryTerm, WordDerivations};
use super::{QueryGraph, QueryNode}; use super::{QueryGraph, QueryNode, SearchContext};
use crate::{CboRoaringBitmapCodec, Result, RoaringBitmapCodec};
use crate::{CboRoaringBitmapCodec, Index, Result, RoaringBitmapCodec}; use fxhash::FxHashMap;
use heed::BytesDecode;
use roaring::{MultiOps, RoaringBitmap};
use std::collections::VecDeque;
// TODO: manual performance metrics: access to DB, bitmap deserializations/operations, etc. // TODO: manual performance metrics: access to DB, bitmap deserializations/operations, etc.
#[derive(Default)] #[derive(Default)]
pub struct NodeDocIdsCache { pub struct NodeDocIdsCache {
pub cache: FxHashMap<u32, RoaringBitmap>, pub cache: FxHashMap<u32, RoaringBitmap>,
} }
impl NodeDocIdsCache { impl<'search> SearchContext<'search> {
fn get_docids<'cache, 'transaction>( fn get_node_docids<'cache>(
&'cache mut self, &'cache mut self,
index: &Index,
txn: &'transaction RoTxn,
db_cache: &mut DatabaseCache<'transaction>,
term: &QueryTerm, term: &QueryTerm,
node_idx: u32, node_idx: u32,
) -> Result<&'cache RoaringBitmap> { ) -> Result<&'cache RoaringBitmap> {
if self.cache.contains_key(&node_idx) { if self.node_docids_cache.cache.contains_key(&node_idx) {
return Ok(&self.cache[&node_idx]); return Ok(&self.node_docids_cache.cache[&node_idx]);
}; };
let docids = match term { let docids = match term {
QueryTerm::Phrase { phrase } => resolve_phrase(index, txn, db_cache, phrase)?, QueryTerm::Phrase { phrase } => resolve_phrase(self, *phrase)?,
QueryTerm::Word { QueryTerm::Word {
derivations: derivations:
WordDerivations { WordDerivations {
@ -42,15 +36,14 @@ impl NodeDocIdsCache {
}, },
} => { } => {
let mut or_docids = vec![]; let mut or_docids = vec![];
for word in zero_typo.iter().chain(one_typo.iter()).chain(two_typos.iter()) { for word in zero_typo.iter().chain(one_typo.iter()).chain(two_typos.iter()).copied()
if let Some(word_docids) = db_cache.get_word_docids(index, txn, word)? { {
if let Some(word_docids) = self.get_word_docids(word)? {
or_docids.push(word_docids); or_docids.push(word_docids);
} }
} }
if *use_prefix_db { if *use_prefix_db {
if let Some(prefix_docids) = if let Some(prefix_docids) = self.get_prefix_docids(*original)? {
db_cache.get_prefix_docids(index, txn, original.as_str())?
{
or_docids.push(prefix_docids); or_docids.push(prefix_docids);
} }
} }
@ -58,32 +51,25 @@ impl NodeDocIdsCache {
.into_iter() .into_iter()
.map(|slice| RoaringBitmapCodec::bytes_decode(slice).unwrap()) .map(|slice| RoaringBitmapCodec::bytes_decode(slice).unwrap())
.collect::<Vec<_>>(); .collect::<Vec<_>>();
for synonym in synonyms { for synonym in synonyms.iter().copied() {
// TODO: cache resolve_phrase? // TODO: cache resolve_phrase?
docids.push(resolve_phrase(index, txn, db_cache, synonym)?); docids.push(resolve_phrase(self, synonym)?);
} }
if let Some((left, right)) = split_words { if let Some(split_words) = split_words {
if let Some(split_word_docids) = docids.push(resolve_phrase(self, *split_words)?);
db_cache.get_word_pair_proximity_docids(index, txn, left, right, 1)?
{
docids.push(CboRoaringBitmapCodec::deserialize_from(split_word_docids)?);
}
} }
MultiOps::union(docids) MultiOps::union(docids)
} }
}; };
let _ = self.cache.insert(node_idx, docids); let _ = self.node_docids_cache.cache.insert(node_idx, docids);
let docids = &self.cache[&node_idx]; let docids = &self.node_docids_cache.cache[&node_idx];
Ok(docids) Ok(docids)
} }
} }
pub fn resolve_query_graph<'transaction>( pub fn resolve_query_graph<'search>(
index: &Index, ctx: &mut SearchContext<'search>,
txn: &'transaction RoTxn,
db_cache: &mut DatabaseCache<'transaction>,
node_docids_cache: &mut NodeDocIdsCache,
q: &QueryGraph, q: &QueryGraph,
universe: &RoaringBitmap, universe: &RoaringBitmap,
) -> Result<RoaringBitmap> { ) -> Result<RoaringBitmap> {
@ -111,8 +97,7 @@ pub fn resolve_query_graph<'transaction>(
let node_docids = match n { let node_docids = match n {
QueryNode::Term(located_term) => { QueryNode::Term(located_term) => {
let term = &located_term.value; let term = &located_term.value;
let derivations_docids = let derivations_docids = ctx.get_node_docids(term, node)?;
node_docids_cache.get_docids(index, txn, db_cache, term, node)?;
predecessors_docids & derivations_docids predecessors_docids & derivations_docids
} }
QueryNode::Deleted => { QueryNode::Deleted => {
@ -143,13 +128,8 @@ pub fn resolve_query_graph<'transaction>(
panic!() panic!()
} }
pub fn resolve_phrase<'transaction>( pub fn resolve_phrase(ctx: &mut SearchContext, phrase: Interned<Phrase>) -> Result<RoaringBitmap> {
index: &Index, let Phrase { words } = ctx.phrase_interner.get(phrase).clone();
txn: &'transaction RoTxn,
db_cache: &mut DatabaseCache<'transaction>,
phrase: &Phrase,
) -> Result<RoaringBitmap> {
let Phrase { words } = phrase;
let mut candidates = RoaringBitmap::new(); let mut candidates = RoaringBitmap::new();
let mut first_iter = true; let mut first_iter = true;
let winsize = words.len().min(3); let winsize = words.len().min(3);
@ -161,19 +141,19 @@ pub fn resolve_phrase<'transaction>(
for win in words.windows(winsize) { for win in words.windows(winsize) {
// Get all the documents with the matching distance for each word pairs. // Get all the documents with the matching distance for each word pairs.
let mut bitmaps = Vec::with_capacity(winsize.pow(2)); let mut bitmaps = Vec::with_capacity(winsize.pow(2));
for (offset, s1) in win for (offset, &s1) in win
.iter() .iter()
.enumerate() .enumerate()
.filter_map(|(index, word)| word.as_ref().map(|word| (index, word))) .filter_map(|(index, word)| word.as_ref().map(|word| (index, word)))
{ {
for (dist, s2) in win for (dist, &s2) in win
.iter() .iter()
.skip(offset + 1) .skip(offset + 1)
.enumerate() .enumerate()
.filter_map(|(index, word)| word.as_ref().map(|word| (index, word))) .filter_map(|(index, word)| word.as_ref().map(|word| (index, word)))
{ {
if dist == 0 { if dist == 0 {
match db_cache.get_word_pair_proximity_docids(index, txn, s1, s2, 1)? { match ctx.get_word_pair_proximity_docids(s1, s2, 1)? {
Some(m) => bitmaps.push(CboRoaringBitmapCodec::deserialize_from(m)?), Some(m) => bitmaps.push(CboRoaringBitmapCodec::deserialize_from(m)?),
// If there are no documents for this pair, there will be no // If there are no documents for this pair, there will be no
// results for the phrase query. // results for the phrase query.
@ -182,13 +162,9 @@ pub fn resolve_phrase<'transaction>(
} else { } else {
let mut bitmap = RoaringBitmap::new(); let mut bitmap = RoaringBitmap::new();
for dist in 0..=dist { for dist in 0..=dist {
if let Some(m) = db_cache.get_word_pair_proximity_docids( if let Some(m) =
index, ctx.get_word_pair_proximity_docids(s1, s2, dist as u8 + 1)?
txn, {
s1,
s2,
dist as u8 + 1,
)? {
bitmap |= CboRoaringBitmapCodec::deserialize_from(m)?; bitmap |= CboRoaringBitmapCodec::deserialize_from(m)?;
} }
} }
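Reviewer note: to make the windowing concrete, a four-word phrase is scanned as two overlapping windows of `min(4, 3) = 3` words, and every in-window ordered pair is looked up at each proximity up to its positional gap plus one (hypothetical phrase, same logic as above):

```rust
let words = ["the", "quick", "brown", "fox"];
let winsize = words.len().min(3); // 3, so two windows are scanned
for win in words.windows(winsize) {
    // win = ["the", "quick", "brown"], then ["quick", "brown", "fox"].
    // Ordered pairs looked up in this window (proximities unioned per pair):
    //   (win[0], win[1]) adjacent       -> proximity {1}
    //   (win[0], win[2]) one word gap   -> proximities {1, 2}
    //   (win[1], win[2]) adjacent       -> proximity {1}
    let _ = win;
}
// Each pair's docids (unioned over its allowed proximities) are intersected,
// and each window's result further intersects `candidates`.
```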

View File

@ -1,11 +1,7 @@
use heed::RoTxn;
use roaring::RoaringBitmap;
use super::db_cache::DatabaseCache;
use super::logger::SearchLogger; use super::logger::SearchLogger;
use super::{ use super::{
RankingRule, RankingRuleOutput, RankingRuleOutputIter, RankingRuleOutputIterWrapper, RankingRule, RankingRuleOutput, RankingRuleOutputIter, RankingRuleOutputIterWrapper,
RankingRuleQueryTrait, RankingRuleQueryTrait, SearchContext,
}; };
use crate::{ use crate::{
// facet::FacetType, // facet::FacetType,
@ -15,18 +11,19 @@ use crate::{
Index, Index,
Result, Result,
}; };
use roaring::RoaringBitmap;
pub struct Sort<'transaction, Query> { pub struct Sort<'search, Query> {
field_name: String, field_name: String,
field_id: Option<FieldId>, field_id: Option<FieldId>,
is_ascending: bool, is_ascending: bool,
original_query: Option<Query>, original_query: Option<Query>,
iter: Option<RankingRuleOutputIterWrapper<'transaction, Query>>, iter: Option<RankingRuleOutputIterWrapper<'search, Query>>,
} }
impl<'transaction, Query> Sort<'transaction, Query> { impl<'search, Query> Sort<'search, Query> {
pub fn new( pub fn _new(
index: &Index, index: &Index,
rtxn: &'transaction heed::RoTxn, rtxn: &'search heed::RoTxn,
field_name: String, field_name: String,
is_ascending: bool, is_ascending: bool,
) -> Result<Self> { ) -> Result<Self> {
@ -37,18 +34,14 @@ impl<'transaction, Query> Sort<'transaction, Query> {
} }
} }
impl<'transaction, Query: RankingRuleQueryTrait> RankingRule<'transaction, Query> impl<'search, Query: RankingRuleQueryTrait> RankingRule<'search, Query> for Sort<'search, Query> {
for Sort<'transaction, Query>
{
fn id(&self) -> String { fn id(&self) -> String {
let Self { field_name, is_ascending, .. } = self; let Self { field_name, is_ascending, .. } = self;
format!("{field_name}:{}", if *is_ascending { "asc" } else { "desc " }) format!("{field_name}:{}", if *is_ascending { "asc" } else { "desc " })
} }
fn start_iteration( fn start_iteration(
&mut self, &mut self,
index: &Index, ctx: &mut SearchContext<'search>,
txn: &'transaction RoTxn,
_db_cache: &mut DatabaseCache<'transaction>,
_logger: &mut dyn SearchLogger<Query>, _logger: &mut dyn SearchLogger<Query>,
parent_candidates: &RoaringBitmap, parent_candidates: &RoaringBitmap,
parent_query_graph: &Query, parent_query_graph: &Query,
@ -59,8 +52,8 @@ impl<'transaction, Query: RankingRuleQueryTrait> RankingRule<'transaction, Query
if self.is_ascending { ascending_facet_sort } else { descending_facet_sort }; if self.is_ascending { ascending_facet_sort } else { descending_facet_sort };
let number_iter = make_iter( let number_iter = make_iter(
txn, ctx.txn,
index ctx.index
.facet_id_f64_docids .facet_id_f64_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(), .remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
field_id, field_id,
@ -68,8 +61,8 @@ impl<'transaction, Query: RankingRuleQueryTrait> RankingRule<'transaction, Query
)?; )?;
let string_iter = make_iter( let string_iter = make_iter(
txn, ctx.txn,
index ctx.index
.facet_id_string_docids .facet_id_string_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(), .remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
field_id, field_id,
@ -91,9 +84,7 @@ impl<'transaction, Query: RankingRuleQueryTrait> RankingRule<'transaction, Query
fn next_bucket( fn next_bucket(
&mut self, &mut self,
_index: &Index, _ctx: &mut SearchContext<'search>,
_txn: &'transaction RoTxn,
_db_cache: &mut DatabaseCache<'transaction>,
_logger: &mut dyn SearchLogger<Query>, _logger: &mut dyn SearchLogger<Query>,
universe: &RoaringBitmap, universe: &RoaringBitmap,
) -> Result<Option<RankingRuleOutput<Query>>> { ) -> Result<Option<RankingRuleOutput<Query>>> {
@ -110,9 +101,7 @@ impl<'transaction, Query: RankingRuleQueryTrait> RankingRule<'transaction, Query
fn end_iteration( fn end_iteration(
&mut self, &mut self,
_index: &Index, _ctx: &mut SearchContext<'search>,
_txn: &'transaction RoTxn,
_db_cache: &mut DatabaseCache<'transaction>,
_logger: &mut dyn SearchLogger<Query>, _logger: &mut dyn SearchLogger<Query>,
) { ) {
self.original_query = None; self.original_query = None;

View File

@ -1,13 +1,9 @@
use std::collections::BTreeSet;
use heed::RoTxn;
use roaring::RoaringBitmap;
use super::db_cache::DatabaseCache;
use super::logger::SearchLogger; use super::logger::SearchLogger;
use super::resolve_query_graph::{resolve_query_graph, NodeDocIdsCache}; use super::resolve_query_graph::resolve_query_graph;
use super::{QueryGraph, QueryNode, RankingRule, RankingRuleOutput}; use super::{QueryGraph, QueryNode, RankingRule, RankingRuleOutput, SearchContext};
use crate::{Index, Result, TermsMatchingStrategy}; use crate::{Result, TermsMatchingStrategy};
use roaring::RoaringBitmap;
use std::collections::BTreeSet;
pub struct Words { pub struct Words {
exhausted: bool, exhausted: bool,
@ -15,7 +11,6 @@ pub struct Words {
iterating: bool, iterating: bool,
positions_to_remove: Vec<i8>, positions_to_remove: Vec<i8>,
terms_matching_strategy: TermsMatchingStrategy, terms_matching_strategy: TermsMatchingStrategy,
node_docids_cache: NodeDocIdsCache,
} }
impl Words { impl Words {
pub fn new(terms_matching_strategy: TermsMatchingStrategy) -> Self { pub fn new(terms_matching_strategy: TermsMatchingStrategy) -> Self {
@ -25,20 +20,17 @@ impl Words {
iterating: false, iterating: false,
positions_to_remove: vec![], positions_to_remove: vec![],
terms_matching_strategy, terms_matching_strategy,
node_docids_cache: <_>::default(),
} }
} }
} }
impl<'transaction> RankingRule<'transaction, QueryGraph> for Words { impl<'search> RankingRule<'search, QueryGraph> for Words {
fn id(&self) -> String { fn id(&self) -> String {
"words".to_owned() "words".to_owned()
} }
fn start_iteration( fn start_iteration(
&mut self, &mut self,
_index: &Index, _ctx: &mut SearchContext<'search>,
_txn: &'transaction RoTxn,
_db_cache: &mut DatabaseCache<'transaction>,
_logger: &mut dyn SearchLogger<QueryGraph>, _logger: &mut dyn SearchLogger<QueryGraph>,
_parent_candidates: &RoaringBitmap, _parent_candidates: &RoaringBitmap,
parent_query_graph: &QueryGraph, parent_query_graph: &QueryGraph,
@ -71,9 +63,7 @@ impl<'transaction> RankingRule<'transaction, QueryGraph> for Words {
fn next_bucket( fn next_bucket(
&mut self, &mut self,
index: &Index, ctx: &mut SearchContext<'search>,
txn: &'transaction RoTxn,
db_cache: &mut DatabaseCache<'transaction>,
logger: &mut dyn SearchLogger<QueryGraph>, logger: &mut dyn SearchLogger<QueryGraph>,
universe: &RoaringBitmap, universe: &RoaringBitmap,
) -> Result<Option<RankingRuleOutput<QueryGraph>>> { ) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
@ -87,14 +77,7 @@ impl<'transaction> RankingRule<'transaction, QueryGraph> for Words {
logger.log_words_state(query_graph); logger.log_words_state(query_graph);
let this_bucket = resolve_query_graph( let this_bucket = resolve_query_graph(ctx, query_graph, universe)?;
index,
txn,
db_cache,
&mut self.node_docids_cache,
query_graph,
universe,
)?;
let child_query_graph = query_graph.clone(); let child_query_graph = query_graph.clone();
loop { loop {
@ -115,9 +98,7 @@ impl<'transaction> RankingRule<'transaction, QueryGraph> for Words {
fn end_iteration( fn end_iteration(
&mut self, &mut self,
_index: &Index, _ctx: &mut SearchContext<'search>,
_txn: &'transaction RoTxn,
_db_cache: &mut DatabaseCache<'transaction>,
_logger: &mut dyn SearchLogger<QueryGraph>, _logger: &mut dyn SearchLogger<QueryGraph>,
) { ) {
self.iterating = false; self.iterating = false;