Intern all strings and phrases in the search logic
This commit is contained in:
parent
3f1729a17f
commit
e8c76cf7bf
@@ -1,51 +1,48 @@
+use std::collections::hash_map::Entry;
+
+use super::{interner::Interned, SearchContext};
+use crate::Result;
 use fxhash::FxHashMap;
 use heed::types::ByteSlice;
 use heed::RoTxn;
-
-use crate::{Index, Result};
-use std::collections::hash_map::Entry;

 #[derive(Default)]
-pub struct DatabaseCache<'transaction> {
-    pub word_pair_proximity_docids: FxHashMap<(u8, String, String), Option<&'transaction [u8]>>,
+pub struct DatabaseCache<'search> {
+    // TODO: interner for all database cache keys
+    pub word_pair_proximity_docids:
+        FxHashMap<(u8, Interned<String>, Interned<String>), Option<&'search [u8]>>,
     pub word_prefix_pair_proximity_docids:
-        FxHashMap<(u8, String, String), Option<&'transaction [u8]>>,
+        FxHashMap<(u8, Interned<String>, Interned<String>), Option<&'search [u8]>>,
     pub prefix_word_pair_proximity_docids:
-        FxHashMap<(u8, String, String), Option<&'transaction [u8]>>,
-    pub word_docids: FxHashMap<String, Option<&'transaction [u8]>>,
-    pub exact_word_docids: FxHashMap<String, Option<&'transaction [u8]>>,
-    pub word_prefix_docids: FxHashMap<String, Option<&'transaction [u8]>>,
+        FxHashMap<(u8, Interned<String>, Interned<String>), Option<&'search [u8]>>,
+    pub word_docids: FxHashMap<Interned<String>, Option<&'search [u8]>>,
+    pub exact_word_docids: FxHashMap<Interned<String>, Option<&'search [u8]>>,
+    pub word_prefix_docids: FxHashMap<Interned<String>, Option<&'search [u8]>>,
 }
-impl<'transaction> DatabaseCache<'transaction> {
-    pub fn get_word_docids(
-        &mut self,
-        index: &Index,
-        txn: &'transaction RoTxn,
-        word: &str,
-    ) -> Result<Option<&'transaction [u8]>> {
-        let bitmap_ptr = match self.word_docids.entry(word.to_owned()) {
+impl<'search> SearchContext<'search> {
+    pub fn get_word_docids(&mut self, word: Interned<String>) -> Result<Option<&'search [u8]>> {
+        let bitmap_ptr = match self.db_cache.word_docids.entry(word) {
             Entry::Occupied(bitmap_ptr) => *bitmap_ptr.get(),
             Entry::Vacant(entry) => {
-                let bitmap_ptr = index.word_docids.remap_data_type::<ByteSlice>().get(txn, word)?;
+                let bitmap_ptr = self
+                    .index
+                    .word_docids
+                    .remap_data_type::<ByteSlice>()
+                    .get(self.txn, self.word_interner.get(word))?;
                 entry.insert(bitmap_ptr);
                 bitmap_ptr
             }
         };
         Ok(bitmap_ptr)
     }
-    pub fn get_prefix_docids(
-        &mut self,
-        index: &Index,
-        txn: &'transaction RoTxn,
-        prefix: &str,
-    ) -> Result<Option<&'transaction [u8]>> {
+    pub fn get_prefix_docids(&mut self, prefix: Interned<String>) -> Result<Option<&'search [u8]>> {
         // In the future, this will be a frozen roaring bitmap
-        let bitmap_ptr = match self.word_prefix_docids.entry(prefix.to_owned()) {
+        let bitmap_ptr = match self.db_cache.word_prefix_docids.entry(prefix) {
             Entry::Occupied(bitmap_ptr) => *bitmap_ptr.get(),
             Entry::Vacant(entry) => {
-                let bitmap_ptr =
-                    index.word_prefix_docids.remap_data_type::<ByteSlice>().get(txn, prefix)?;
+                let bitmap_ptr = self
+                    .index
+                    .word_prefix_docids
+                    .remap_data_type::<ByteSlice>()
+                    .get(self.txn, self.word_interner.get(prefix))?;
                 entry.insert(bitmap_ptr);
                 bitmap_ptr
             }
@@ -55,14 +52,12 @@ impl<'transaction> DatabaseCache<'transaction> {

     pub fn get_word_pair_proximity_docids(
         &mut self,
-        index: &Index,
-        txn: &'transaction RoTxn,
-        word1: &str,
-        word2: &str,
+        word1: Interned<String>,
+        word2: Interned<String>,
         proximity: u8,
-    ) -> Result<Option<&'transaction [u8]>> {
-        let key = (proximity, word1.to_owned(), word2.to_owned());
-        match self.word_pair_proximity_docids.entry(key.clone()) {
+    ) -> Result<Option<&'search [u8]>> {
+        let key = (proximity, word1, word2);
+        match self.db_cache.word_pair_proximity_docids.entry(key) {
             Entry::Occupied(bitmap_ptr) => Ok(*bitmap_ptr.get()),
             Entry::Vacant(entry) => {
                 // We shouldn't greedily access this DB at all
@@ -86,10 +81,11 @@ impl<'transaction> DatabaseCache<'transaction> {
                 //     output.push(word1, word2, proximities);
                 //   }
                 // }
-                let bitmap_ptr = index
-                    .word_pair_proximity_docids
-                    .remap_data_type::<ByteSlice>()
-                    .get(txn, &(key.0, key.1.as_str(), key.2.as_str()))?;
+                let bitmap_ptr =
+                    self.index.word_pair_proximity_docids.remap_data_type::<ByteSlice>().get(
+                        self.txn,
+                        &(key.0, self.word_interner.get(key.1), self.word_interner.get(key.2)),
+                    )?;
                 entry.insert(bitmap_ptr);
                 Ok(bitmap_ptr)
             }
@@ -98,20 +94,22 @@ impl<'transaction> DatabaseCache<'transaction> {

     pub fn get_word_prefix_pair_proximity_docids(
         &mut self,
-        index: &Index,
-        txn: &'transaction RoTxn,
-        word1: &str,
-        prefix2: &str,
+        word1: Interned<String>,
+        prefix2: Interned<String>,
         proximity: u8,
-    ) -> Result<Option<&'transaction [u8]>> {
-        let key = (proximity, word1.to_owned(), prefix2.to_owned());
-        match self.word_prefix_pair_proximity_docids.entry(key.clone()) {
+    ) -> Result<Option<&'search [u8]>> {
+        let key = (proximity, word1, prefix2);
+        match self.db_cache.word_prefix_pair_proximity_docids.entry(key) {
             Entry::Occupied(bitmap_ptr) => Ok(*bitmap_ptr.get()),
             Entry::Vacant(entry) => {
-                let bitmap_ptr = index
+                let bitmap_ptr = self
+                    .index
                     .word_prefix_pair_proximity_docids
                     .remap_data_type::<ByteSlice>()
-                    .get(txn, &(key.0, key.1.as_str(), key.2.as_str()))?;
+                    .get(
+                        self.txn,
+                        &(key.0, self.word_interner.get(key.1), self.word_interner.get(key.2)),
+                    )?;
                 entry.insert(bitmap_ptr);
                 Ok(bitmap_ptr)
             }
@@ -119,20 +117,26 @@ impl<'transaction> DatabaseCache<'transaction> {
     }
     pub fn get_prefix_word_pair_proximity_docids(
         &mut self,
-        index: &Index,
-        txn: &'transaction RoTxn,
-        left_prefix: &str,
-        right: &str,
+        left_prefix: Interned<String>,
+        right: Interned<String>,
         proximity: u8,
-    ) -> Result<Option<&'transaction [u8]>> {
-        let key = (proximity, left_prefix.to_owned(), right.to_owned());
-        match self.prefix_word_pair_proximity_docids.entry(key) {
+    ) -> Result<Option<&'search [u8]>> {
+        let key = (proximity, left_prefix, right);
+        match self.db_cache.prefix_word_pair_proximity_docids.entry(key) {
             Entry::Occupied(bitmap_ptr) => Ok(*bitmap_ptr.get()),
             Entry::Vacant(entry) => {
-                let bitmap_ptr = index
+                let bitmap_ptr = self
+                    .index
                     .prefix_word_pair_proximity_docids
                     .remap_data_type::<ByteSlice>()
-                    .get(txn, &(proximity, left_prefix, right))?;
+                    .get(
+                        self.txn,
+                        &(
+                            proximity,
+                            self.word_interner.get(left_prefix),
+                            self.word_interner.get(right),
+                        ),
+                    )?;
                 entry.insert(bitmap_ptr);
                 Ok(bitmap_ptr)
             }
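A hedged sketch of the new calling convention from a caller's side (the helper below is hypothetical and not part of the commit; only `get_word_docids` and the interner come from the diff):

    fn word_docids_bytes_len(ctx: &mut SearchContext, word: &str) -> Result<usize> {
        // Intern once; the Interned<String> key is Copy, so the cache entry
        // lookup no longer clones a String on every call.
        let word = ctx.word_interner.insert(word.to_owned());
        // The first call reads LMDB through ctx.txn; repeated calls for the
        // same interned word are a single FxHashMap lookup.
        Ok(ctx.get_word_docids(word)?.map_or(0, |bytes| bytes.len()))
    }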
@@ -1,15 +1,11 @@
-use heed::RoTxn;
-use roaring::RoaringBitmap;
-
-use super::db_cache::DatabaseCache;
 use super::logger::SearchLogger;
 use super::ranking_rule_graph::EdgeDocidsCache;
 use super::ranking_rule_graph::EmptyPathsCache;

 use super::ranking_rule_graph::{RankingRuleGraph, RankingRuleGraphTrait};
+use super::SearchContext;
 use super::{BitmapOrAllRef, QueryGraph, RankingRule, RankingRuleOutput};
-use crate::{Index, Result};
+use crate::Result;
+use roaring::RoaringBitmap;

 pub struct GraphBasedRankingRule<G: RankingRuleGraphTrait> {
     id: String,
@@ -29,12 +25,10 @@ pub struct GraphBasedRankingRuleState<G: RankingRuleGraphTrait> {
     cur_distance_idx: usize,
 }

-fn remove_empty_edges<'transaction, G: RankingRuleGraphTrait>(
+fn remove_empty_edges<'search, G: RankingRuleGraphTrait>(
+    ctx: &mut SearchContext<'search>,
     graph: &mut RankingRuleGraph<G>,
     edge_docids_cache: &mut EdgeDocidsCache<G>,
-    index: &Index,
-    txn: &'transaction RoTxn,
-    db_cache: &mut DatabaseCache<'transaction>,
     universe: &RoaringBitmap,
     empty_paths_cache: &mut EmptyPathsCache,
 ) -> Result<()> {
@@ -42,8 +36,7 @@ fn remove_empty_edges<'transaction, G: RankingRuleGraphTrait>(
         if graph.all_edges[edge_index as usize].is_none() {
             continue;
         }
-        let docids = edge_docids_cache
-            .get_edge_docids(index, txn, db_cache, edge_index, &*graph, universe)?;
+        let docids = edge_docids_cache.get_edge_docids(ctx, edge_index, &*graph, universe)?;
         match docids {
             BitmapOrAllRef::Bitmap(bitmap) => {
                 if bitmap.is_disjoint(universe) {
@@ -59,7 +52,7 @@ fn remove_empty_edges<'transaction, G: RankingRuleGraphTrait>(
     Ok(())
 }

-impl<'transaction, G: RankingRuleGraphTrait> RankingRule<'transaction, QueryGraph>
+impl<'search, G: RankingRuleGraphTrait> RankingRule<'search, QueryGraph>
     for GraphBasedRankingRule<G>
 {
     fn id(&self) -> String {
@@ -67,24 +60,20 @@ impl<'transaction, G: RankingRuleGraphTrait> RankingRule<'transaction, QueryGrap
     }
     fn start_iteration(
         &mut self,
-        index: &Index,
-        txn: &'transaction RoTxn,
-        db_cache: &mut DatabaseCache<'transaction>,
+        ctx: &mut SearchContext<'search>,
         _logger: &mut dyn SearchLogger<QueryGraph>,
         universe: &RoaringBitmap,
         query_graph: &QueryGraph,
     ) -> Result<()> {
         // TODO: update old state instead of starting from scratch
-        let mut graph = RankingRuleGraph::build(index, txn, db_cache, query_graph.clone())?;
+        let mut graph = RankingRuleGraph::build(ctx, query_graph.clone())?;
         let mut edge_docids_cache = EdgeDocidsCache::default();
         let mut empty_paths_cache = EmptyPathsCache::new(graph.all_edges.len());

         remove_empty_edges(
+            ctx,
             &mut graph,
             &mut edge_docids_cache,
-            index,
-            txn,
-            db_cache,
             universe,
             &mut empty_paths_cache,
         )?;
@@ -105,20 +94,16 @@ impl<'transaction, G: RankingRuleGraphTrait> RankingRule<'transaction, QueryGrap

     fn next_bucket(
         &mut self,
-        index: &Index,
-        txn: &'transaction RoTxn,
-        db_cache: &mut DatabaseCache<'transaction>,
+        ctx: &mut SearchContext<'search>,
         logger: &mut dyn SearchLogger<QueryGraph>,
         universe: &RoaringBitmap,
     ) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
         assert!(universe.len() > 1);
         let mut state = self.state.take().unwrap();
         remove_empty_edges(
+            ctx,
             &mut state.graph,
             &mut state.edge_docids_cache,
-            index,
-            txn,
-            db_cache,
             universe,
             &mut state.empty_paths_cache,
         )?;
@@ -151,9 +136,7 @@ impl<'transaction, G: RankingRuleGraphTrait> RankingRule<'transaction, QueryGrap
         );

         let bucket = state.graph.resolve_paths(
-            index,
-            txn,
-            db_cache,
+            ctx,
             &mut state.edge_docids_cache,
             &mut state.empty_paths_cache,
             universe,
@@ -169,9 +152,7 @@ impl<'transaction, G: RankingRuleGraphTrait> RankingRule<'transaction, QueryGrap

     fn end_iteration(
         &mut self,
-        _index: &Index,
-        _txn: &'transaction RoTxn,
-        _db_cache: &mut DatabaseCache<'transaction>,
+        _ctx: &mut SearchContext<'search>,
         _logger: &mut dyn SearchLogger<QueryGraph>,
     ) {
         self.state = None;
milli/src/search/new/interner.rs (Normal file, 78 lines added)
@@ -0,0 +1,78 @@
+use fxhash::FxHashMap;
+use std::hash::Hash;
+use std::marker::PhantomData;
+
+pub struct Interned<T> {
+    idx: u32,
+    _phantom: PhantomData<T>,
+}
+
+impl<T> Interned<T> {
+    fn new(idx: u32) -> Self {
+        Self { idx, _phantom: PhantomData }
+    }
+}
+
+pub struct Interner<T> {
+    stable_store: Vec<T>,
+    lookup: FxHashMap<T, Interned<T>>,
+}
+impl<T> Default for Interner<T> {
+    fn default() -> Self {
+        Self { stable_store: Default::default(), lookup: Default::default() }
+    }
+}
+
+impl<T> Interner<T>
+where
+    T: Clone + Eq + Hash,
+{
+    pub fn insert(&mut self, s: T) -> Interned<T> {
+        if let Some(interned) = self.lookup.get(&s) {
+            *interned
+        } else {
+            self.stable_store.push(s.clone());
+            let interned = Interned::new(self.stable_store.len() as u32 - 1);
+            self.lookup.insert(s, interned);
+            interned
+        }
+    }
+    pub fn get(&self, interned: Interned<T>) -> &T {
+        &self.stable_store[interned.idx as usize]
+    }
+}
+
+// Interned<T> boilerplate implementations
+
+impl<T> Hash for Interned<T> {
+    fn hash<H: std::hash::Hasher>(&self, state: &mut H) {
+        self.idx.hash(state);
+    }
+}
+
+impl<T: Ord> Ord for Interned<T> {
+    fn cmp(&self, other: &Self) -> std::cmp::Ordering {
+        self.idx.cmp(&other.idx)
+    }
+}
+
+impl<T> PartialOrd for Interned<T> {
+    fn partial_cmp(&self, other: &Self) -> Option<std::cmp::Ordering> {
+        self.idx.partial_cmp(&other.idx)
+    }
+}
+
+impl<T> Eq for Interned<T> {}
+
+impl<T> PartialEq for Interned<T> {
+    fn eq(&self, other: &Self) -> bool {
+        self.idx == other.idx
+    }
+}
+impl<T> Clone for Interned<T> {
+    fn clone(&self) -> Self {
+        Self { idx: self.idx, _phantom: PhantomData }
+    }
+}
+
+impl<T> Copy for Interned<T> {}
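A minimal usage sketch of the interner (illustrative only, not part of the commit):

    // Inserting the same value twice yields the same Copy id, so later
    // comparisons and hash-map lookups cost a u32 instead of a String.
    let mut words: Interner<String> = Interner::default();
    let a: Interned<String> = words.insert("sunflower".to_owned());
    let b = words.insert("sunflower".to_owned());
    assert!(a == b);                      // same id for the same string
    assert!(words.get(a) == "sunflower"); // resolve back to the value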
@@ -6,7 +6,7 @@ use std::time::Instant;
 use std::{io::Write, path::PathBuf};

 use crate::new::ranking_rule_graph::TypoGraph;
-use crate::new::{QueryNode, QueryGraph};
+use crate::new::{QueryNode, QueryGraph, SearchContext};
 use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
 use crate::new::ranking_rule_graph::EmptyPathsCache;
 use crate::new::ranking_rule_graph::{Edge, EdgeDetails, RankingRuleGraphTrait};
@@ -176,7 +176,7 @@ impl SearchLogger<QueryGraph> for DetailedSearchLogger {
 }

 impl DetailedSearchLogger {
-    pub fn write_d2_description(&self) {
+    pub fn write_d2_description(&self,ctx: &mut SearchContext,) {
         let mut prev_time = self.initial_query_time.unwrap();
         let mut timestamp = vec![];
         fn activated_id(timestamp: &[usize]) -> String {
@@ -193,12 +193,12 @@ impl DetailedSearchLogger {
         writeln!(&mut file, "direction: right").unwrap();
         writeln!(&mut file, "Initial Query Graph: {{").unwrap();
         let initial_query_graph = self.initial_query.as_ref().unwrap();
-        Self::query_graph_d2_description(initial_query_graph, &mut file);
+        Self::query_graph_d2_description(ctx, initial_query_graph, &mut file);
         writeln!(&mut file, "}}").unwrap();

         writeln!(&mut file, "Query Graph Used To Compute Universe: {{").unwrap();
         let query_graph_for_universe = self.query_for_universe.as_ref().unwrap();
-        Self::query_graph_d2_description(query_graph_for_universe, &mut file);
+        Self::query_graph_d2_description(ctx, query_graph_for_universe, &mut file);
         writeln!(&mut file, "}}").unwrap();

         let initial_universe = self.initial_universe.as_ref().unwrap();
@@ -308,7 +308,7 @@ results.{random} {{
                 let id = format!("{cur_ranking_rule}.{cur_activated_id}");
                 let new_file_path = self.folder_path.join(format!("{id}.d2"));
                 let mut new_file = std::fs::File::create(new_file_path).unwrap();
-                Self::query_graph_d2_description(query_graph, &mut new_file);
+                Self::query_graph_d2_description(ctx, query_graph, &mut new_file);
                 writeln!(
                     &mut file,
                     "{id} {{
@@ -323,7 +323,7 @@ results.{random} {{
                 let id = format!("{cur_ranking_rule}.{cur_activated_id}");
                 let new_file_path = self.folder_path.join(format!("{id}.d2"));
                 let mut new_file = std::fs::File::create(new_file_path).unwrap();
-                Self::ranking_rule_graph_d2_description(graph, paths, empty_paths_cache, distances.clone(), &mut new_file);
+                Self::ranking_rule_graph_d2_description(ctx, graph, paths, empty_paths_cache, distances.clone(), &mut new_file);
                 writeln!(
                     &mut file,
                     "{id} {{
@@ -339,7 +339,7 @@ results.{random} {{
                 let id = format!("{cur_ranking_rule}.{cur_activated_id}");
                 let new_file_path = self.folder_path.join(format!("{id}.d2"));
                 let mut new_file = std::fs::File::create(new_file_path).unwrap();
-                Self::ranking_rule_graph_d2_description(graph, paths, empty_paths_cache, distances.clone(), &mut new_file);
+                Self::ranking_rule_graph_d2_description(ctx,graph, paths, empty_paths_cache, distances.clone(), &mut new_file);
                 writeln!(
                     &mut file,
                     "{id} {{
@@ -352,31 +352,40 @@ results.{random} {{
         writeln!(&mut file, "}}").unwrap();
     }

-    fn query_node_d2_desc(node_idx: usize, node: &QueryNode, _distances: &[u64], file: &mut File) {
+    fn query_node_d2_desc(ctx: &mut SearchContext, node_idx: usize, node: &QueryNode, _distances: &[u64], file: &mut File) {
         match &node {
             QueryNode::Term(LocatedQueryTerm { value, .. }) => {
                 match value {
                     QueryTerm::Phrase { phrase } => {
-                        let phrase_str = phrase.description();
+                        let phrase = ctx.phrase_interner.get(*phrase);
+                        let phrase_str = phrase.description(&ctx.word_interner);
                         writeln!(file,"{node_idx} : \"{phrase_str}\"").unwrap();
                     },
                     QueryTerm::Word { derivations: WordDerivations { original, zero_typo, one_typo, two_typos, use_prefix_db, synonyms, split_words } } => {
+                        let original = ctx.word_interner.get(*original);
                         writeln!(file,"{node_idx} : \"{original}\" {{
shape: class").unwrap();
-                        for w in zero_typo {
+                        for w in zero_typo.iter().copied() {
+                            let w = ctx.word_interner.get(w);
                             writeln!(file, "\"{w}\" : 0").unwrap();
                         }
-                        for w in one_typo {
+                        for w in one_typo.iter().copied() {
+                            let w = ctx.word_interner.get(w);
                             writeln!(file, "\"{w}\" : 1").unwrap();
                         }
-                        for w in two_typos {
+                        for w in two_typos.iter().copied() {
+                            let w = ctx.word_interner.get(w);
                             writeln!(file, "\"{w}\" : 2").unwrap();
                         }
-                        if let Some((left, right)) = split_words {
-                            writeln!(file, "\"{left} {right}\" : split_words").unwrap();
+                        if let Some(split_words) = split_words {
+                            let phrase = ctx.phrase_interner.get(*split_words);
+                            let phrase_str = phrase.description(&ctx.word_interner);
+                            writeln!(file, "\"{phrase_str}\" : split_words").unwrap();
                         }
-                        for synonym in synonyms {
-                            writeln!(file, "\"{}\" : synonym", synonym.description()).unwrap();
+                        for synonym in synonyms.iter().copied() {
+                            let phrase = ctx.phrase_interner.get(synonym);
+                            let phrase_str = phrase.description(&ctx.word_interner);
+                            writeln!(file, "\"{phrase_str}\" : synonym").unwrap();
                         }
                         if *use_prefix_db {
                             writeln!(file, "use prefix DB : true").unwrap();
@@ -398,20 +407,20 @@ shape: class").unwrap();
             },
         }
     }
-    fn query_graph_d2_description(query_graph: &QueryGraph, file: &mut File) {
+    fn query_graph_d2_description(ctx: &mut SearchContext, query_graph: &QueryGraph, file: &mut File) {
         writeln!(file,"direction: right").unwrap();
         for node in 0..query_graph.nodes.len() {
             if matches!(query_graph.nodes[node], QueryNode::Deleted) {
                 continue;
             }
-            Self::query_node_d2_desc(node, &query_graph.nodes[node], &[], file);
+            Self::query_node_d2_desc(ctx, node, &query_graph.nodes[node], &[], file);

             for edge in query_graph.edges[node].successors.iter() {
                 writeln!(file, "{node} -> {edge};\n").unwrap();
             }
         }
     }
-    fn ranking_rule_graph_d2_description<R: RankingRuleGraphTrait>(graph: &RankingRuleGraph<R>, paths: &[Vec<u32>], _empty_paths_cache: &EmptyPathsCache, distances: Vec<Vec<u64>>, file: &mut File) {
+    fn ranking_rule_graph_d2_description<R: RankingRuleGraphTrait>(ctx: &mut SearchContext, graph: &RankingRuleGraph<R>, paths: &[Vec<u32>], _empty_paths_cache: &EmptyPathsCache, distances: Vec<Vec<u64>>, file: &mut File) {
         writeln!(file,"direction: right").unwrap();

         writeln!(file, "Proximity Graph {{").unwrap();
@@ -420,7 +429,7 @@ shape: class").unwrap();
                 continue;
             }
             let distances = &distances[node_idx];
-            Self::query_node_d2_desc(node_idx, node, distances.as_slice(), file);
+            Self::query_node_d2_desc(ctx, node_idx, node, distances.as_slice(), file);
         }
         for edge in graph.all_edges.iter().flatten() {
             let Edge { from_node, to_node, details, .. } = edge;
@@ -449,7 +458,7 @@ shape: class").unwrap();


         writeln!(file, "Shortest Paths {{").unwrap();
-        Self::paths_d2_description(graph, paths, file);
+        Self::paths_d2_description(ctx, graph, paths, file);
         writeln!(file, "}}").unwrap();

         // writeln!(file, "Empty Edge Couples {{").unwrap();
@@ -468,15 +477,18 @@ shape: class").unwrap();
         //     }
         // }
         // writeln!(file, "}}").unwrap();
     }
-    fn edge_d2_description<R: RankingRuleGraphTrait>(graph: &RankingRuleGraph<R>, edge_idx: u32, file: &mut File) {
+    fn edge_d2_description<R: RankingRuleGraphTrait>(ctx: &mut SearchContext,graph: &RankingRuleGraph<R>, edge_idx: u32, file: &mut File) {
         let Edge { from_node, to_node, cost, .. } = graph.all_edges[edge_idx as usize].as_ref().unwrap() ;
         let from_node = &graph.query_graph.nodes[*from_node as usize];
         let from_node_desc = match from_node {
             QueryNode::Term(term) => match &term.value {
                 QueryTerm::Phrase { phrase } => {
-                    phrase.description()
+                    let phrase = ctx.phrase_interner.get(*phrase);
+                    phrase.description(&ctx.word_interner)
                 },
-                QueryTerm::Word { derivations } => derivations.original.clone(),
+                QueryTerm::Word { derivations } => {
+                    ctx.word_interner.get(derivations.original).to_owned()
+                },
             },
             QueryNode::Deleted => panic!(),
             QueryNode::Start => "START".to_owned(),
@@ -485,8 +497,11 @@ shape: class").unwrap();
         let to_node = &graph.query_graph.nodes[*to_node as usize];
         let to_node_desc = match to_node {
             QueryNode::Term(term) => match &term.value {
-                QueryTerm::Phrase { phrase } => phrase.description(),
-                QueryTerm::Word { derivations } => derivations.original.clone(),
+                QueryTerm::Phrase { phrase } => {
+                    let phrase = ctx.phrase_interner.get(*phrase);
+                    phrase.description(&ctx.word_interner)
+                },
+                QueryTerm::Word { derivations } => ctx.word_interner.get(derivations.original).to_owned(),
             },
             QueryNode::Deleted => panic!(),
             QueryNode::Start => "START".to_owned(),
@@ -496,11 +511,11 @@ shape: class").unwrap();
shape: class
}}").unwrap();
     }
-    fn paths_d2_description<R: RankingRuleGraphTrait>(graph: &RankingRuleGraph<R>, paths: &[Vec<u32>], file: &mut File) {
+    fn paths_d2_description<R: RankingRuleGraphTrait>(ctx: &mut SearchContext, graph: &RankingRuleGraph<R>, paths: &[Vec<u32>], file: &mut File) {
         for (path_idx, edge_indexes) in paths.iter().enumerate() {
             writeln!(file, "{path_idx} {{").unwrap();
             for edge_idx in edge_indexes.iter() {
-                Self::edge_d2_description(graph, *edge_idx, file);
+                Self::edge_d2_description(ctx, graph, *edge_idx, file);
             }
             for couple_edges in edge_indexes.windows(2) {
                 let [src_edge_idx, dest_edge_idx] = couple_edges else { panic!() };
@@ -1,5 +1,6 @@
 mod db_cache;
 mod graph_based_ranking_rule;
+mod interner;
 mod logger;
 mod query_graph;
 mod query_term;
@@ -26,7 +27,9 @@ use query_graph::{QueryGraph, QueryNode};
 use roaring::RoaringBitmap;

 use self::{
+    interner::Interner,
     logger::SearchLogger,
+    query_term::Phrase,
     resolve_query_graph::{resolve_query_graph, NodeDocIdsCache},
 };

@@ -35,14 +38,32 @@ pub enum BitmapOrAllRef<'s> {
     All,
 }

+pub struct SearchContext<'search> {
+    pub index: &'search Index,
+    pub txn: &'search RoTxn<'search>,
+    pub db_cache: DatabaseCache<'search>,
+    pub word_interner: Interner<String>,
+    pub phrase_interner: Interner<Phrase>,
+    pub node_docids_cache: NodeDocIdsCache,
+}
+impl<'search> SearchContext<'search> {
+    pub fn new(index: &'search Index, txn: &'search RoTxn<'search>) -> Self {
+        Self {
+            index,
+            txn,
+            db_cache: <_>::default(),
+            word_interner: <_>::default(),
+            phrase_interner: <_>::default(),
+            node_docids_cache: <_>::default(),
+        }
+    }
+}
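How the context is meant to be created and threaded through a whole search, sketched under the assumption of an open heed read transaction (`DefaultSearchLogger` is assumed to exist in the logger module; it is not shown in this diff):

    fn run_search(index: &Index, txn: &RoTxn) -> Result<Vec<u32>> {
        // One mutable context replaces the old (index, txn, db_cache,
        // node_docids_cache, ...) parameter lists everywhere below.
        let mut ctx = SearchContext::new(index, txn);
        execute_search(&mut ctx, "sun flower", None, 0, 20, &mut DefaultSearchLogger)
    }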

 #[allow(clippy::too_many_arguments)]
-pub fn resolve_maximally_reduced_query_graph<'transaction>(
-    index: &Index,
-    txn: &'transaction heed::RoTxn,
-    db_cache: &mut DatabaseCache<'transaction>,
+pub fn resolve_maximally_reduced_query_graph<'search>(
+    ctx: &mut SearchContext<'search>,
     universe: &RoaringBitmap,
     query_graph: &QueryGraph,
-    node_docids_cache: &mut NodeDocIdsCache,
     matching_strategy: TermsMatchingStrategy,
     logger: &mut dyn SearchLogger<QueryGraph>,
 ) -> Result<RoaringBitmap> {
@@ -73,16 +94,14 @@ pub fn resolve_maximally_reduced_query_graph<'transaction>(
         }
     }
     logger.query_for_universe(&graph);
-    let docids = resolve_query_graph(index, txn, db_cache, node_docids_cache, &graph, universe)?;
+    let docids = resolve_query_graph(ctx, &graph, universe)?;

     Ok(docids)
 }

 #[allow(clippy::too_many_arguments)]
-pub fn execute_search<'transaction>(
-    index: &Index,
-    txn: &'transaction RoTxn,
-    db_cache: &mut DatabaseCache<'transaction>,
+pub fn execute_search<'search>(
+    ctx: &mut SearchContext<'search>,
     query: &str,
     filters: Option<Filter>,
     from: usize,
@@ -90,26 +109,21 @@ pub fn execute_search<'transaction>(
     logger: &mut dyn SearchLogger<QueryGraph>,
 ) -> Result<Vec<u32>> {
     assert!(!query.is_empty());
-    let query_terms = located_query_terms_from_string(index, txn, query.tokenize(), None).unwrap();
-    let graph = QueryGraph::from_query(index, txn, db_cache, query_terms)?;
+    let query_terms = located_query_terms_from_string(ctx, query.tokenize(), None).unwrap();
+    let graph = QueryGraph::from_query(ctx, query_terms)?;

     logger.initial_query(&graph);

     let universe = if let Some(filters) = filters {
-        filters.evaluate(txn, index)?
+        filters.evaluate(ctx.txn, ctx.index)?
     } else {
-        index.documents_ids(txn)?
+        ctx.index.documents_ids(ctx.txn)?
     };

-    let mut node_docids_cache = NodeDocIdsCache::default();
-
     let universe = resolve_maximally_reduced_query_graph(
-        index,
-        txn,
-        db_cache,
+        ctx,
         &universe,
         &graph,
-        &mut node_docids_cache,
         TermsMatchingStrategy::Last,
         logger,
     )?;
@@ -117,5 +131,5 @@ pub fn execute_search<'transaction>(

     logger.initial_universe(&universe);

-    apply_ranking_rules(index, txn, db_cache, &graph, &universe, from, length, logger)
+    apply_ranking_rules(ctx, &graph, &universe, from, length, logger)
 }
@@ -1,13 +1,10 @@
+use super::query_term::{self, LocatedQueryTerm, QueryTerm, WordDerivations};
+use super::SearchContext;
+use crate::Result;
+use roaring::RoaringBitmap;
+use std::fmt::Debug;
-
-use heed::RoTxn;
-use roaring::RoaringBitmap;
-
-use super::db_cache::DatabaseCache;
-use super::query_term::{self, LocatedQueryTerm, QueryTerm, WordDerivations};
-use crate::{Index, Result};

-#[derive(Debug, Clone)]
+#[derive(Clone)]
 pub enum QueryNode {
     Term(LocatedQueryTerm),
     Deleted,
@@ -22,7 +19,7 @@ pub struct Edges {
     pub successors: RoaringBitmap,
 }

-#[derive(Debug, Clone)]
+#[derive(Clone)]
 pub struct QueryGraph {
     pub root_node: u32,
     pub end_node: u32,
@@ -31,8 +28,8 @@ pub struct QueryGraph {
 }

 fn _assert_sizes() {
-    // TODO: QueryNodes are too big now, 184B is an unreasonable size
-    let _: [u8; 184] = [0; std::mem::size_of::<QueryNode>()];
+    // TODO: QueryNodes are too big now, 88B is a bit too big
+    let _: [u8; 88] = [0; std::mem::size_of::<QueryNode>()];
     let _: [u8; 48] = [0; std::mem::size_of::<Edges>()];
 }

@@ -72,19 +69,14 @@ impl QueryGraph {

 impl QueryGraph {
     // TODO: return the list of all matching words here as well
-    pub fn from_query<'transaction>(
-        index: &Index,
-        txn: &RoTxn,
-        _db_cache: &mut DatabaseCache<'transaction>,
-        terms: Vec<LocatedQueryTerm>,
-    ) -> Result<QueryGraph> {
+    pub fn from_query(ctx: &mut SearchContext, terms: Vec<LocatedQueryTerm>) -> Result<QueryGraph> {
         // TODO: maybe empty nodes should not be removed here, to compute
         // the score of the `words` ranking rule correctly
         // it is very easy to traverse the graph and remove afterwards anyway
         // Still, I'm keeping this here as a demo
         let mut empty_nodes = vec![];

-        let word_set = index.words_fst(txn)?;
+        let word_set = ctx.index.words_fst(ctx.txn)?;
         let mut graph = QueryGraph::default();

         let (mut prev2, mut prev1, mut prev0): (Vec<u32>, Vec<u32>, Vec<u32>) =
@@ -105,20 +97,20 @@ impl QueryGraph {

             if !prev1.is_empty() {
                 if let Some((ngram2_str, ngram2_pos)) =
-                    query_term::ngram2(&query[length - 2], &query[length - 1])
+                    query_term::ngram2(ctx, &query[length - 2], &query[length - 1])
                 {
-                    if word_set.contains(ngram2_str.as_bytes()) {
+                    if word_set.contains(ctx.word_interner.get(ngram2_str)) {
                         let ngram2 = LocatedQueryTerm {
                             value: QueryTerm::Word {
                                 derivations: WordDerivations {
-                                    original: ngram2_str.clone(),
+                                    original: ngram2_str,
                                     // TODO: could add a typo if it's an ngram?
-                                    zero_typo: vec![ngram2_str],
-                                    one_typo: vec![],
-                                    two_typos: vec![],
+                                    zero_typo: Box::new([ngram2_str]),
+                                    one_typo: Box::new([]),
+                                    two_typos: Box::new([]),
                                     use_prefix_db: false,
-                                    synonyms: vec![], // TODO: ngram synonyms
-                                    split_words: None, // TODO: maybe ngram split words?
+                                    synonyms: Box::new([]), // TODO: ngram synonyms
+                                    split_words: None,      // TODO: maybe ngram split words?
                                 },
                             },
                             positions: ngram2_pos,
@@ -129,22 +121,25 @@ impl QueryGraph {
                 }
             }
             if !prev2.is_empty() {
-                if let Some((ngram3_str, ngram3_pos)) =
-                    query_term::ngram3(&query[length - 3], &query[length - 2], &query[length - 1])
-                {
-                    if word_set.contains(ngram3_str.as_bytes()) {
+                if let Some((ngram3_str, ngram3_pos)) = query_term::ngram3(
+                    ctx,
+                    &query[length - 3],
+                    &query[length - 2],
+                    &query[length - 1],
+                ) {
+                    if word_set.contains(ctx.word_interner.get(ngram3_str)) {
                         let ngram3 = LocatedQueryTerm {
                             value: QueryTerm::Word {
                                 derivations: WordDerivations {
-                                    original: ngram3_str.clone(),
+                                    original: ngram3_str,
                                     // TODO: could add a typo if it's an ngram?
-                                    zero_typo: vec![ngram3_str],
-                                    one_typo: vec![],
-                                    two_typos: vec![],
+                                    zero_typo: Box::new([ngram3_str]),
+                                    one_typo: Box::new([]),
+                                    two_typos: Box::new([]),
                                     use_prefix_db: false,
-                                    synonyms: vec![], // TODO: ngram synonyms
-                                    split_words: None, // TODO: maybe ngram split words?
-                                    // would be nice for typos like su nflower
+                                    synonyms: Box::new([]), // TODO: ngram synonyms
+                                    split_words: None,      // TODO: maybe ngram split words?
+                                    // would be nice for typos like su nflower
                                 },
                             },
                             positions: ngram3_pos,
@@ -16,30 +16,35 @@ use crate::search::fst_utils::{Complement, Intersection, StartsWith, Union};
 use crate::search::{build_dfa, get_first};
 use crate::{CboRoaringBitmapLenCodec, Index, Result};

-#[derive(Debug, Default, Clone)]
+use super::interner::{Interned, Interner};
+use super::SearchContext;
+
+#[derive(Default, Clone, PartialEq, Eq, Hash)]
 pub struct Phrase {
-    pub words: Vec<Option<String>>,
+    pub words: Vec<Option<Interned<String>>>,
 }
 impl Phrase {
-    pub fn description(&self) -> String {
-        self.words.iter().flatten().join(" ")
+    pub fn description(&self, interner: &Interner<String>) -> String {
+        self.words.iter().flatten().map(|w| interner.get(*w)).join(" ")
     }
 }
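With words interned, a phrase stores ids and only materializes its text on demand. A small sketch (illustrative, not part of the commit):

    fn phrase_demo(ctx: &mut SearchContext) {
        let sun = ctx.word_interner.insert("sun".to_owned());
        let flower = ctx.word_interner.insert("flower".to_owned());
        // A `None` entry stands for a removed stop word inside the phrase.
        let phrase =
            ctx.phrase_interner.insert(Phrase { words: vec![Some(sun), None, Some(flower)] });
        assert!(ctx.phrase_interner.get(phrase).description(&ctx.word_interner) == "sun flower");
    }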

-#[derive(Debug, Clone)]
+#[derive(Clone)]
 pub struct WordDerivations {
-    pub original: String,
+    pub original: Interned<String>,
     // TODO: pub prefix_of: Vec<String>,
-    pub synonyms: Vec<Phrase>,
-    pub split_words: Option<(String, String)>,
-    pub zero_typo: Vec<String>,
-    pub one_typo: Vec<String>,
-    pub two_typos: Vec<String>,
+    pub synonyms: Box<[Interned<Phrase>]>,
+    pub split_words: Option<Interned<Phrase>>,
+    pub zero_typo: Box<[Interned<String>]>,
+    pub one_typo: Box<[Interned<String>]>,
+    pub two_typos: Box<[Interned<String>]>,
     pub use_prefix_db: bool,
 }
 impl WordDerivations {
-    pub fn all_derivations_except_prefix_db(&self) -> impl Iterator<Item = &String> + Clone {
-        self.zero_typo.iter().chain(self.one_typo.iter()).chain(self.two_typos.iter())
+    pub fn all_derivations_except_prefix_db(
+        &'_ self,
+    ) -> impl Iterator<Item = Interned<String>> + Clone + '_ {
+        self.zero_typo.iter().chain(self.one_typo.iter()).chain(self.two_typos.iter()).copied()
     }
     fn is_empty(&self) -> bool {
         self.zero_typo.is_empty()
@@ -50,15 +55,21 @@ impl WordDerivations {
 }

 pub fn word_derivations(
-    index: &Index,
-    txn: &RoTxn,
+    ctx: &mut SearchContext,
     word: &str,
     max_typo: u8,
     is_prefix: bool,
     fst: &fst::Set<Cow<[u8]>>,
 ) -> Result<WordDerivations> {
+    let word_interned = ctx.word_interner.insert(word.to_owned());
+
     let use_prefix_db = is_prefix
-        && index.word_prefix_docids.remap_data_type::<DecodeIgnore>().get(txn, word)?.is_some();
+        && ctx
+            .index
+            .word_prefix_docids
+            .remap_data_type::<DecodeIgnore>()
+            .get(ctx.txn, word)?
+            .is_some();

     let mut zero_typo = vec![];
     let mut one_typo = vec![];
@@ -70,11 +81,12 @@ pub fn word_derivations(
             let mut stream = fst.search(prefix).into_stream();

             while let Some(word) = stream.next() {
-                let word = std::str::from_utf8(word)?;
-                zero_typo.push(word.to_string());
+                let word = std::str::from_utf8(word)?.to_owned();
+                let word_interned = ctx.word_interner.insert(word);
+                zero_typo.push(word_interned);
             }
         } else if fst.contains(word) {
-            zero_typo.push(word.to_string());
+            zero_typo.push(word_interned);
         }
     } else if max_typo == 1 {
         let dfa = build_dfa(word, 1, is_prefix);
@@ -83,13 +95,14 @@ pub fn word_derivations(

         while let Some((word, state)) = stream.next() {
             let word = std::str::from_utf8(word)?;
+            let word_interned = ctx.word_interner.insert(word.to_owned());
             let d = dfa.distance(state.1);
             match d.to_u8() {
                 0 => {
-                    zero_typo.push(word.to_string());
+                    zero_typo.push(word_interned);
                 }
                 1 => {
-                    one_typo.push(word.to_string());
+                    one_typo.push(word_interned);
                 }
                 _ => panic!(),
             }
@@ -105,47 +118,56 @@ pub fn word_derivations(

         while let Some((found_word, state)) = stream.next() {
             let found_word = std::str::from_utf8(found_word)?;
+            let found_word_interned = ctx.word_interner.insert(found_word.to_owned());
             // in the case the typo is on the first letter, we know the number of typo
             // is two
             if get_first(found_word) != get_first(word) {
-                two_typos.push(found_word.to_string());
+                two_typos.push(found_word_interned);
             } else {
                 // Else, we know that it is the second dfa that matched and compute the
                 // correct distance
                 let d = second_dfa.distance((state.1).0);
                 match d.to_u8() {
                     0 => {
-                        zero_typo.push(found_word.to_string());
+                        zero_typo.push(found_word_interned);
                     }
                     1 => {
-                        one_typo.push(found_word.to_string());
+                        one_typo.push(found_word_interned);
                     }
                     2 => {
-                        two_typos.push(found_word.to_string());
+                        two_typos.push(found_word_interned);
                     }
                     _ => panic!(),
                 }
             }
         }
     }
-    let split_words = split_best_frequency(index, txn, word)?;
+    let split_words = split_best_frequency(ctx.index, ctx.txn, word)?.map(|(l, r)| {
+        ctx.phrase_interner.insert(Phrase {
+            words: vec![Some(ctx.word_interner.insert(l)), Some(ctx.word_interner.insert(r))],
+        })
+    });
+
+    let synonyms = ctx.index.synonyms(ctx.txn)?;

-    let synonyms = index.synonyms(txn)?;
     let synonyms = synonyms
         .get(&vec![word.to_owned()])
         .cloned()
         .unwrap_or_default()
         .into_iter()
-        .map(|words| Phrase { words: words.into_iter().map(Some).collect() })
+        .map(|words| {
+            let words = words.into_iter().map(|w| Some(ctx.word_interner.insert(w))).collect();
+            ctx.phrase_interner.insert(Phrase { words })
+        })
         .collect();

     Ok(WordDerivations {
-        original: word.to_owned(),
+        original: ctx.word_interner.insert(word.to_owned()),
         synonyms,
         split_words,
-        zero_typo,
-        one_typo,
-        two_typos,
+        zero_typo: zero_typo.into_boxed_slice(),
+        one_typo: one_typo.into_boxed_slice(),
+        two_typos: two_typos.into_boxed_slice(),
         use_prefix_db,
     })
 }
@@ -176,33 +198,36 @@ fn split_best_frequency(
     Ok(best.map(|(_, left, right)| (left.to_owned(), right.to_owned())))
 }

-#[derive(Debug, Clone)]
+#[derive(Clone)]
 pub enum QueryTerm {
     // TODO: should there be SplitWord, NGram2, and NGram3 variants?
     // NGram2 can have 1 typo and synonyms
     // NGram3 cannot have typos but can have synonyms
     // SplitWords are a phrase
     // Can NGrams be prefixes?
-    Phrase { phrase: Phrase },
+    Phrase { phrase: Interned<Phrase> },
     Word { derivations: WordDerivations },
 }

 impl QueryTerm {
-    pub fn original_single_word(&self) -> Option<&str> {
+    pub fn original_single_word<'interner>(
+        &self,
+        word_interner: &'interner Interner<String>,
+    ) -> Option<&'interner str> {
         match self {
             QueryTerm::Phrase { phrase: _ } => None,
             QueryTerm::Word { derivations } => {
                 if derivations.is_empty() {
                     None
                 } else {
-                    Some(derivations.original.as_str())
+                    Some(word_interner.get(derivations.original))
                 }
             }
         }
     }
 }

-#[derive(Debug, Clone)]
+#[derive(Clone)]
 pub struct LocatedQueryTerm {
     pub value: QueryTerm,
     pub positions: RangeInclusive<i8>,
@@ -217,18 +242,17 @@ impl LocatedQueryTerm {
     }
 }

-pub fn located_query_terms_from_string<'transaction>(
-    index: &Index,
-    txn: &'transaction RoTxn,
+pub fn located_query_terms_from_string<'search>(
+    ctx: &mut SearchContext<'search>,
     query: NormalizedTokenIter<Vec<u8>>,
     words_limit: Option<usize>,
 ) -> Result<Vec<LocatedQueryTerm>> {
-    let authorize_typos = index.authorize_typos(txn)?;
-    let min_len_one_typo = index.min_word_len_one_typo(txn)?;
-    let min_len_two_typos = index.min_word_len_two_typos(txn)?;
+    let authorize_typos = ctx.index.authorize_typos(ctx.txn)?;
+    let min_len_one_typo = ctx.index.min_word_len_one_typo(ctx.txn)?;
+    let min_len_two_typos = ctx.index.min_word_len_two_typos(ctx.txn)?;

-    let exact_words = index.exact_words(txn)?;
-    let fst = index.words_fst(txn)?;
+    let exact_words = ctx.index.exact_words(ctx.txn)?;
+    let fst = ctx.index.words_fst(ctx.txn)?;

     let nbr_typos = |word: &str| {
         if !authorize_typos
@@ -243,10 +267,6 @@ pub fn located_query_terms_from_string<'transaction>(
         }
     };

-    let derivations = |word: &str, is_prefix: bool| {
-        word_derivations(index, txn, word, nbr_typos(word), is_prefix, &fst)
-    };
-
     let mut primitive_query = Vec::new();
     let mut phrase = Vec::new();

@@ -279,14 +299,17 @@ pub fn located_query_terms_from_string<'transaction>(
                 if let TokenKind::StopWord = token.kind {
                     phrase.push(None);
                 } else {
+                    let word = ctx.word_interner.insert(token.lemma().to_string());
                     // TODO: in a phrase, check that every word exists
                     // otherwise return WordDerivations::Empty
-                    phrase.push(Some(token.lemma().to_string()));
+                    phrase.push(Some(word));
                 }
             } else if peekable.peek().is_some() {
                 match token.kind {
                     TokenKind::Word => {
-                        let derivations = derivations(token.lemma(), false)?;
+                        let word = token.lemma();
+                        let derivations =
+                            word_derivations(ctx, word, nbr_typos(word), false, &fst)?;
                         let located_term = LocatedQueryTerm {
                             value: QueryTerm::Word { derivations },
                             positions: position..=position,
@@ -296,7 +319,8 @@ pub fn located_query_terms_from_string<'transaction>(
                     TokenKind::StopWord | TokenKind::Separator(_) | TokenKind::Unknown => {}
                 }
             } else {
-                let derivations = derivations(token.lemma(), true)?;
+                let word = token.lemma();
+                let derivations = word_derivations(ctx, word, nbr_typos(word), true, &fst)?;
                 let located_term = LocatedQueryTerm {
                     value: QueryTerm::Word { derivations },
                     positions: position..=position,
@@ -323,7 +347,9 @@ pub fn located_query_terms_from_string<'transaction>(
                 {
                     let located_query_term = LocatedQueryTerm {
                         value: QueryTerm::Phrase {
-                            phrase: Phrase { words: mem::take(&mut phrase) },
+                            phrase: ctx
+                                .phrase_interner
+                                .insert(Phrase { words: mem::take(&mut phrase) }),
                         },
                         positions: phrase_start..=phrase_end,
                     };
@@ -337,7 +363,9 @@ pub fn located_query_terms_from_string<'transaction>(
     // If a quote is never closed, we consider all of the end of the query as a phrase.
     if !phrase.is_empty() {
         let located_query_term = LocatedQueryTerm {
-            value: QueryTerm::Phrase { phrase: Phrase { words: mem::take(&mut phrase) } },
+            value: QueryTerm::Phrase {
+                phrase: ctx.phrase_interner.insert(Phrase { words: mem::take(&mut phrase) }),
+            },
             positions: phrase_start..=phrase_end,
         };
         primitive_query.push(located_query_term);
@@ -347,35 +375,49 @@ pub fn located_query_terms_from_string<'transaction>(
 }

 // TODO: return a word derivations instead?
-pub fn ngram2(x: &LocatedQueryTerm, y: &LocatedQueryTerm) -> Option<(String, RangeInclusive<i8>)> {
+pub fn ngram2(
+    ctx: &mut SearchContext,
+    x: &LocatedQueryTerm,
+    y: &LocatedQueryTerm,
+) -> Option<(Interned<String>, RangeInclusive<i8>)> {
     if *x.positions.end() != y.positions.start() - 1 {
         return None;
     }
-    match (&x.value.original_single_word(), &y.value.original_single_word()) {
+    match (
+        &x.value.original_single_word(&ctx.word_interner),
+        &y.value.original_single_word(&ctx.word_interner),
+    ) {
         (Some(w1), Some(w2)) => {
-            let term = (format!("{w1}{w2}"), *x.positions.start()..=*y.positions.end());
+            let term = (
+                ctx.word_interner.insert(format!("{w1}{w2}")),
+                *x.positions.start()..=*y.positions.end(),
+            );
             Some(term)
         }
         _ => None,
     }
 }
 pub fn ngram3(
+    ctx: &mut SearchContext,
     x: &LocatedQueryTerm,
     y: &LocatedQueryTerm,
     z: &LocatedQueryTerm,
-) -> Option<(String, RangeInclusive<i8>)> {
+) -> Option<(Interned<String>, RangeInclusive<i8>)> {
     if *x.positions.end() != y.positions.start() - 1
         || *y.positions.end() != z.positions.start() - 1
     {
         return None;
     }
     match (
-        &x.value.original_single_word(),
-        &y.value.original_single_word(),
-        &z.value.original_single_word(),
+        &x.value.original_single_word(&ctx.word_interner),
+        &y.value.original_single_word(&ctx.word_interner),
+        &z.value.original_single_word(&ctx.word_interner),
     ) {
         (Some(w1), Some(w2), Some(w3)) => {
-            let term = (format!("{w1}{w2}{w3}"), *x.positions.start()..=*z.positions.end());
+            let term = (
+                ctx.word_interner.insert(format!("{w1}{w2}{w3}")),
+                *x.positions.start()..=*z.positions.end(),
+            );
             Some(term)
         }
         _ => None,
@@ -1,18 +1,10 @@
-use heed::RoTxn;
+use super::{Edge, RankingRuleGraph, RankingRuleGraphTrait};
+use crate::new::{QueryGraph, SearchContext};
+use crate::Result;
 use roaring::RoaringBitmap;
-
-use super::{Edge, RankingRuleGraph, RankingRuleGraphTrait};
-use crate::new::db_cache::DatabaseCache;
-use crate::new::QueryGraph;
-use crate::{Index, Result};

 impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
-    pub fn build<'db_cache, 'transaction: 'db_cache>(
-        index: &Index,
-        txn: &'transaction RoTxn,
-        db_cache: &mut DatabaseCache<'transaction>,
-        query_graph: QueryGraph,
-    ) -> Result<Self> {
+    pub fn build(ctx: &mut SearchContext, query_graph: QueryGraph) -> Result<Self> {
         let mut ranking_rule_graph =
             Self { query_graph, all_edges: vec![], node_edges: vec![], successors: vec![] };

@@ -22,12 +14,11 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
             let new_edges = ranking_rule_graph.node_edges.last_mut().unwrap();
             let new_successors = ranking_rule_graph.successors.last_mut().unwrap();

-            let Some(from_node_data) = G::build_visit_from_node(index, txn, db_cache, node)? else { continue };
+            let Some(from_node_data) = G::build_visit_from_node(ctx, node)? else { continue };

             for successor_idx in ranking_rule_graph.query_graph.edges[node_idx].successors.iter() {
                 let to_node = &ranking_rule_graph.query_graph.nodes[successor_idx as usize];
-                let mut edges =
-                    G::build_visit_to_node(index, txn, db_cache, to_node, &from_node_data)?;
+                let mut edges = G::build_visit_to_node(ctx, to_node, &from_node_data)?;
                 if edges.is_empty() {
                     continue;
                 }
@@ -1,13 +1,10 @@
 use std::marker::PhantomData;

-use fxhash::FxHashMap;
-use heed::RoTxn;
-use roaring::RoaringBitmap;
-
 use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait};
-use crate::new::db_cache::DatabaseCache;
-use crate::new::BitmapOrAllRef;
-use crate::{Index, Result};
+use crate::new::{BitmapOrAllRef, SearchContext};
+use crate::Result;
+use fxhash::FxHashMap;
+use roaring::RoaringBitmap;

 // TODO: the cache should have a G::EdgeDetails as key
 // but then it means that we should have a quick way of
@@ -25,11 +22,9 @@ impl<G: RankingRuleGraphTrait> Default for EdgeDocidsCache<G> {
     }
 }
 impl<G: RankingRuleGraphTrait> EdgeDocidsCache<G> {
-    pub fn get_edge_docids<'s, 'transaction>(
+    pub fn get_edge_docids<'s, 'search>(
         &'s mut self,
-        index: &Index,
-        txn: &'transaction RoTxn,
-        db_cache: &mut DatabaseCache<'transaction>,
+        ctx: &mut SearchContext<'search>,
         edge_index: u32,
         graph: &RankingRuleGraph<G>,
         // TODO: maybe universe doesn't belong here
@@ -46,7 +41,7 @@ impl<G: RankingRuleGraphTrait> EdgeDocidsCache<G> {
             return Ok(BitmapOrAllRef::Bitmap(&self.cache[&edge_index]));
         }
         // TODO: maybe universe doesn't belong here
-        let docids = universe & G::compute_docids(index, txn, db_cache, details)?;
+        let docids = universe & G::compute_docids(ctx, details)?;
         let _ = self.cache.insert(edge_index, docids);
         let docids = &self.cache[&edge_index];
         Ok(BitmapOrAllRef::Bitmap(docids))
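The cache memoizes the docids of each ranking-rule graph edge. A hedged sketch of the access pattern it enables, mirroring how `remove_empty_edges` earlier in this commit consumes it (the helper name is hypothetical):

    fn edge_is_dead_end<G: RankingRuleGraphTrait>(
        cache: &mut EdgeDocidsCache<G>,
        ctx: &mut SearchContext,
        graph: &RankingRuleGraph<G>,
        edge_index: u32,
        universe: &RoaringBitmap,
    ) -> Result<bool> {
        // Computed once per edge via G::compute_docids, then served from the map.
        Ok(match cache.get_edge_docids(ctx, edge_index, graph, universe)? {
            BitmapOrAllRef::Bitmap(docids) => docids.is_disjoint(universe),
            BitmapOrAllRef::All => false,
        })
    }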
@@ -7,20 +7,15 @@ mod proximity;
 mod resolve_paths;
 mod typo;

+use super::logger::SearchLogger;
+use super::{QueryGraph, QueryNode, SearchContext};
+use crate::Result;
 pub use edge_docids_cache::EdgeDocidsCache;
 pub use empty_paths_cache::EmptyPathsCache;
 pub use proximity::ProximityGraph;
+pub use typo::TypoGraph;
+
+use std::ops::ControlFlow;

-use heed::RoTxn;
 use roaring::RoaringBitmap;

-use super::db_cache::DatabaseCache;
-use super::logger::SearchLogger;
-use super::{QueryGraph, QueryNode};
-use crate::{Index, Result};
-use std::ops::ControlFlow;
-pub use typo::TypoGraph;

 #[derive(Debug, Clone)]
 pub enum EdgeDetails<E> {
@@ -42,6 +37,48 @@ pub struct EdgePointer<'graph, E> {
     pub edge: &'graph Edge<E>,
 }

+// pub struct SubWordDerivations {
+//     words: FxHashSet<Interned<String>>,
+//     synonyms: FxHashSet<Interned<Phrase>>, // NO! they're phrases, not strings
+//     split_words: bool,
+//     use_prefix_db: bool,
+// }
+
+// pub struct EdgeWordDerivations {
+//     // TODO: not Option, instead: Any | All | Subset(SubWordDerivations)
+//     from_words: Option<SubWordDerivations>, // ???
+//     to_words: Option<SubWordDerivations>, // + use prefix db?
+// }
+
+// fn aggregate_edge_word_derivations(
+//     graph: (),
+//     edges: Vec<usize>,
+// ) -> BTreeMap<usize, SubWordDerivations> {
+//     todo!()
+// }
+
+// fn reduce_word_term_to_sub_word_derivations(
+//     term: &mut WordDerivations,
+//     derivations: &SubWordDerivations,
+// ) {
+//     let mut new_one_typo = vec![];
+//     for w in term.one_typo {
+//         if derivations.words.contains(w) {
+//             new_one_typo.push(w);
+//         }
+//     }
+//     if term.use_prefix_db && !derivations.use_prefix_db {
+//         term.use_prefix_db = false;
+//     }
+//     // etc.
+// }
+
+// fn word_derivations_used_by_edge<G: RankingRuleGraphTrait>(
+//     edge: G::EdgeDetails,
+// ) -> SubWordDerivations {
+//     todo!()
+// }
+
 pub trait RankingRuleGraphTrait: Sized {
     /// The details of an edge connecting two query nodes. These details
     /// should be sufficient to compute the edge's cost and associated document ids
@@ -55,10 +92,8 @@ pub trait RankingRuleGraphTrait: Sized {
     fn graphviz_edge_details_label(edge: &Self::EdgeDetails) -> String;

     /// Compute the document ids associated with the given edge.
-    fn compute_docids<'transaction>(
-        index: &Index,
-        txn: &'transaction RoTxn,
-        db_cache: &mut DatabaseCache<'transaction>,
+    fn compute_docids<'search>(
+        ctx: &mut SearchContext<'search>,
         edge_details: &Self::EdgeDetails,
     ) -> Result<RoaringBitmap>;

@@ -66,19 +101,15 @@ pub trait RankingRuleGraphTrait: Sized {
     ///
     /// This call is followed by zero, one or more calls to [`build_visit_to_node`](RankingRuleGraphTrait::build_visit_to_node),
     /// which builds the actual edges.
-    fn build_visit_from_node<'transaction>(
-        index: &Index,
-        txn: &'transaction RoTxn,
-        db_cache: &mut DatabaseCache<'transaction>,
+    fn build_visit_from_node<'search>(
+        ctx: &mut SearchContext<'search>,
         from_node: &QueryNode,
     ) -> Result<Option<Self::BuildVisitedFromNode>>;

     /// Return the cost and details of the edges going from the previously visited node
     /// (with [`build_visit_from_node`](RankingRuleGraphTrait::build_visit_from_node)) to `to_node`.
-    fn build_visit_to_node<'from_data, 'transaction: 'from_data>(
-        index: &Index,
-        txn: &'transaction RoTxn,
-        db_cache: &mut DatabaseCache<'transaction>,
+    fn build_visit_to_node<'from_data, 'search: 'from_data>(
+        ctx: &mut SearchContext<'search>,
         to_node: &QueryNode,
         from_node_data: &'from_data Self::BuildVisitedFromNode,
     ) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>>;
@ -1,30 +1,30 @@
use std::collections::BTreeMap;

use heed::RoTxn;
use itertools::Itertools;

use super::ProximityEdge;
use crate::new::db_cache::DatabaseCache;
use crate::new::query_term::{LocatedQueryTerm, QueryTerm, WordDerivations};
use crate::new::ranking_rule_graph::proximity::WordPair;
use crate::new::ranking_rule_graph::EdgeDetails;
use crate::new::QueryNode;
use crate::{Index, Result};
use crate::new::{QueryNode, SearchContext};
use crate::Result;
use itertools::Itertools;
use std::collections::BTreeMap;

pub fn visit_from_node(from_node: &QueryNode) -> Result<Option<(WordDerivations, i8)>> {
pub fn visit_from_node(
ctx: &mut SearchContext,
from_node: &QueryNode,
) -> Result<Option<(WordDerivations, i8)>> {
Ok(Some(match from_node {
QueryNode::Term(LocatedQueryTerm { value: value1, positions: pos1 }) => match value1 {
QueryTerm::Word { derivations } => (derivations.clone(), *pos1.end()),
QueryTerm::Phrase { phrase: phrase1 } => {
if let Some(original) = phrase1.words.last().unwrap().as_ref() {
let phrase1 = ctx.phrase_interner.get(*phrase1);
if let Some(original) = *phrase1.words.last().unwrap() {
(
WordDerivations {
original: original.clone(),
zero_typo: vec![original.to_owned()],
one_typo: vec![],
two_typos: vec![],
original,
zero_typo: Box::new([original]),
one_typo: Box::new([]),
two_typos: Box::new([]),
use_prefix_db: false,
synonyms: vec![],
synonyms: Box::new([]),
split_words: None,
},
*pos1.end(),
@ -37,12 +37,12 @@ pub fn visit_from_node(from_node: &QueryNode) -> Result<Option<(WordDerivations,
},
QueryNode::Start => (
WordDerivations {
original: String::new(),
zero_typo: vec![],
one_typo: vec![],
two_typos: vec![],
original: ctx.word_interner.insert(String::new()),
zero_typo: Box::new([]),
one_typo: Box::new([]),
two_typos: Box::new([]),
use_prefix_db: false,
synonyms: vec![],
synonyms: Box::new([]),
split_words: None,
},
-100,
@ -51,10 +51,8 @@ pub fn visit_from_node(from_node: &QueryNode) -> Result<Option<(WordDerivations,
}))
}

pub fn visit_to_node<'transaction, 'from_data>(
index: &Index,
txn: &'transaction RoTxn,
db_cache: &mut DatabaseCache<'transaction>,
pub fn visit_to_node<'search, 'from_data>(
ctx: &mut SearchContext<'search>,
to_node: &QueryNode,
from_node_data: &'from_data (WordDerivations, i8),
) -> Result<Vec<(u8, EdgeDetails<ProximityEdge>)>> {
@ -69,15 +67,16 @@ pub fn visit_to_node<'transaction, 'from_data>(
let (derivations2, pos2, ngram_len2) = match value2 {
QueryTerm::Word { derivations } => (derivations.clone(), *pos2.start(), pos2.len()),
QueryTerm::Phrase { phrase: phrase2 } => {
if let Some(original) = phrase2.words.first().unwrap().as_ref() {
let phrase2 = ctx.phrase_interner.get(*phrase2);
if let Some(original) = *phrase2.words.first().unwrap() {
(
WordDerivations {
original: original.clone(),
zero_typo: vec![original.to_owned()],
one_typo: vec![],
two_typos: vec![],
original,
zero_typo: Box::new([original]),
one_typo: Box::new([]),
two_typos: Box::new([]),
use_prefix_db: false,
synonyms: vec![],
synonyms: Box::new([]),
split_words: None,
},
*pos2.start(),
@ -106,19 +105,16 @@ pub fn visit_to_node<'transaction, 'from_data>(

let derivations1 = derivations1.all_derivations_except_prefix_db();
// TODO: eventually, we want to get rid of the uses from `orginal`
let original_word_2 = derivations2.original.clone();
let mut cost_proximity_word_pairs = BTreeMap::<u8, BTreeMap<u8, Vec<WordPair>>>::new();

if updb2 {
for word1 in derivations1.clone() {
for proximity in 1..=(8 - ngram_len2) {
let cost = (proximity + ngram_len2 - 1) as u8;
if db_cache
if ctx
.get_word_prefix_pair_proximity_docids(
index,
txn,
word1,
original_word_2.as_str(),
derivations2.original,
proximity as u8,
)?
.is_some()
@ -129,16 +125,14 @@ pub fn visit_to_node<'transaction, 'from_data>(
.entry(proximity as u8)
.or_default()
.push(WordPair::WordPrefix {
left: word1.to_owned(),
right_prefix: original_word_2.to_owned(),
left: word1,
right_prefix: derivations2.original,
});
}
if db_cache
if ctx
.get_prefix_word_pair_proximity_docids(
index,
txn,
original_word_2.as_str(),
word1.as_str(),
derivations2.original,
word1,
proximity as u8 - 1,
)?
.is_some()
@ -149,8 +143,8 @@ pub fn visit_to_node<'transaction, 'from_data>(
.entry(proximity as u8)
.or_default()
.push(WordPair::WordPrefixSwapped {
left_prefix: original_word_2.to_owned(),
right: word1.to_owned(),
left_prefix: derivations2.original,
right: word1,
});
}
}
@ -164,28 +158,23 @@ pub fn visit_to_node<'transaction, 'from_data>(
for (word1, word2) in product_derivations {
for proximity in 1..=(8 - ngram_len2) {
let cost = (proximity + ngram_len2 - 1) as u8;
if db_cache
.get_word_pair_proximity_docids(index, txn, word1, word2, proximity as u8)?
.is_some()
{
if ctx.get_word_pair_proximity_docids(word1, word2, proximity as u8)?.is_some() {
cost_proximity_word_pairs
.entry(cost)
.or_default()
.entry(proximity as u8)
.or_default()
.push(WordPair::Words { left: word1.to_owned(), right: word2.to_owned() });
.push(WordPair::Words { left: word1, right: word2 });
}
if proximity > 1
&& db_cache
.get_word_pair_proximity_docids(index, txn, word2, word1, proximity as u8 - 1)?
.is_some()
&& ctx.get_word_pair_proximity_docids(word2, word1, proximity as u8 - 1)?.is_some()
{
cost_proximity_word_pairs
.entry(cost)
.or_default()
.entry(proximity as u8 - 1)
.or_default()
.push(WordPair::Words { left: word2.to_owned(), right: word1.to_owned() });
.push(WordPair::Words { left: word2, right: word1 });
}
}
}

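The cost formula above folds n-gram length into proximity cost: an edge built from word pairs at distance proximity, where the right-hand term is an n-gram of ngram_len2 words, costs proximity + ngram_len2 - 1. A self-contained sketch (not part of the commit) that just prints the resulting table for an assumed 2-gram:

fn main() {
    let ngram_len2: usize = 2; // assumed example value
    // Mirrors the loop bounds above: proximities 1..=(8 - ngram_len2).
    for proximity in 1..=(8 - ngram_len2) {
        let cost = (proximity + ngram_len2 - 1) as u8;
        println!("proximity {proximity}, ngram length {ngram_len2} -> cost {cost}");
    }
}

Read this way, a bigram at proximity 1 costs the same as a unigram pair at proximity 2, so multi-word terms are not artificially cheap.
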
@ -1,14 +1,10 @@
use heed::RoTxn;
use super::{ProximityEdge, WordPair};
use crate::new::SearchContext;
use crate::{CboRoaringBitmapCodec, Result};
use roaring::{MultiOps, RoaringBitmap};

use super::{ProximityEdge, WordPair};
use crate::new::db_cache::DatabaseCache;
use crate::{CboRoaringBitmapCodec, Result};

pub fn compute_docids<'transaction>(
index: &crate::Index,
txn: &'transaction RoTxn,
db_cache: &mut DatabaseCache<'transaction>,
pub fn compute_docids<'search>(
ctx: &mut SearchContext<'search>,
edge: &ProximityEdge,
) -> Result<RoaringBitmap> {
let ProximityEdge { pairs, proximity } = edge;
@ -16,12 +12,14 @@ pub fn compute_docids<'transaction>(
for pair in pairs.iter() {
let bytes = match pair {
WordPair::Words { left, right } => {
db_cache.get_word_pair_proximity_docids(index, txn, left, right, *proximity)
ctx.get_word_pair_proximity_docids(*left, *right, *proximity)
}
WordPair::WordPrefix { left, right_prefix } => {
ctx.get_word_prefix_pair_proximity_docids(*left, *right_prefix, *proximity)
}
WordPair::WordPrefixSwapped { left_prefix, right } => {
ctx.get_prefix_word_pair_proximity_docids(*left_prefix, *right, *proximity)
}
WordPair::WordPrefix { left, right_prefix } => db_cache
.get_word_prefix_pair_proximity_docids(index, txn, left, right_prefix, *proximity),
WordPair::WordPrefixSwapped { left_prefix, right } => db_cache
.get_prefix_word_pair_proximity_docids(index, txn, left_prefix, right, *proximity),
}?;
let bitmap =
bytes.map(CboRoaringBitmapCodec::deserialize_from).transpose()?.unwrap_or_default();

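One detail worth noting in the loop above: a pair with no entry in the database (a None byte slice) is folded into an empty bitmap rather than an error, via map + transpose + unwrap_or_default. A stand-in demonstration of that Option/Result shuffle, with a dummy decoder in place of CboRoaringBitmapCodec::deserialize_from:

fn decode(bytes: &[u8]) -> Result<usize, String> {
    Ok(bytes.len()) // stand-in: pretend the decoded bitmap is just a length
}

fn main() -> Result<(), String> {
    let present: Option<&[u8]> = Some(&[1, 2, 3]);
    let absent: Option<&[u8]> = None;
    // Some(bytes) -> decoded value; a decode failure would propagate with `?`.
    assert_eq!(present.map(decode).transpose()?.unwrap_or_default(), 3);
    // None -> Default::default(), i.e. an empty bitmap in the real code.
    assert_eq!(absent.map(decode).transpose()?.unwrap_or_default(), 0);
    Ok(())
}
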
@ -1,25 +1,22 @@
pub mod build;
pub mod compute_docids;

use heed::RoTxn;
use roaring::RoaringBitmap;

use super::empty_paths_cache::EmptyPathsCache;

use super::{EdgeDetails, RankingRuleGraphTrait};
use crate::new::db_cache::DatabaseCache;
use crate::new::interner::Interned;
use crate::new::logger::SearchLogger;
use crate::new::query_term::WordDerivations;
use crate::new::{QueryGraph, QueryNode};
use crate::{Index, Result};
use crate::new::{QueryGraph, QueryNode, SearchContext};
use crate::Result;
use roaring::RoaringBitmap;

// TODO: intern the strings, refer to them by their pointer?

#[derive(Debug, Clone)]
#[derive(Clone)]
pub enum WordPair {
Words { left: String, right: String },
WordPrefix { left: String, right_prefix: String },
WordPrefixSwapped { left_prefix: String, right: String },
Words { left: Interned<String>, right: Interned<String> },
WordPrefix { left: Interned<String>, right_prefix: Interned<String> },
WordPrefixSwapped { left_prefix: Interned<String>, right: Interned<String> },
}

#[derive(Clone)]
@ -40,32 +37,26 @@ impl RankingRuleGraphTrait for ProximityGraph {
format!(", prox {proximity}, {} pairs", pairs.len())
}

fn compute_docids<'db_cache, 'transaction>(
index: &Index,
txn: &'transaction RoTxn,
db_cache: &mut DatabaseCache<'transaction>,
fn compute_docids<'search>(
ctx: &mut SearchContext<'search>,
edge: &Self::EdgeDetails,
) -> Result<roaring::RoaringBitmap> {
compute_docids::compute_docids(index, txn, db_cache, edge)
compute_docids::compute_docids(ctx, edge)
}

fn build_visit_from_node<'transaction>(
_index: &Index,
_txn: &'transaction RoTxn,
_db_cache: &mut DatabaseCache<'transaction>,
fn build_visit_from_node<'search>(
ctx: &mut SearchContext<'search>,
from_node: &QueryNode,
) -> Result<Option<Self::BuildVisitedFromNode>> {
build::visit_from_node(from_node)
build::visit_from_node(ctx, from_node)
}

fn build_visit_to_node<'from_data, 'transaction: 'from_data>(
index: &Index,
txn: &'transaction RoTxn,
db_cache: &mut DatabaseCache<'transaction>,
fn build_visit_to_node<'from_data, 'search: 'from_data>(
ctx: &mut SearchContext<'search>,
to_node: &QueryNode,
from_node_data: &'from_data Self::BuildVisitedFromNode,
) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>> {
build::visit_to_node(index, txn, db_cache, to_node, from_node_data)
build::visit_to_node(ctx, to_node, from_node_data)
}

fn log_state(

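WordPair can drop its owned Strings above because Interned<String> is a small Copy handle into a shared arena. The interner itself is not part of these hunks; a minimal self-contained sketch of the idea (the real implementation lives in the new interner module and uses FxHashMap) might look like:

use std::collections::HashMap;
use std::hash::Hash;
use std::marker::PhantomData;

pub struct Interned<T> {
    idx: u32,
    _phantom: PhantomData<T>,
}
// Manual impls so the handle is Copy regardless of T.
impl<T> Clone for Interned<T> {
    fn clone(&self) -> Self {
        *self
    }
}
impl<T> Copy for Interned<T> {}

pub struct Interner<T> {
    stable_store: Vec<T>,
    lookup: HashMap<T, Interned<T>>,
}
impl<T: Clone + Eq + Hash> Interner<T> {
    pub fn new() -> Self {
        Interner { stable_store: Vec::new(), lookup: HashMap::new() }
    }
    /// Returns the existing handle for `value`, or stores it and hands out a new one.
    pub fn insert(&mut self, value: T) -> Interned<T> {
        if let Some(interned) = self.lookup.get(&value) {
            *interned
        } else {
            self.stable_store.push(value.clone());
            let interned =
                Interned { idx: (self.stable_store.len() - 1) as u32, _phantom: PhantomData };
            self.lookup.insert(value, interned);
            interned
        }
    }
    /// Resolves a handle back to the interned value.
    pub fn get(&self, interned: Interned<T>) -> &T {
        &self.stable_store[interned.idx as usize]
    }
}

With a shape like this, comparing two interned words is an integer comparison, and since the real Interned also implements Eq and Hash, the DatabaseCache keys shrink from owned Strings to u32-sized handles.
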
@ -1,23 +1,18 @@
#![allow(clippy::too_many_arguments)]

use heed::RoTxn;
use roaring::{MultiOps, RoaringBitmap};

use super::edge_docids_cache::EdgeDocidsCache;
use super::empty_paths_cache::EmptyPathsCache;

use super::{RankingRuleGraph, RankingRuleGraphTrait};
use crate::new::db_cache::DatabaseCache;

use crate::new::BitmapOrAllRef;
use crate::{Index, Result};
use crate::new::{BitmapOrAllRef, SearchContext};
use crate::Result;
use roaring::{MultiOps, RoaringBitmap};

impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
pub fn resolve_paths<'transaction>(
// TODO: reduce the universe after computing each path
// TODO: deserialize roaring bitmap within a universe
pub fn resolve_paths<'search>(
&mut self,
index: &Index,
txn: &'transaction RoTxn,
db_cache: &mut DatabaseCache<'transaction>,
ctx: &mut SearchContext<'search>,
edge_docids_cache: &mut EdgeDocidsCache<G>,
empty_paths_cache: &mut EmptyPathsCache,
universe: &RoaringBitmap,
@ -52,8 +47,8 @@ impl<G: RankingRuleGraphTrait> RankingRuleGraph<G> {
let mut cached_edge_docids = vec![];
'edge_loop: for edge_index in edge_indexes {
visited_edges.push(edge_index);
let edge_docids = edge_docids_cache
.get_edge_docids(index, txn, db_cache, edge_index, self, universe)?;
let edge_docids =
edge_docids_cache.get_edge_docids(ctx, edge_index, self, universe)?;
match edge_docids {
BitmapOrAllRef::Bitmap(edge_docids) => {
cached_edge_docids.push((edge_index, edge_docids.clone()));

@ -1,19 +1,17 @@
use heed::{BytesDecode, RoTxn};
use roaring::RoaringBitmap;

use super::empty_paths_cache::EmptyPathsCache;

use super::{EdgeDetails, RankingRuleGraph, RankingRuleGraphTrait};
use crate::new::db_cache::DatabaseCache;
use crate::new::interner::Interned;
use crate::new::logger::SearchLogger;
use crate::new::query_term::{LocatedQueryTerm, Phrase, QueryTerm, WordDerivations};
use crate::new::resolve_query_graph::resolve_phrase;
use crate::new::{QueryGraph, QueryNode};
use crate::{Index, Result, RoaringBitmapCodec};
use crate::new::{QueryGraph, QueryNode, SearchContext};
use crate::{Result, RoaringBitmapCodec};
use heed::BytesDecode;
use roaring::RoaringBitmap;

#[derive(Clone)]
pub enum TypoEdge {
Phrase { phrase: Phrase },
Phrase { phrase: Interned<Phrase> },
Word { derivations: WordDerivations, nbr_typos: u8 },
}

@ -30,14 +28,12 @@ impl RankingRuleGraphTrait for TypoGraph {
}
}

fn compute_docids<'db_cache, 'transaction>(
index: &Index,
txn: &'transaction RoTxn,
db_cache: &mut DatabaseCache<'transaction>,
fn compute_docids<'db_cache, 'search>(
ctx: &mut SearchContext<'search>,
edge: &Self::EdgeDetails,
) -> Result<RoaringBitmap> {
match edge {
TypoEdge::Phrase { phrase } => resolve_phrase(index, txn, db_cache, phrase),
TypoEdge::Phrase { phrase } => resolve_phrase(ctx, *phrase),
TypoEdge::Word { derivations, nbr_typos } => {
let words = match nbr_typos {
0 => &derivations.zero_typo,
@ -46,16 +42,14 @@ impl RankingRuleGraphTrait for TypoGraph {
_ => panic!(),
};
let mut docids = RoaringBitmap::new();
for word in words.iter() {
let Some(bytes) = db_cache.get_word_docids(index, txn, word)? else { continue };
for word in words.iter().copied() {
let Some(bytes) = ctx.get_word_docids(word)? else { continue };
let bitmap =
RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding)?;
docids |= bitmap;
}
if *nbr_typos == 0 {
if let Some(bytes) =
db_cache.get_prefix_docids(index, txn, &derivations.original)?
{
if let Some(bytes) = ctx.get_prefix_docids(derivations.original)? {
let bitmap =
RoaringBitmapCodec::bytes_decode(bytes).ok_or(heed::Error::Decoding)?;
docids |= bitmap;
@ -66,26 +60,22 @@ impl RankingRuleGraphTrait for TypoGraph {
}
}

fn build_visit_from_node<'transaction>(
_index: &Index,
_txn: &'transaction RoTxn,
_db_cache: &mut DatabaseCache<'transaction>,
fn build_visit_from_node<'search>(
_ctx: &mut SearchContext<'search>,
_from_node: &QueryNode,
) -> Result<Option<Self::BuildVisitedFromNode>> {
Ok(Some(()))
}

fn build_visit_to_node<'from_data, 'transaction: 'from_data>(
_index: &Index,
_txn: &'transaction RoTxn,
_db_cache: &mut DatabaseCache<'transaction>,
fn build_visit_to_node<'from_data, 'search: 'from_data>(
_ctx: &mut SearchContext<'search>,
to_node: &QueryNode,
_from_node_data: &'from_data Self::BuildVisitedFromNode,
) -> Result<Vec<(u8, EdgeDetails<Self::EdgeDetails>)>> {
match to_node {
QueryNode::Term(LocatedQueryTerm { value, .. }) => match value {
QueryTerm::Phrase { phrase } => {
Ok(vec![(0, EdgeDetails::Data(TypoEdge::Phrase { phrase: phrase.clone() }))])
&QueryTerm::Phrase { phrase } => {
Ok(vec![(0, EdgeDetails::Data(TypoEdge::Phrase { phrase }))])
}
QueryTerm::Word { derivations } => {
let mut edges = vec![];

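A small Rust detail in the last hunk: the arm changes from QueryTerm::Phrase { phrase } plus phrase.clone() to &QueryTerm::Phrase { phrase }. Matching the reference with a leading & binds phrase by value, which is only legal because Interned<Phrase> is Copy. A stand-in demonstration:

#[derive(Clone, Copy, Debug)]
struct Handle(u32); // stand-in for Interned<Phrase>

enum Term {
    Phrase { phrase: Handle },
}

fn main() {
    let term = Term::Phrase { phrase: Handle(7) };
    // `&`-pattern on a reference: the Copy field is moved out by value,
    // so no clone() is needed on the right-hand side.
    match &term {
        &Term::Phrase { phrase } => println!("copied handle: {phrase:?}"),
    }
}
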
@ -1,33 +1,28 @@
use heed::RoTxn;
use roaring::RoaringBitmap;

use super::db_cache::DatabaseCache;
use super::logger::SearchLogger;

use super::QueryGraph;
use super::SearchContext;
use crate::new::graph_based_ranking_rule::GraphBasedRankingRule;
use crate::new::ranking_rule_graph::ProximityGraph;
use crate::new::ranking_rule_graph::TypoGraph;
use crate::new::words::Words;
use roaring::RoaringBitmap;
// use crate::search::new::sort::Sort;
use crate::{Index, Result, TermsMatchingStrategy};
use crate::{Result, TermsMatchingStrategy};

pub trait RankingRuleOutputIter<'transaction, Query> {
pub trait RankingRuleOutputIter<'search, Query> {
fn next_bucket(&mut self) -> Result<Option<RankingRuleOutput<Query>>>;
}

pub struct RankingRuleOutputIterWrapper<'transaction, Query> {
iter: Box<dyn Iterator<Item = Result<RankingRuleOutput<Query>>> + 'transaction>,
pub struct RankingRuleOutputIterWrapper<'search, Query> {
iter: Box<dyn Iterator<Item = Result<RankingRuleOutput<Query>>> + 'search>,
}
impl<'transaction, Query> RankingRuleOutputIterWrapper<'transaction, Query> {
pub fn new(
iter: Box<dyn Iterator<Item = Result<RankingRuleOutput<Query>>> + 'transaction>,
) -> Self {
impl<'search, Query> RankingRuleOutputIterWrapper<'search, Query> {
pub fn new(iter: Box<dyn Iterator<Item = Result<RankingRuleOutput<Query>>> + 'search>) -> Self {
Self { iter }
}
}
impl<'transaction, Query> RankingRuleOutputIter<'transaction, Query>
for RankingRuleOutputIterWrapper<'transaction, Query>
impl<'search, Query> RankingRuleOutputIter<'search, Query>
for RankingRuleOutputIterWrapper<'search, Query>
{
fn next_bucket(&mut self) -> Result<Option<RankingRuleOutput<Query>>> {
match self.iter.next() {
@ -44,7 +39,7 @@ pub struct PlaceholderQuery;
impl RankingRuleQueryTrait for PlaceholderQuery {}
impl RankingRuleQueryTrait for QueryGraph {}

pub trait RankingRule<'transaction, Query: RankingRuleQueryTrait> {
pub trait RankingRule<'search, Query: RankingRuleQueryTrait> {
fn id(&self) -> String;

/// Prepare the ranking rule such that it can start iterating over its
@ -53,9 +48,7 @@ pub trait RankingRule<'transaction, Query: RankingRuleQueryTrait> {
/// The given universe is the universe that will be given to [`next_bucket`](RankingRule::next_bucket).
fn start_iteration(
&mut self,
index: &Index,
txn: &'transaction RoTxn,
db_cache: &mut DatabaseCache<'transaction>,
ctx: &mut SearchContext<'search>,
logger: &mut dyn SearchLogger<Query>,
universe: &RoaringBitmap,
query: &Query,
@ -70,9 +63,7 @@ pub trait RankingRule<'transaction, Query: RankingRuleQueryTrait> {
/// - the universe given to [`start_iteration`](RankingRule::start_iteration)
fn next_bucket(
&mut self,
index: &Index,
txn: &'transaction RoTxn,
db_cache: &mut DatabaseCache<'transaction>,
ctx: &mut SearchContext<'search>,
logger: &mut dyn SearchLogger<Query>,
universe: &RoaringBitmap,
) -> Result<Option<RankingRuleOutput<Query>>>;
@ -81,9 +72,7 @@ pub trait RankingRule<'transaction, Query: RankingRuleQueryTrait> {
/// The next call to this ranking rule, if any, will be [`start_iteration`](RankingRule::start_iteration).
fn end_iteration(
&mut self,
index: &Index,
txn: &'transaction RoTxn,
db_cache: &mut DatabaseCache<'transaction>,
ctx: &mut SearchContext<'search>,
logger: &mut dyn SearchLogger<Query>,
);
}
@ -98,11 +87,9 @@ pub struct RankingRuleOutput<Q> {

// TODO: can make it generic over the query type (either query graph or placeholder) fairly easily
#[allow(clippy::too_many_arguments)]
pub fn apply_ranking_rules<'transaction>(
index: &Index,
txn: &'transaction heed::RoTxn,
pub fn apply_ranking_rules<'search>(
ctx: &mut SearchContext<'search>,
// TODO: ranking rules parameter
db_cache: &mut DatabaseCache<'transaction>,
query_graph: &QueryGraph,
universe: &RoaringBitmap,
from: usize,
@ -115,7 +102,7 @@ pub fn apply_ranking_rules<'transaction>(
let proximity = &mut GraphBasedRankingRule::<ProximityGraph>::new("proximity".to_owned());
let typo = &mut GraphBasedRankingRule::<TypoGraph>::new("typo".to_owned());
// TODO: ranking rules given as argument
let mut ranking_rules: Vec<&mut dyn RankingRule<'transaction, QueryGraph>> =
let mut ranking_rules: Vec<&mut dyn RankingRule<'search, QueryGraph>> =
vec![words, typo, proximity /*sort*/];

logger.ranking_rules(&ranking_rules);
@ -126,7 +113,7 @@ pub fn apply_ranking_rules<'transaction>(

let ranking_rules_len = ranking_rules.len();
logger.start_iteration_ranking_rule(0, ranking_rules[0], query_graph, universe);
ranking_rules[0].start_iteration(index, txn, db_cache, logger, universe, query_graph)?;
ranking_rules[0].start_iteration(ctx, logger, universe, query_graph)?;

let mut candidates = vec![RoaringBitmap::default(); ranking_rules_len];
candidates[0] = universe.clone();
@ -142,7 +129,7 @@ pub fn apply_ranking_rules<'transaction>(
&candidates[cur_ranking_rule_index],
);
candidates[cur_ranking_rule_index].clear();
ranking_rules[cur_ranking_rule_index].end_iteration(index, txn, db_cache, logger);
ranking_rules[cur_ranking_rule_index].end_iteration(ctx, logger);
if cur_ranking_rule_index == 0 {
break;
} else {
@ -206,7 +193,7 @@ pub fn apply_ranking_rules<'transaction>(
continue;
}

let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket(index, txn, db_cache, logger, &candidates[cur_ranking_rule_index])? else {
let Some(next_bucket) = ranking_rules[cur_ranking_rule_index].next_bucket(ctx, logger, &candidates[cur_ranking_rule_index])? else {
// TODO: add remaining candidates automatically here?
back!();
continue;
@ -239,9 +226,7 @@ pub fn apply_ranking_rules<'transaction>(
&candidates[cur_ranking_rule_index],
);
ranking_rules[cur_ranking_rule_index].start_iteration(
index,
txn,
db_cache,
ctx,
logger,
&next_bucket.candidates,
&next_bucket.query,
@ -255,9 +240,7 @@ pub fn apply_ranking_rules<'transaction>(
mod tests {
// use crate::allocator::ALLOC;
use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use crate::index::tests::TempIndex;
use crate::new::db_cache::DatabaseCache;
use crate::new::execute_search;
use crate::new::{execute_search, SearchContext};
use big_s::S;
use heed::EnvOpenOptions;
use maplit::hashset;
@ -269,55 +252,6 @@ mod tests {
use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
use crate::{Criterion, Index, Object, Search, TermsMatchingStrategy};

#[test]
fn execute_new_search() {
let index = TempIndex::new();
index
.add_documents(documents!([
{
"id": 7,
"text": "the super quick super brown fox jumps over",
},
{
"id": 8,
"text": "the super quick brown fox jumps over",
},
{
"id": 9,
"text": "the quick super brown fox jumps over",
},
{
"id": 10,
"text": "the quick brown fox jumps over",
},
{
"id": 11,
"text": "the quick brown fox jumps over the lazy dog",
},
{
"id": 12,
"text": "the quick brown cat jumps over the lazy dog",
},
]))
.unwrap();
let txn = index.read_txn().unwrap();
let mut db_cache = DatabaseCache::default();

let results = execute_search(
&index,
&txn,
&mut db_cache,
"releases from poison by the government",
None,
0,
50,
&mut DefaultSearchLogger,
)
.unwrap();

println!("{results:?}")
}

#[test]
fn search_wiki_new() {
let mut options = EnvOpenOptions::new();
@ -331,24 +265,20 @@ mod tests {
// loop {
let start = Instant::now();

let mut db_cache = DatabaseCache::default();

let mut logger = crate::new::logger::detailed::DetailedSearchLogger::new("log");
// let mut logger = crate::new::logger::detailed::DetailedSearchLogger::new("log");

let results = execute_search(
&index,
&txn,
&mut db_cache,
&mut SearchContext::new(&index, &txn),
"releases from poison by the government",
None,
0,
20,
// &mut DefaultSearchLogger,
&mut logger,
&mut DefaultSearchLogger,
// &mut logger,
)
.unwrap();

logger.write_d2_description();
// logger.write_d2_description();

let elapsed = start.elapsed();

@ -425,19 +355,15 @@ mod tests {
let index = Index::new(options, "data_movies").unwrap();
let txn = index.read_txn().unwrap();

let primary_key = index.primary_key(&txn).unwrap().unwrap();
let primary_key = index.fields_ids_map(&txn).unwrap().id(primary_key).unwrap();
// let primary_key = index.primary_key(&txn).unwrap().unwrap();
// let primary_key = index.fields_ids_map(&txn).unwrap().id(primary_key).unwrap();
// loop {
let start = Instant::now();

let mut db_cache = DatabaseCache::default();

let mut logger = crate::new::logger::detailed::DetailedSearchLogger::new("log");

let mut ctx = SearchContext::new(&index, &txn);
let results = execute_search(
&index,
&txn,
&mut db_cache,
&mut ctx,
"releases from poison by the government",
None,
0,
@ -447,24 +373,24 @@ mod tests {
)
.unwrap();

logger.write_d2_description();
logger.write_d2_description(&mut ctx);

let elapsed = start.elapsed();

let ids = index
.documents(&txn, results.iter().copied())
.unwrap()
.into_iter()
.map(|x| {
let obkv = &x.1;
let id = obkv.get(primary_key).unwrap();
let id: serde_json::Value = serde_json::from_slice(id).unwrap();
id.as_str().unwrap().to_owned()
})
.collect::<Vec<_>>();
// let ids = index
// .documents(&txn, results.iter().copied())
// .unwrap()
// .into_iter()
// .map(|x| {
// let obkv = &x.1;
// let id = obkv.get(primary_key).unwrap();
// let id: serde_json::Value = serde_json::from_slice(id).unwrap();
// id.as_str().unwrap().to_owned()
// })
// .collect::<Vec<_>>();

println!("{}us: {results:?}", elapsed.as_micros());
println!("external ids: {ids:?}");
// println!("external ids: {ids:?}");
// }
}

@ -1,34 +1,28 @@
use std::collections::VecDeque;

use fxhash::FxHashMap;
use heed::{BytesDecode, RoTxn};
use roaring::{MultiOps, RoaringBitmap};

use super::db_cache::DatabaseCache;
use super::interner::Interned;
use super::query_term::{Phrase, QueryTerm, WordDerivations};
use super::{QueryGraph, QueryNode};

use crate::{CboRoaringBitmapCodec, Index, Result, RoaringBitmapCodec};
use super::{QueryGraph, QueryNode, SearchContext};
use crate::{CboRoaringBitmapCodec, Result, RoaringBitmapCodec};
use fxhash::FxHashMap;
use heed::BytesDecode;
use roaring::{MultiOps, RoaringBitmap};
use std::collections::VecDeque;

// TODO: manual performance metrics: access to DB, bitmap deserializations/operations, etc.
#[derive(Default)]
pub struct NodeDocIdsCache {
pub cache: FxHashMap<u32, RoaringBitmap>,
}
impl NodeDocIdsCache {
fn get_docids<'cache, 'transaction>(
impl<'search> SearchContext<'search> {
fn get_node_docids<'cache>(
&'cache mut self,
index: &Index,
txn: &'transaction RoTxn,
db_cache: &mut DatabaseCache<'transaction>,
term: &QueryTerm,
node_idx: u32,
) -> Result<&'cache RoaringBitmap> {
if self.cache.contains_key(&node_idx) {
return Ok(&self.cache[&node_idx]);
if self.node_docids_cache.cache.contains_key(&node_idx) {
return Ok(&self.node_docids_cache.cache[&node_idx]);
};
let docids = match term {
QueryTerm::Phrase { phrase } => resolve_phrase(index, txn, db_cache, phrase)?,
QueryTerm::Phrase { phrase } => resolve_phrase(self, *phrase)?,
QueryTerm::Word {
derivations:
WordDerivations {
@ -42,15 +36,14 @@ impl NodeDocIdsCache {
},
} => {
let mut or_docids = vec![];
for word in zero_typo.iter().chain(one_typo.iter()).chain(two_typos.iter()) {
if let Some(word_docids) = db_cache.get_word_docids(index, txn, word)? {
for word in zero_typo.iter().chain(one_typo.iter()).chain(two_typos.iter()).copied()
{
if let Some(word_docids) = self.get_word_docids(word)? {
or_docids.push(word_docids);
}
}
if *use_prefix_db {
if let Some(prefix_docids) =
db_cache.get_prefix_docids(index, txn, original.as_str())?
{
if let Some(prefix_docids) = self.get_prefix_docids(*original)? {
or_docids.push(prefix_docids);
}
}
@ -58,32 +51,25 @@ impl NodeDocIdsCache {
.into_iter()
.map(|slice| RoaringBitmapCodec::bytes_decode(slice).unwrap())
.collect::<Vec<_>>();
for synonym in synonyms {
for synonym in synonyms.iter().copied() {
// TODO: cache resolve_phrase?
docids.push(resolve_phrase(index, txn, db_cache, synonym)?);
docids.push(resolve_phrase(self, synonym)?);
}
if let Some((left, right)) = split_words {
if let Some(split_word_docids) =
db_cache.get_word_pair_proximity_docids(index, txn, left, right, 1)?
{
docids.push(CboRoaringBitmapCodec::deserialize_from(split_word_docids)?);
}
if let Some(split_words) = split_words {
docids.push(resolve_phrase(self, *split_words)?);
}

MultiOps::union(docids)
}
};
let _ = self.cache.insert(node_idx, docids);
let docids = &self.cache[&node_idx];
let _ = self.node_docids_cache.cache.insert(node_idx, docids);
let docids = &self.node_docids_cache.cache[&node_idx];
Ok(docids)
}
}

pub fn resolve_query_graph<'transaction>(
index: &Index,
txn: &'transaction RoTxn,
db_cache: &mut DatabaseCache<'transaction>,
node_docids_cache: &mut NodeDocIdsCache,
pub fn resolve_query_graph<'search>(
ctx: &mut SearchContext<'search>,
q: &QueryGraph,
universe: &RoaringBitmap,
) -> Result<RoaringBitmap> {
@ -111,8 +97,7 @@ pub fn resolve_query_graph<'transaction>(
let node_docids = match n {
QueryNode::Term(located_term) => {
let term = &located_term.value;
let derivations_docids =
node_docids_cache.get_docids(index, txn, db_cache, term, node)?;
let derivations_docids = ctx.get_node_docids(term, node)?;
predecessors_docids & derivations_docids
}
QueryNode::Deleted => {
@ -143,13 +128,8 @@ pub fn resolve_query_graph<'transaction>(
panic!()
}

pub fn resolve_phrase<'transaction>(
index: &Index,
txn: &'transaction RoTxn,
db_cache: &mut DatabaseCache<'transaction>,
phrase: &Phrase,
) -> Result<RoaringBitmap> {
let Phrase { words } = phrase;
pub fn resolve_phrase(ctx: &mut SearchContext, phrase: Interned<Phrase>) -> Result<RoaringBitmap> {
let Phrase { words } = ctx.phrase_interner.get(phrase).clone();
let mut candidates = RoaringBitmap::new();
let mut first_iter = true;
let winsize = words.len().min(3);
@ -161,19 +141,19 @@ pub fn resolve_phrase<'transaction>(
for win in words.windows(winsize) {
// Get all the documents with the matching distance for each word pairs.
let mut bitmaps = Vec::with_capacity(winsize.pow(2));
for (offset, s1) in win
for (offset, &s1) in win
.iter()
.enumerate()
.filter_map(|(index, word)| word.as_ref().map(|word| (index, word)))
{
for (dist, s2) in win
for (dist, &s2) in win
.iter()
.skip(offset + 1)
.enumerate()
.filter_map(|(index, word)| word.as_ref().map(|word| (index, word)))
{
if dist == 0 {
match db_cache.get_word_pair_proximity_docids(index, txn, s1, s2, 1)? {
match ctx.get_word_pair_proximity_docids(s1, s2, 1)? {
Some(m) => bitmaps.push(CboRoaringBitmapCodec::deserialize_from(m)?),
// If there are no documents for this pair, there will be no
// results for the phrase query.
@ -182,13 +162,9 @@ pub fn resolve_phrase<'transaction>(
} else {
let mut bitmap = RoaringBitmap::new();
for dist in 0..=dist {
if let Some(m) = db_cache.get_word_pair_proximity_docids(
index,
txn,
s1,
s2,
dist as u8 + 1,
)? {
if let Some(m) =
ctx.get_word_pair_proximity_docids(s1, s2, dist as u8 + 1)?
{
bitmap |= CboRoaringBitmapCodec::deserialize_from(m)?;
}
}

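To make the windowing above concrete: resolve_phrase slides a window of min(word count, 3) words across the phrase and, inside each window, queries every ordered pair, adjacent words at proximity exactly 1 and words that are dist slots apart at every proximity from 1 to dist + 1. A self-contained sketch of the pair enumeration only (no bitmaps, and ignoring the Option-wrapped interned words of the real code):

fn main() {
    let words = ["the", "quick", "brown", "fox"];
    let winsize = words.len().min(3);
    for win in words.windows(winsize) {
        for (offset, s1) in win.iter().enumerate() {
            for (dist, s2) in win.iter().skip(offset + 1).enumerate() {
                if dist == 0 {
                    // Immediate neighbours must be at proximity 1.
                    println!("({s1}, {s2}) at proximity 1");
                } else {
                    // Farther apart: any proximity up to dist + 1 is accepted.
                    println!("({s1}, {s2}) at proximities 1..={}", dist + 1);
                }
            }
        }
    }
}

Running it prints the pairs generated for each of the two 3-word windows of this 4-word phrase; the real function then combines the per-pair bitmaps within and across windows (in code outside these hunks).
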
@ -1,11 +1,7 @@
use heed::RoTxn;
use roaring::RoaringBitmap;

use super::db_cache::DatabaseCache;
use super::logger::SearchLogger;
use super::{
RankingRule, RankingRuleOutput, RankingRuleOutputIter, RankingRuleOutputIterWrapper,
RankingRuleQueryTrait,
RankingRuleQueryTrait, SearchContext,
};
use crate::{
// facet::FacetType,
@ -15,18 +11,19 @@ use crate::{
Index,
Result,
};
use roaring::RoaringBitmap;

pub struct Sort<'transaction, Query> {
pub struct Sort<'search, Query> {
field_name: String,
field_id: Option<FieldId>,
is_ascending: bool,
original_query: Option<Query>,
iter: Option<RankingRuleOutputIterWrapper<'transaction, Query>>,
iter: Option<RankingRuleOutputIterWrapper<'search, Query>>,
}
impl<'transaction, Query> Sort<'transaction, Query> {
pub fn new(
impl<'search, Query> Sort<'search, Query> {
pub fn _new(
index: &Index,
rtxn: &'transaction heed::RoTxn,
rtxn: &'search heed::RoTxn,
field_name: String,
is_ascending: bool,
) -> Result<Self> {
@ -37,18 +34,14 @@ impl<'transaction, Query> Sort<'transaction, Query> {
}
}

impl<'transaction, Query: RankingRuleQueryTrait> RankingRule<'transaction, Query>
for Sort<'transaction, Query>
{
impl<'search, Query: RankingRuleQueryTrait> RankingRule<'search, Query> for Sort<'search, Query> {
fn id(&self) -> String {
let Self { field_name, is_ascending, .. } = self;
format!("{field_name}:{}", if *is_ascending { "asc" } else { "desc " })
}
fn start_iteration(
&mut self,
index: &Index,
txn: &'transaction RoTxn,
_db_cache: &mut DatabaseCache<'transaction>,
ctx: &mut SearchContext<'search>,
_logger: &mut dyn SearchLogger<Query>,
parent_candidates: &RoaringBitmap,
parent_query_graph: &Query,
@ -59,8 +52,8 @@ impl<'transaction, Query: RankingRuleQueryTrait> RankingRule<'transaction, Query
if self.is_ascending { ascending_facet_sort } else { descending_facet_sort };

let number_iter = make_iter(
txn,
index
ctx.txn,
ctx.index
.facet_id_f64_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
field_id,
@ -68,8 +61,8 @@ impl<'transaction, Query: RankingRuleQueryTrait> RankingRule<'transaction, Query
)?;

let string_iter = make_iter(
txn,
index
ctx.txn,
ctx.index
.facet_id_string_docids
.remap_key_type::<FacetGroupKeyCodec<ByteSliceRefCodec>>(),
field_id,
@ -91,9 +84,7 @@ impl<'transaction, Query: RankingRuleQueryTrait> RankingRule<'transaction, Query

fn next_bucket(
&mut self,
_index: &Index,
_txn: &'transaction RoTxn,
_db_cache: &mut DatabaseCache<'transaction>,
_ctx: &mut SearchContext<'search>,
_logger: &mut dyn SearchLogger<Query>,
universe: &RoaringBitmap,
) -> Result<Option<RankingRuleOutput<Query>>> {
@ -110,9 +101,7 @@ impl<'transaction, Query: RankingRuleQueryTrait> RankingRule<'transaction, Query

fn end_iteration(
&mut self,
_index: &Index,
_txn: &'transaction RoTxn,
_db_cache: &mut DatabaseCache<'transaction>,
_ctx: &mut SearchContext<'search>,
_logger: &mut dyn SearchLogger<Query>,
) {
self.original_query = None;

@ -1,13 +1,9 @@
use std::collections::BTreeSet;

use heed::RoTxn;
use roaring::RoaringBitmap;

use super::db_cache::DatabaseCache;
use super::logger::SearchLogger;
use super::resolve_query_graph::{resolve_query_graph, NodeDocIdsCache};
use super::{QueryGraph, QueryNode, RankingRule, RankingRuleOutput};
use crate::{Index, Result, TermsMatchingStrategy};
use super::resolve_query_graph::resolve_query_graph;
use super::{QueryGraph, QueryNode, RankingRule, RankingRuleOutput, SearchContext};
use crate::{Result, TermsMatchingStrategy};
use roaring::RoaringBitmap;
use std::collections::BTreeSet;

pub struct Words {
exhausted: bool,
@ -15,7 +11,6 @@ pub struct Words {
iterating: bool,
positions_to_remove: Vec<i8>,
terms_matching_strategy: TermsMatchingStrategy,
node_docids_cache: NodeDocIdsCache,
}
impl Words {
pub fn new(terms_matching_strategy: TermsMatchingStrategy) -> Self {
@ -25,20 +20,17 @@ impl Words {
iterating: false,
positions_to_remove: vec![],
terms_matching_strategy,
node_docids_cache: <_>::default(),
}
}
}

impl<'transaction> RankingRule<'transaction, QueryGraph> for Words {
impl<'search> RankingRule<'search, QueryGraph> for Words {
fn id(&self) -> String {
"words".to_owned()
}
fn start_iteration(
&mut self,
_index: &Index,
_txn: &'transaction RoTxn,
_db_cache: &mut DatabaseCache<'transaction>,
_ctx: &mut SearchContext<'search>,
_logger: &mut dyn SearchLogger<QueryGraph>,
_parent_candidates: &RoaringBitmap,
parent_query_graph: &QueryGraph,
@ -71,9 +63,7 @@ impl<'transaction> RankingRule<'transaction, QueryGraph> for Words {

fn next_bucket(
&mut self,
index: &Index,
txn: &'transaction RoTxn,
db_cache: &mut DatabaseCache<'transaction>,
ctx: &mut SearchContext<'search>,
logger: &mut dyn SearchLogger<QueryGraph>,
universe: &RoaringBitmap,
) -> Result<Option<RankingRuleOutput<QueryGraph>>> {
@ -87,14 +77,7 @@ impl<'transaction> RankingRule<'transaction, QueryGraph> for Words {

logger.log_words_state(query_graph);

let this_bucket = resolve_query_graph(
index,
txn,
db_cache,
&mut self.node_docids_cache,
query_graph,
universe,
)?;
let this_bucket = resolve_query_graph(ctx, query_graph, universe)?;

let child_query_graph = query_graph.clone();
loop {
@ -115,9 +98,7 @@ impl<'transaction> RankingRule<'transaction, QueryGraph> for Words {

fn end_iteration(
&mut self,
_index: &Index,
_txn: &'transaction RoTxn,
_db_cache: &mut DatabaseCache<'transaction>,
_ctx: &mut SearchContext<'search>,
_logger: &mut dyn SearchLogger<QueryGraph>,
) {
self.iterating = false;