Use SearchContext::word_docids everywhere instead of get_db_word_docids

make get_db_word_docids private
This commit is contained in:
Louis Dureuil 2023-04-11 18:27:41 +02:00 committed by Loïc Lecrenier
parent 325f17488a
commit 5ab46324c4
7 changed files with 46 additions and 27 deletions

View File

@ -89,7 +89,7 @@ impl<'ctx> SearchContext<'ctx> {
} }
/// Retrieve or insert the given value in the `word_docids` database. /// Retrieve or insert the given value in the `word_docids` database.
pub fn get_db_word_docids(&mut self, word: Interned<String>) -> Result<Option<RoaringBitmap>> { fn get_db_word_docids(&mut self, word: Interned<String>) -> Result<Option<RoaringBitmap>> {
DatabaseCache::get_value( DatabaseCache::get_value(
self.txn, self.txn,
word, word,

View File

@ -427,7 +427,7 @@ fill: \"#B6E2D3\"
)?; )?;
for w in term_subset.all_single_words_except_prefix_db(ctx)? { for w in term_subset.all_single_words_except_prefix_db(ctx)? {
let w = ctx.word_interner.get(w); let w = ctx.word_interner.get(w.interned());
writeln!(file, "{w}: word")?; writeln!(file, "{w}: word")?;
} }
for p in term_subset.all_phrases(ctx)? { for p in term_subset.all_phrases(ctx)? {

View File

@ -50,6 +50,8 @@ use ranking_rules::{BoxRankingRule, RankingRule};
use resolve_query_graph::compute_query_graph_docids; use resolve_query_graph::compute_query_graph_docids;
use sort::Sort; use sort::Sort;
use self::interner::Interned;
/// A structure used throughout the execution of a search query. /// A structure used throughout the execution of a search query.
pub struct SearchContext<'ctx> { pub struct SearchContext<'ctx> {
pub index: &'ctx Index, pub index: &'ctx Index,

View File

@ -3,18 +3,18 @@ mod ntypo_subset;
mod parse_query; mod parse_query;
mod phrase; mod phrase;
use super::interner::{DedupInterner, Interned};
use super::{limits, SearchContext};
use crate::Result;
use std::collections::BTreeSet; use std::collections::BTreeSet;
use std::ops::RangeInclusive; use std::ops::RangeInclusive;
use compute_derivations::partially_initialized_term_from_word;
use either::Either; use either::Either;
pub use ntypo_subset::NTypoTermSubset; pub use ntypo_subset::NTypoTermSubset;
pub use parse_query::{located_query_terms_from_string, make_ngram, number_of_typos_allowed}; pub use parse_query::{located_query_terms_from_string, make_ngram, number_of_typos_allowed};
pub use phrase::Phrase; pub use phrase::Phrase;
use compute_derivations::partially_initialized_term_from_word; use super::interner::{DedupInterner, Interned};
use super::{limits, SearchContext, Word};
use crate::Result;
/// A set of word derivations attached to a location in the search query. /// A set of word derivations attached to a location in the search query.
#[derive(Clone, PartialEq, Eq, Hash)] #[derive(Clone, PartialEq, Eq, Hash)]
@ -180,7 +180,7 @@ impl QueryTermSubset {
pub fn all_single_words_except_prefix_db( pub fn all_single_words_except_prefix_db(
&self, &self,
ctx: &mut SearchContext, ctx: &mut SearchContext,
) -> Result<BTreeSet<Interned<String>>> { ) -> Result<BTreeSet<Word>> {
let mut result = BTreeSet::default(); let mut result = BTreeSet::default();
// TODO: a compute_partially funtion // TODO: a compute_partially funtion
if !self.one_typo_subset.is_empty() || !self.two_typo_subset.is_empty() { if !self.one_typo_subset.is_empty() || !self.two_typo_subset.is_empty() {
@ -197,8 +197,20 @@ impl QueryTermSubset {
synonyms: _, synonyms: _,
use_prefix_db: _, use_prefix_db: _,
} = &original.zero_typo; } = &original.zero_typo;
result.extend(zero_typo.iter().copied()); result.extend(zero_typo.iter().copied().map(|w| {
result.extend(prefix_of.iter().copied()); if original.ngram_words.is_some() {
Word::Derived(w)
} else {
Word::Original(w)
}
}));
result.extend(prefix_of.iter().copied().map(|w| {
if original.ngram_words.is_some() {
Word::Derived(w)
} else {
Word::Original(w)
}
}));
} }
NTypoTermSubset::Subset { words, phrases: _ } => { NTypoTermSubset::Subset { words, phrases: _ } => {
let ZeroTypoTerm { let ZeroTypoTerm {
@ -210,10 +222,14 @@ impl QueryTermSubset {
} = &original.zero_typo; } = &original.zero_typo;
if let Some(zero_typo) = zero_typo { if let Some(zero_typo) = zero_typo {
if words.contains(zero_typo) { if words.contains(zero_typo) {
result.insert(*zero_typo); if original.ngram_words.is_some() {
result.insert(Word::Derived(*zero_typo));
} else {
result.insert(Word::Original(*zero_typo));
} }
} }
result.extend(prefix_of.intersection(words).copied()); }
result.extend(prefix_of.intersection(words).copied().map(Word::Derived));
} }
NTypoTermSubset::Nothing => {} NTypoTermSubset::Nothing => {}
} }
@ -223,13 +239,13 @@ impl QueryTermSubset {
let Lazy::Init(OneTypoTerm { split_words: _, one_typo }) = &original.one_typo else { let Lazy::Init(OneTypoTerm { split_words: _, one_typo }) = &original.one_typo else {
panic!() panic!()
}; };
result.extend(one_typo.iter().copied()) result.extend(one_typo.iter().copied().map(Word::Derived))
} }
NTypoTermSubset::Subset { words, phrases: _ } => { NTypoTermSubset::Subset { words, phrases: _ } => {
let Lazy::Init(OneTypoTerm { split_words: _, one_typo }) = &original.one_typo else { let Lazy::Init(OneTypoTerm { split_words: _, one_typo }) = &original.one_typo else {
panic!() panic!()
}; };
result.extend(one_typo.intersection(words)); result.extend(one_typo.intersection(words).copied().map(Word::Derived));
} }
NTypoTermSubset::Nothing => {} NTypoTermSubset::Nothing => {}
}; };
@ -239,13 +255,13 @@ impl QueryTermSubset {
let Lazy::Init(TwoTypoTerm { two_typos }) = &original.two_typo else { let Lazy::Init(TwoTypoTerm { two_typos }) = &original.two_typo else {
panic!() panic!()
}; };
result.extend(two_typos.iter().copied()); result.extend(two_typos.iter().copied().map(Word::Derived));
} }
NTypoTermSubset::Subset { words, phrases: _ } => { NTypoTermSubset::Subset { words, phrases: _ } => {
let Lazy::Init(TwoTypoTerm { two_typos }) = &original.two_typo else { let Lazy::Init(TwoTypoTerm { two_typos }) = &original.two_typo else {
panic!() panic!()
}; };
result.extend(two_typos.intersection(words)); result.extend(two_typos.intersection(words).copied().map(Word::Derived));
} }
NTypoTermSubset::Nothing => {} NTypoTermSubset::Nothing => {}
}; };

View File

@ -3,7 +3,8 @@ use roaring::RoaringBitmap;
use super::{ComputedCondition, RankingRuleGraphTrait}; use super::{ComputedCondition, RankingRuleGraphTrait};
use crate::search::new::interner::{DedupInterner, Interned}; use crate::search::new::interner::{DedupInterner, Interned};
use crate::search::new::query_term::{ExactTerm, LocatedQueryTermSubset}; use crate::search::new::query_term::{ExactTerm, LocatedQueryTermSubset};
use crate::{Result, RoaringBitmapCodec, SearchContext}; use crate::search::new::Word;
use crate::{Result, SearchContext};
#[derive(Clone, PartialEq, Eq, Hash)] #[derive(Clone, PartialEq, Eq, Hash)]
pub enum ExactnessCondition { pub enum ExactnessCondition {
@ -26,7 +27,7 @@ fn compute_docids(
let mut candidates = match exact_term { let mut candidates = match exact_term {
ExactTerm::Phrase(phrase) => ctx.get_phrase_docids(phrase)?.clone(), ExactTerm::Phrase(phrase) => ctx.get_phrase_docids(phrase)?.clone(),
ExactTerm::Word(word) => { ExactTerm::Word(word) => {
if let Some(word_candidates) = ctx.get_db_word_docids(word)? { if let Some(word_candidates) = ctx.word_docids(Word::Original(word))? {
word_candidates word_candidates
} else { } else {
return Ok(Default::default()); return Ok(Default::default());

View File

@ -9,7 +9,7 @@ use crate::search::new::interner::Interned;
use crate::search::new::query_term::{Phrase, QueryTermSubset}; use crate::search::new::query_term::{Phrase, QueryTermSubset};
use crate::search::new::ranking_rule_graph::ComputedCondition; use crate::search::new::ranking_rule_graph::ComputedCondition;
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids; use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
use crate::search::new::SearchContext; use crate::search::new::{SearchContext, Word};
use crate::Result; use crate::Result;
pub fn compute_docids( pub fn compute_docids(
@ -54,7 +54,7 @@ pub fn compute_docids(
{ {
compute_prefix_edges( compute_prefix_edges(
ctx, ctx,
left_word, left_word.interned(),
right_prefix, right_prefix,
left_phrase, left_phrase,
forward_proximity, forward_proximity,
@ -91,7 +91,7 @@ pub fn compute_docids(
if universe.is_disjoint(ctx.get_phrase_docids(left_phrase)?) { if universe.is_disjoint(ctx.get_phrase_docids(left_phrase)?) {
continue; continue;
} }
} else if let Some(left_word_docids) = ctx.get_db_word_docids(left_word)? { } else if let Some(left_word_docids) = ctx.word_docids(left_word)? {
if universe.is_disjoint(&left_word_docids) { if universe.is_disjoint(&left_word_docids) {
continue; continue;
} }
@ -101,7 +101,7 @@ pub fn compute_docids(
for (right_word, right_phrase) in right_derivs { for (right_word, right_phrase) in right_derivs {
compute_non_prefix_edges( compute_non_prefix_edges(
ctx, ctx,
left_word, left_word.interned(),
right_word, right_word,
left_phrase, left_phrase,
right_phrase, right_phrase,
@ -243,7 +243,7 @@ fn compute_non_prefix_edges(
fn last_words_of_term_derivations( fn last_words_of_term_derivations(
ctx: &mut SearchContext, ctx: &mut SearchContext,
t: &QueryTermSubset, t: &QueryTermSubset,
) -> Result<BTreeSet<(Option<Interned<Phrase>>, Interned<String>)>> { ) -> Result<BTreeSet<(Option<Interned<Phrase>>, Word)>> {
let mut result = BTreeSet::new(); let mut result = BTreeSet::new();
for w in t.all_single_words_except_prefix_db(ctx)? { for w in t.all_single_words_except_prefix_db(ctx)? {
@ -253,7 +253,7 @@ fn last_words_of_term_derivations(
let phrase = ctx.phrase_interner.get(p); let phrase = ctx.phrase_interner.get(p);
let last_term_of_phrase = phrase.words.last().unwrap(); let last_term_of_phrase = phrase.words.last().unwrap();
if let Some(last_word) = last_term_of_phrase { if let Some(last_word) = last_term_of_phrase {
result.insert((Some(p), *last_word)); result.insert((Some(p), Word::Original(*last_word)));
} }
} }
@ -266,7 +266,7 @@ fn first_word_of_term_iter(
let mut result = BTreeSet::new(); let mut result = BTreeSet::new();
let all_words = t.all_single_words_except_prefix_db(ctx)?; let all_words = t.all_single_words_except_prefix_db(ctx)?;
for w in all_words { for w in all_words {
result.insert((w, None)); result.insert((w.interned(), None));
} }
for p in t.all_phrases(ctx)? { for p in t.all_phrases(ctx)? {
let phrase = ctx.phrase_interner.get(p); let phrase = ctx.phrase_interner.get(p);

View File

@ -9,7 +9,7 @@ use super::interner::Interned;
use super::query_graph::QueryNodeData; use super::query_graph::QueryNodeData;
use super::query_term::{Phrase, QueryTermSubset}; use super::query_term::{Phrase, QueryTermSubset};
use super::small_bitmap::SmallBitmap; use super::small_bitmap::SmallBitmap;
use super::{QueryGraph, SearchContext}; use super::{QueryGraph, SearchContext, Word};
use crate::search::new::query_term::LocatedQueryTermSubset; use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::Result; use crate::Result;
@ -35,7 +35,7 @@ pub fn compute_query_term_subset_docids(
) -> Result<RoaringBitmap> { ) -> Result<RoaringBitmap> {
let mut docids = RoaringBitmap::new(); let mut docids = RoaringBitmap::new();
for word in term.all_single_words_except_prefix_db(ctx)? { for word in term.all_single_words_except_prefix_db(ctx)? {
if let Some(word_docids) = ctx.get_db_word_docids(word)? { if let Some(word_docids) = ctx.word_docids(word)? {
docids |= word_docids; docids |= word_docids;
} }
} }
@ -125,7 +125,7 @@ pub fn compute_phrase_docids(
} }
if words.len() == 1 { if words.len() == 1 {
if let Some(word) = &words[0] { if let Some(word) = &words[0] {
if let Some(word_docids) = ctx.get_db_word_docids(*word)? { if let Some(word_docids) = ctx.word_docids(Word::Original(*word))? {
return Ok(word_docids); return Ok(word_docids);
} else { } else {
return Ok(RoaringBitmap::new()); return Ok(RoaringBitmap::new());