Update the ConditionDocidsCache after change to RankingRuleGraphTrait

This commit is contained in:
Loïc Lecrenier 2023-03-30 11:41:20 +02:00
parent 5fd28620cd
commit ae6bb1ce17

View File

@ -1,27 +1,20 @@
use std::marker::PhantomData; use std::marker::PhantomData;
use fxhash::{FxHashMap, FxHashSet}; use fxhash::FxHashMap;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::{RankingRuleGraph, RankingRuleGraphTrait}; use super::{ComputedCondition, RankingRuleGraph, RankingRuleGraphTrait};
use crate::search::new::interner::Interned; use crate::search::new::interner::Interned;
use crate::search::new::query_term::Phrase; use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::search::new::SearchContext; use crate::search::new::SearchContext;
use crate::Result; use crate::Result;
// TODO: give a generation to each universe, then be able to get the exact // TODO: give a generation to each universe, then be able to get the exact
// delta of docids between two universes of different generations! // delta of docids between two universes of different generations!
/// The cached result of resolving one ranking rule condition.
///
/// Holds the document ids together with the words and phrases that
/// `resolve_condition` reported for the condition.
#[derive(Default)]
pub struct ComputedCondition {
/// Document ids for the condition; intersected with the universe when the
/// universe length differs from `universe_len`.
docids: RoaringBitmap,
/// Length of the universe that `docids` was last computed or restricted against.
universe_len: u64,
/// Interned words returned by `resolve_condition` for this condition.
used_words: FxHashSet<Interned<String>>,
/// Interned phrases returned by `resolve_condition` for this condition.
used_phrases: FxHashSet<Interned<Phrase>>,
}
/// A cache storing the document ids associated with each ranking rule edge /// A cache storing the document ids associated with each ranking rule edge
pub struct ConditionDocIdsCache<G: RankingRuleGraphTrait> { pub struct ConditionDocIdsCache<G: RankingRuleGraphTrait> {
// TODO: should be a mapped interner?
pub cache: FxHashMap<Interned<G::Condition>, ComputedCondition>, pub cache: FxHashMap<Interned<G::Condition>, ComputedCondition>,
_phantom: PhantomData<G>, _phantom: PhantomData<G>,
} }
@ -31,45 +24,39 @@ impl<G: RankingRuleGraphTrait> Default for ConditionDocIdsCache<G> {
} }
} }
impl<G: RankingRuleGraphTrait> ConditionDocIdsCache<G> { impl<G: RankingRuleGraphTrait> ConditionDocIdsCache<G> {
pub fn get_condition_used_words_and_phrases( pub fn get_subsets_used_by_condition(
&mut self, &mut self,
interned_condition: Interned<G::Condition>, interned_condition: Interned<G::Condition>,
) -> (&FxHashSet<Interned<String>>, &FxHashSet<Interned<Phrase>>) { ) -> (&Option<LocatedQueryTermSubset>, &LocatedQueryTermSubset) {
let ComputedCondition { used_words, used_phrases, .. } = &self.cache[&interned_condition]; let c = &self.cache[&interned_condition];
(used_words, used_phrases) (&c.start_term_subset, &c.end_term_subset)
} }
/// Retrieve the document ids for the given edge condition. /// Retrieve the document ids for the given edge condition.
/// ///
/// If the cache does not yet contain these docids, they are computed /// If the cache does not yet contain these docids, they are computed
/// and inserted in the cache. /// and inserted in the cache.
pub fn get_condition_docids<'s>( pub fn get_computed_condition<'s>(
&'s mut self, &'s mut self,
ctx: &mut SearchContext, ctx: &mut SearchContext,
interned_condition: Interned<G::Condition>, interned_condition: Interned<G::Condition>,
graph: &mut RankingRuleGraph<G>, graph: &mut RankingRuleGraph<G>,
universe: &RoaringBitmap, universe: &RoaringBitmap,
) -> Result<&'s RoaringBitmap> { ) -> Result<&'s ComputedCondition> {
if self.cache.contains_key(&interned_condition) { if self.cache.contains_key(&interned_condition) {
// TODO compare length of universe compared to the one in self let computed = self.cache.get_mut(&interned_condition).unwrap();
// if it is smaller, then update the value if computed.universe_len == universe.len() {
let ComputedCondition { docids, universe_len, .. } = return Ok(computed);
self.cache.entry(interned_condition).or_default();
if *universe_len == universe.len() {
return Ok(docids);
} else { } else {
*docids &= universe; computed.docids &= universe;
*universe_len = universe.len(); computed.universe_len = universe.len();
return Ok(docids); return Ok(computed);
} }
} }
let condition = graph.conditions_interner.get_mut(interned_condition); let condition = graph.conditions_interner.get_mut(interned_condition);
let (docids, used_words, used_phrases) = G::resolve_condition(ctx, condition, universe)?; let computed = G::resolve_condition(ctx, condition, universe)?;
let _ = self.cache.insert( // TODO: if computed.universe_len != universe.len() ?
interned_condition, let _ = self.cache.insert(interned_condition, computed);
ComputedCondition { docids, universe_len: universe.len(), used_words, used_phrases }, let computed = &self.cache[&interned_condition];
); Ok(computed)
let ComputedCondition { docids, .. } = &self.cache[&interned_condition];
Ok(docids)
} }
} }