Update call sites

This commit is contained in:
Louis Dureuil 2023-04-11 15:31:40 +02:00 committed by Loïc Lecrenier
parent 244003e36f
commit e7ff987c46
5 changed files with 23 additions and 39 deletions

View File

@ -1,11 +1,10 @@
use heed::BytesDecode;
use roaring::{MultiOps, RoaringBitmap}; use roaring::{MultiOps, RoaringBitmap};
use super::query_graph::QueryGraph; use super::query_graph::QueryGraph;
use super::ranking_rules::{RankingRule, RankingRuleOutput}; use super::ranking_rules::{RankingRule, RankingRuleOutput};
use crate::search::new::query_graph::QueryNodeData; use crate::search::new::query_graph::QueryNodeData;
use crate::search::new::query_term::ExactTerm; use crate::search::new::query_term::ExactTerm;
use crate::{CboRoaringBitmapCodec, Result, SearchContext, SearchLogger}; use crate::{Result, SearchContext, SearchLogger};
/// A ranking rule that produces 3 disjoint buckets: /// A ranking rule that produces 3 disjoint buckets:
/// ///
@ -161,10 +160,8 @@ impl State {
// Note: Since the position is stored bucketed in word_position_docids, for queries with a lot of // Note: Since the position is stored bucketed in word_position_docids, for queries with a lot of
// longer phrases we'll be losing on precision here. // longer phrases we'll be losing on precision here.
let bucketed_position = crate::bucketed_position(position + offset); let bucketed_position = crate::bucketed_position(position + offset);
let word_position_docids = CboRoaringBitmapCodec::bytes_decode( let word_position_docids =
ctx.get_db_word_position_docids(*word, bucketed_position)?.unwrap_or_default(), ctx.get_db_word_position_docids(*word, bucketed_position)?.unwrap_or_default();
)
.unwrap_or_default();
candidates &= word_position_docids; candidates &= word_position_docids;
if candidates.is_empty() { if candidates.is_empty() {
return Ok(State::Empty(query_graph.clone())); return Ok(State::Empty(query_graph.clone()));
@ -191,11 +188,7 @@ impl State {
// ignore stop words in phrases // ignore stop words in phrases
.flatten() .flatten()
.map(|word| -> Result<_> { .map(|word| -> Result<_> {
Ok(ctx Ok(ctx.get_db_word_fid_docids(*word, fid)?.unwrap_or_default())
.get_db_word_fid_docids(*word, fid)?
.map(CboRoaringBitmapCodec::bytes_decode)
.unwrap_or_default()
.unwrap_or_default())
}), }),
)?; )?;
intersection &= &candidates; intersection &= &candidates;

View File

@ -1,17 +1,17 @@
use fst::automaton::Str;
use fst::{Automaton, IntoStreamer, Streamer};
use heed::types::DecodeIgnore;
use heed::BytesDecode;
use std::borrow::Cow; use std::borrow::Cow;
use std::collections::BTreeSet; use std::collections::BTreeSet;
use std::ops::ControlFlow; use std::ops::ControlFlow;
use fst::automaton::Str;
use fst::{Automaton, IntoStreamer, Streamer};
use heed::types::DecodeIgnore;
use super::*; use super::*;
use crate::search::fst_utils::{Complement, Intersection, StartsWith, Union}; use crate::search::fst_utils::{Complement, Intersection, StartsWith, Union};
use crate::search::new::query_term::TwoTypoTerm; use crate::search::new::query_term::TwoTypoTerm;
use crate::search::new::{limits, SearchContext}; use crate::search::new::{limits, SearchContext};
use crate::search::{build_dfa, get_first}; use crate::search::{build_dfa, get_first};
use crate::{CboRoaringBitmapLenCodec, Result, MAX_WORD_LENGTH}; use crate::{Result, MAX_WORD_LENGTH};
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
pub enum NumberOfTypos { pub enum NumberOfTypos {
@ -385,9 +385,7 @@ fn split_best_frequency(
let left = ctx.word_interner.insert(left.to_owned()); let left = ctx.word_interner.insert(left.to_owned());
let right = ctx.word_interner.insert(right.to_owned()); let right = ctx.word_interner.insert(right.to_owned());
if let Some(docid_bytes) = ctx.get_db_word_pair_proximity_docids(left, right, 1)? { if let Some(frequency) = ctx.get_db_word_pair_proximity_docids_len(left, right, 1)? {
let frequency =
CboRoaringBitmapLenCodec::bytes_decode(docid_bytes).ok_or(heed::Error::Decoding)?;
if best.map_or(true, |(old, _, _)| frequency > old) { if best.map_or(true, |(old, _, _)| frequency > old) {
best = Some((frequency, left, right)); best = Some((frequency, left, right));
} }

View File

@ -1,4 +1,3 @@
use heed::BytesDecode;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::{ComputedCondition, RankingRuleGraphTrait}; use super::{ComputedCondition, RankingRuleGraphTrait};
@ -28,7 +27,7 @@ fn compute_docids(
ExactTerm::Phrase(phrase) => ctx.get_phrase_docids(phrase)?.clone(), ExactTerm::Phrase(phrase) => ctx.get_phrase_docids(phrase)?.clone(),
ExactTerm::Word(word) => { ExactTerm::Word(word) => {
if let Some(word_candidates) = ctx.get_db_word_docids(word)? { if let Some(word_candidates) = ctx.get_db_word_docids(word)? {
RoaringBitmapCodec::bytes_decode(word_candidates).ok_or(heed::Error::Decoding)? word_candidates
} else { } else {
return Ok(Default::default()); return Ok(Default::default());
} }

View File

@ -2,7 +2,6 @@
use std::collections::BTreeSet; use std::collections::BTreeSet;
use heed::BytesDecode;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::ProximityCondition; use super::ProximityCondition;
@ -11,7 +10,7 @@ use crate::search::new::query_term::{Phrase, QueryTermSubset};
use crate::search::new::ranking_rule_graph::ComputedCondition; use crate::search::new::ranking_rule_graph::ComputedCondition;
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids; use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
use crate::search::new::SearchContext; use crate::search::new::SearchContext;
use crate::{CboRoaringBitmapCodec, Result, RoaringBitmapCodec}; use crate::Result;
pub fn compute_docids( pub fn compute_docids(
ctx: &mut SearchContext, ctx: &mut SearchContext,
@ -92,9 +91,7 @@ pub fn compute_docids(
if universe.is_disjoint(ctx.get_phrase_docids(left_phrase)?) { if universe.is_disjoint(ctx.get_phrase_docids(left_phrase)?) {
continue; continue;
} }
} else if let Some(lw_bytes) = ctx.get_db_word_docids(left_word)? { } else if let Some(left_word_docids) = ctx.get_db_word_docids(left_word)? {
let left_word_docids =
RoaringBitmapCodec::bytes_decode(lw_bytes).ok_or(heed::Error::Decoding)?;
if universe.is_disjoint(&left_word_docids) { if universe.is_disjoint(&left_word_docids) {
continue; continue;
} }
@ -155,7 +152,7 @@ fn compute_prefix_edges(
if let Some(new_docids) = if let Some(new_docids) =
ctx.get_db_word_prefix_pair_proximity_docids(left_word, right_prefix, forward_proximity)? ctx.get_db_word_prefix_pair_proximity_docids(left_word, right_prefix, forward_proximity)?
{ {
let new_docids = &universe & CboRoaringBitmapCodec::deserialize_from(new_docids)?; let new_docids = &universe & new_docids;
if !new_docids.is_empty() { if !new_docids.is_empty() {
used_left_words.insert(left_word); used_left_words.insert(left_word);
used_right_prefix.insert(right_prefix); used_right_prefix.insert(right_prefix);
@ -170,7 +167,7 @@ fn compute_prefix_edges(
left_word, left_word,
backward_proximity, backward_proximity,
)? { )? {
let new_docids = &universe & CboRoaringBitmapCodec::deserialize_from(new_docids)?; let new_docids = &universe & new_docids;
if !new_docids.is_empty() { if !new_docids.is_empty() {
used_left_words.insert(left_word); used_left_words.insert(left_word);
used_right_prefix.insert(right_prefix); used_right_prefix.insert(right_prefix);
@ -217,7 +214,7 @@ fn compute_non_prefix_edges(
if let Some(new_docids) = if let Some(new_docids) =
ctx.get_db_word_pair_proximity_docids(word1, word2, forward_proximity)? ctx.get_db_word_pair_proximity_docids(word1, word2, forward_proximity)?
{ {
let new_docids = &universe & CboRoaringBitmapCodec::deserialize_from(new_docids)?; let new_docids = &universe & new_docids;
if !new_docids.is_empty() { if !new_docids.is_empty() {
used_left_words.insert(word1); used_left_words.insert(word1);
used_right_words.insert(word2); used_right_words.insert(word2);
@ -231,7 +228,7 @@ fn compute_non_prefix_edges(
if let Some(new_docids) = if let Some(new_docids) =
ctx.get_db_word_pair_proximity_docids(word2, word1, backward_proximity)? ctx.get_db_word_pair_proximity_docids(word2, word1, backward_proximity)?
{ {
let new_docids = &universe & CboRoaringBitmapCodec::deserialize_from(new_docids)?; let new_docids = &universe & new_docids;
if !new_docids.is_empty() { if !new_docids.is_empty() {
used_left_words.insert(word2); used_left_words.insert(word2);
used_right_words.insert(word1); used_right_words.insert(word1);

View File

@ -3,7 +3,6 @@
use std::collections::VecDeque; use std::collections::VecDeque;
use fxhash::FxHashMap; use fxhash::FxHashMap;
use heed::BytesDecode;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::interner::Interned; use super::interner::Interned;
@ -12,7 +11,7 @@ use super::query_term::{Phrase, QueryTermSubset};
use super::small_bitmap::SmallBitmap; use super::small_bitmap::SmallBitmap;
use super::{QueryGraph, SearchContext}; use super::{QueryGraph, SearchContext};
use crate::search::new::query_term::LocatedQueryTermSubset; use crate::search::new::query_term::LocatedQueryTermSubset;
use crate::{CboRoaringBitmapCodec, Result, RoaringBitmapCodec}; use crate::Result;
#[derive(Default)] #[derive(Default)]
pub struct PhraseDocIdsCache { pub struct PhraseDocIdsCache {
@ -37,7 +36,7 @@ pub fn compute_query_term_subset_docids(
let mut docids = RoaringBitmap::new(); let mut docids = RoaringBitmap::new();
for word in term.all_single_words_except_prefix_db(ctx)? { for word in term.all_single_words_except_prefix_db(ctx)? {
if let Some(word_docids) = ctx.get_db_word_docids(word)? { if let Some(word_docids) = ctx.get_db_word_docids(word)? {
docids |= RoaringBitmapCodec::bytes_decode(word_docids).ok_or(heed::Error::Decoding)?; docids |= word_docids;
} }
} }
for phrase in term.all_phrases(ctx)? { for phrase in term.all_phrases(ctx)? {
@ -46,8 +45,7 @@ pub fn compute_query_term_subset_docids(
if let Some(prefix) = term.use_prefix_db(ctx) { if let Some(prefix) = term.use_prefix_db(ctx) {
if let Some(prefix_docids) = ctx.get_db_word_prefix_docids(prefix)? { if let Some(prefix_docids) = ctx.get_db_word_prefix_docids(prefix)? {
docids |= docids |= prefix_docids;
RoaringBitmapCodec::bytes_decode(prefix_docids).ok_or(heed::Error::Decoding)?;
} }
} }
@ -128,8 +126,7 @@ pub fn compute_phrase_docids(
if words.len() == 1 { if words.len() == 1 {
if let Some(word) = &words[0] { if let Some(word) = &words[0] {
if let Some(word_docids) = ctx.get_db_word_docids(*word)? { if let Some(word_docids) = ctx.get_db_word_docids(*word)? {
return RoaringBitmapCodec::bytes_decode(word_docids) return Ok(word_docids);
.ok_or(heed::Error::Decoding.into());
} else { } else {
return Ok(RoaringBitmap::new()); return Ok(RoaringBitmap::new());
} }
@ -158,7 +155,7 @@ pub fn compute_phrase_docids(
{ {
if dist == 0 { if dist == 0 {
match ctx.get_db_word_pair_proximity_docids(s1, s2, 1)? { match ctx.get_db_word_pair_proximity_docids(s1, s2, 1)? {
Some(m) => bitmaps.push(CboRoaringBitmapCodec::deserialize_from(m)?), Some(m) => bitmaps.push(m),
// If there are no documents for this pair, there will be no // If there are no documents for this pair, there will be no
// results for the phrase query. // results for the phrase query.
None => return Ok(RoaringBitmap::new()), None => return Ok(RoaringBitmap::new()),
@ -169,7 +166,7 @@ pub fn compute_phrase_docids(
if let Some(m) = if let Some(m) =
ctx.get_db_word_pair_proximity_docids(s1, s2, dist as u8 + 1)? ctx.get_db_word_pair_proximity_docids(s1, s2, dist as u8 + 1)?
{ {
bitmap |= CboRoaringBitmapCodec::deserialize_from(m)?; bitmap |= m;
} }
} }
if bitmap.is_empty() { if bitmap.is_empty() {