diff --git a/milli/src/search/new/exact_attribute.rs b/milli/src/search/new/exact_attribute.rs index 3a31f6a75..bc0195ebc 100644 --- a/milli/src/search/new/exact_attribute.rs +++ b/milli/src/search/new/exact_attribute.rs @@ -1,11 +1,10 @@ -use heed::BytesDecode; use roaring::{MultiOps, RoaringBitmap}; use super::query_graph::QueryGraph; use super::ranking_rules::{RankingRule, RankingRuleOutput}; use crate::search::new::query_graph::QueryNodeData; use crate::search::new::query_term::ExactTerm; -use crate::{CboRoaringBitmapCodec, Result, SearchContext, SearchLogger}; +use crate::{Result, SearchContext, SearchLogger}; /// A ranking rule that produces 3 disjoint buckets: /// @@ -161,10 +160,8 @@ impl State { // Note: Since the position is stored bucketed in word_position_docids, for queries with a lot of // longer phrases we'll be losing on precision here. let bucketed_position = crate::bucketed_position(position + offset); - let word_position_docids = CboRoaringBitmapCodec::bytes_decode( - ctx.get_db_word_position_docids(*word, bucketed_position)?.unwrap_or_default(), - ) - .unwrap_or_default(); + let word_position_docids = + ctx.get_db_word_position_docids(*word, bucketed_position)?.unwrap_or_default(); candidates &= word_position_docids; if candidates.is_empty() { return Ok(State::Empty(query_graph.clone())); @@ -191,11 +188,7 @@ impl State { // ignore stop words words in phrases .flatten() .map(|word| -> Result<_> { - Ok(ctx - .get_db_word_fid_docids(*word, fid)? - .map(CboRoaringBitmapCodec::bytes_decode) - .unwrap_or_default() - .unwrap_or_default()) + Ok(ctx.get_db_word_fid_docids(*word, fid)?.unwrap_or_default()) }), )?; intersection &= &candidates; diff --git a/milli/src/search/new/query_term/compute_derivations.rs b/milli/src/search/new/query_term/compute_derivations.rs index 03d92572e..12b8c3832 100644 --- a/milli/src/search/new/query_term/compute_derivations.rs +++ b/milli/src/search/new/query_term/compute_derivations.rs @@ -1,17 +1,17 @@ -use fst::automaton::Str; -use fst::{Automaton, IntoStreamer, Streamer}; -use heed::types::DecodeIgnore; -use heed::BytesDecode; use std::borrow::Cow; use std::collections::BTreeSet; use std::ops::ControlFlow; +use fst::automaton::Str; +use fst::{Automaton, IntoStreamer, Streamer}; +use heed::types::DecodeIgnore; + use super::*; use crate::search::fst_utils::{Complement, Intersection, StartsWith, Union}; use crate::search::new::query_term::TwoTypoTerm; use crate::search::new::{limits, SearchContext}; use crate::search::{build_dfa, get_first}; -use crate::{CboRoaringBitmapLenCodec, Result, MAX_WORD_LENGTH}; +use crate::{Result, MAX_WORD_LENGTH}; #[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)] pub enum NumberOfTypos { @@ -385,9 +385,7 @@ fn split_best_frequency( let left = ctx.word_interner.insert(left.to_owned()); let right = ctx.word_interner.insert(right.to_owned()); - if let Some(docid_bytes) = ctx.get_db_word_pair_proximity_docids(left, right, 1)? { - let frequency = - CboRoaringBitmapLenCodec::bytes_decode(docid_bytes).ok_or(heed::Error::Decoding)?; + if let Some(frequency) = ctx.get_db_word_pair_proximity_docids_len(left, right, 1)? { if best.map_or(true, |(old, _, _)| frequency > old) { best = Some((frequency, left, right)); } diff --git a/milli/src/search/new/ranking_rule_graph/exactness/mod.rs b/milli/src/search/new/ranking_rule_graph/exactness/mod.rs index 55c4497dd..4a3dd6549 100644 --- a/milli/src/search/new/ranking_rule_graph/exactness/mod.rs +++ b/milli/src/search/new/ranking_rule_graph/exactness/mod.rs @@ -1,4 +1,3 @@ -use heed::BytesDecode; use roaring::RoaringBitmap; use super::{ComputedCondition, RankingRuleGraphTrait}; @@ -28,7 +27,7 @@ fn compute_docids( ExactTerm::Phrase(phrase) => ctx.get_phrase_docids(phrase)?.clone(), ExactTerm::Word(word) => { if let Some(word_candidates) = ctx.get_db_word_docids(word)? { - RoaringBitmapCodec::bytes_decode(word_candidates).ok_or(heed::Error::Decoding)? + word_candidates } else { return Ok(Default::default()); } diff --git a/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs b/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs index 07bd102ca..b6f164f16 100644 --- a/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs +++ b/milli/src/search/new/ranking_rule_graph/proximity/compute_docids.rs @@ -2,7 +2,6 @@ use std::collections::BTreeSet; -use heed::BytesDecode; use roaring::RoaringBitmap; use super::ProximityCondition; @@ -11,7 +10,7 @@ use crate::search::new::query_term::{Phrase, QueryTermSubset}; use crate::search::new::ranking_rule_graph::ComputedCondition; use crate::search::new::resolve_query_graph::compute_query_term_subset_docids; use crate::search::new::SearchContext; -use crate::{CboRoaringBitmapCodec, Result, RoaringBitmapCodec}; +use crate::Result; pub fn compute_docids( ctx: &mut SearchContext, @@ -92,9 +91,7 @@ pub fn compute_docids( if universe.is_disjoint(ctx.get_phrase_docids(left_phrase)?) { continue; } - } else if let Some(lw_bytes) = ctx.get_db_word_docids(left_word)? { - let left_word_docids = - RoaringBitmapCodec::bytes_decode(lw_bytes).ok_or(heed::Error::Decoding)?; + } else if let Some(left_word_docids) = ctx.get_db_word_docids(left_word)? { if universe.is_disjoint(&left_word_docids) { continue; } @@ -155,7 +152,7 @@ fn compute_prefix_edges( if let Some(new_docids) = ctx.get_db_word_prefix_pair_proximity_docids(left_word, right_prefix, forward_proximity)? { - let new_docids = &universe & CboRoaringBitmapCodec::deserialize_from(new_docids)?; + let new_docids = &universe & new_docids; if !new_docids.is_empty() { used_left_words.insert(left_word); used_right_prefix.insert(right_prefix); @@ -170,7 +167,7 @@ fn compute_prefix_edges( left_word, backward_proximity, )? { - let new_docids = &universe & CboRoaringBitmapCodec::deserialize_from(new_docids)?; + let new_docids = &universe & new_docids; if !new_docids.is_empty() { used_left_words.insert(left_word); used_right_prefix.insert(right_prefix); @@ -217,7 +214,7 @@ fn compute_non_prefix_edges( if let Some(new_docids) = ctx.get_db_word_pair_proximity_docids(word1, word2, forward_proximity)? { - let new_docids = &universe & CboRoaringBitmapCodec::deserialize_from(new_docids)?; + let new_docids = &universe & new_docids; if !new_docids.is_empty() { used_left_words.insert(word1); used_right_words.insert(word2); @@ -231,7 +228,7 @@ fn compute_non_prefix_edges( if let Some(new_docids) = ctx.get_db_word_pair_proximity_docids(word2, word1, backward_proximity)? { - let new_docids = &universe & CboRoaringBitmapCodec::deserialize_from(new_docids)?; + let new_docids = &universe & new_docids; if !new_docids.is_empty() { used_left_words.insert(word2); used_right_words.insert(word1); diff --git a/milli/src/search/new/resolve_query_graph.rs b/milli/src/search/new/resolve_query_graph.rs index ef7adad14..bca8b6268 100644 --- a/milli/src/search/new/resolve_query_graph.rs +++ b/milli/src/search/new/resolve_query_graph.rs @@ -3,7 +3,6 @@ use std::collections::VecDeque; use fxhash::FxHashMap; -use heed::BytesDecode; use roaring::RoaringBitmap; use super::interner::Interned; @@ -12,7 +11,7 @@ use super::query_term::{Phrase, QueryTermSubset}; use super::small_bitmap::SmallBitmap; use super::{QueryGraph, SearchContext}; use crate::search::new::query_term::LocatedQueryTermSubset; -use crate::{CboRoaringBitmapCodec, Result, RoaringBitmapCodec}; +use crate::Result; #[derive(Default)] pub struct PhraseDocIdsCache { @@ -37,7 +36,7 @@ pub fn compute_query_term_subset_docids( let mut docids = RoaringBitmap::new(); for word in term.all_single_words_except_prefix_db(ctx)? { if let Some(word_docids) = ctx.get_db_word_docids(word)? { - docids |= RoaringBitmapCodec::bytes_decode(word_docids).ok_or(heed::Error::Decoding)?; + docids |= word_docids; } } for phrase in term.all_phrases(ctx)? { @@ -46,8 +45,7 @@ pub fn compute_query_term_subset_docids( if let Some(prefix) = term.use_prefix_db(ctx) { if let Some(prefix_docids) = ctx.get_db_word_prefix_docids(prefix)? { - docids |= - RoaringBitmapCodec::bytes_decode(prefix_docids).ok_or(heed::Error::Decoding)?; + docids |= prefix_docids; } } @@ -128,8 +126,7 @@ pub fn compute_phrase_docids( if words.len() == 1 { if let Some(word) = &words[0] { if let Some(word_docids) = ctx.get_db_word_docids(*word)? { - return RoaringBitmapCodec::bytes_decode(word_docids) - .ok_or(heed::Error::Decoding.into()); + return Ok(word_docids); } else { return Ok(RoaringBitmap::new()); } @@ -158,7 +155,7 @@ pub fn compute_phrase_docids( { if dist == 0 { match ctx.get_db_word_pair_proximity_docids(s1, s2, 1)? { - Some(m) => bitmaps.push(CboRoaringBitmapCodec::deserialize_from(m)?), + Some(m) => bitmaps.push(m), // If there are no documents for this pair, there will be no // results for the phrase query. None => return Ok(RoaringBitmap::new()), @@ -169,7 +166,7 @@ pub fn compute_phrase_docids( if let Some(m) = ctx.get_db_word_pair_proximity_docids(s1, s2, dist as u8 + 1)? { - bitmap |= CboRoaringBitmapCodec::deserialize_from(m)?; + bitmap |= m; } } if bitmap.is_empty() {