mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-11 15:38:55 +01:00
Update call sites
This commit is contained in:
parent
244003e36f
commit
e7ff987c46
@ -1,11 +1,10 @@
|
|||||||
use heed::BytesDecode;
|
|
||||||
use roaring::{MultiOps, RoaringBitmap};
|
use roaring::{MultiOps, RoaringBitmap};
|
||||||
|
|
||||||
use super::query_graph::QueryGraph;
|
use super::query_graph::QueryGraph;
|
||||||
use super::ranking_rules::{RankingRule, RankingRuleOutput};
|
use super::ranking_rules::{RankingRule, RankingRuleOutput};
|
||||||
use crate::search::new::query_graph::QueryNodeData;
|
use crate::search::new::query_graph::QueryNodeData;
|
||||||
use crate::search::new::query_term::ExactTerm;
|
use crate::search::new::query_term::ExactTerm;
|
||||||
use crate::{CboRoaringBitmapCodec, Result, SearchContext, SearchLogger};
|
use crate::{Result, SearchContext, SearchLogger};
|
||||||
|
|
||||||
/// A ranking rule that produces 3 disjoint buckets:
|
/// A ranking rule that produces 3 disjoint buckets:
|
||||||
///
|
///
|
||||||
@ -161,10 +160,8 @@ impl State {
|
|||||||
// Note: Since the position is stored bucketed in word_position_docids, for queries with a lot of
|
// Note: Since the position is stored bucketed in word_position_docids, for queries with a lot of
|
||||||
// longer phrases we'll be losing on precision here.
|
// longer phrases we'll be losing on precision here.
|
||||||
let bucketed_position = crate::bucketed_position(position + offset);
|
let bucketed_position = crate::bucketed_position(position + offset);
|
||||||
let word_position_docids = CboRoaringBitmapCodec::bytes_decode(
|
let word_position_docids =
|
||||||
ctx.get_db_word_position_docids(*word, bucketed_position)?.unwrap_or_default(),
|
ctx.get_db_word_position_docids(*word, bucketed_position)?.unwrap_or_default();
|
||||||
)
|
|
||||||
.unwrap_or_default();
|
|
||||||
candidates &= word_position_docids;
|
candidates &= word_position_docids;
|
||||||
if candidates.is_empty() {
|
if candidates.is_empty() {
|
||||||
return Ok(State::Empty(query_graph.clone()));
|
return Ok(State::Empty(query_graph.clone()));
|
||||||
@ -191,11 +188,7 @@ impl State {
|
|||||||
// ignore stop words in phrases
|
// ignore stop words in phrases
|
||||||
.flatten()
|
.flatten()
|
||||||
.map(|word| -> Result<_> {
|
.map(|word| -> Result<_> {
|
||||||
Ok(ctx
|
Ok(ctx.get_db_word_fid_docids(*word, fid)?.unwrap_or_default())
|
||||||
.get_db_word_fid_docids(*word, fid)?
|
|
||||||
.map(CboRoaringBitmapCodec::bytes_decode)
|
|
||||||
.unwrap_or_default()
|
|
||||||
.unwrap_or_default())
|
|
||||||
}),
|
}),
|
||||||
)?;
|
)?;
|
||||||
intersection &= &candidates;
|
intersection &= &candidates;
|
||||||
|
@ -1,17 +1,17 @@
|
|||||||
use fst::automaton::Str;
|
|
||||||
use fst::{Automaton, IntoStreamer, Streamer};
|
|
||||||
use heed::types::DecodeIgnore;
|
|
||||||
use heed::BytesDecode;
|
|
||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::collections::BTreeSet;
|
use std::collections::BTreeSet;
|
||||||
use std::ops::ControlFlow;
|
use std::ops::ControlFlow;
|
||||||
|
|
||||||
|
use fst::automaton::Str;
|
||||||
|
use fst::{Automaton, IntoStreamer, Streamer};
|
||||||
|
use heed::types::DecodeIgnore;
|
||||||
|
|
||||||
use super::*;
|
use super::*;
|
||||||
use crate::search::fst_utils::{Complement, Intersection, StartsWith, Union};
|
use crate::search::fst_utils::{Complement, Intersection, StartsWith, Union};
|
||||||
use crate::search::new::query_term::TwoTypoTerm;
|
use crate::search::new::query_term::TwoTypoTerm;
|
||||||
use crate::search::new::{limits, SearchContext};
|
use crate::search::new::{limits, SearchContext};
|
||||||
use crate::search::{build_dfa, get_first};
|
use crate::search::{build_dfa, get_first};
|
||||||
use crate::{CboRoaringBitmapLenCodec, Result, MAX_WORD_LENGTH};
|
use crate::{Result, MAX_WORD_LENGTH};
|
||||||
|
|
||||||
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
#[derive(Debug, Clone, Copy, PartialEq, Eq, Hash)]
|
||||||
pub enum NumberOfTypos {
|
pub enum NumberOfTypos {
|
||||||
@ -385,9 +385,7 @@ fn split_best_frequency(
|
|||||||
let left = ctx.word_interner.insert(left.to_owned());
|
let left = ctx.word_interner.insert(left.to_owned());
|
||||||
let right = ctx.word_interner.insert(right.to_owned());
|
let right = ctx.word_interner.insert(right.to_owned());
|
||||||
|
|
||||||
if let Some(docid_bytes) = ctx.get_db_word_pair_proximity_docids(left, right, 1)? {
|
if let Some(frequency) = ctx.get_db_word_pair_proximity_docids_len(left, right, 1)? {
|
||||||
let frequency =
|
|
||||||
CboRoaringBitmapLenCodec::bytes_decode(docid_bytes).ok_or(heed::Error::Decoding)?;
|
|
||||||
if best.map_or(true, |(old, _, _)| frequency > old) {
|
if best.map_or(true, |(old, _, _)| frequency > old) {
|
||||||
best = Some((frequency, left, right));
|
best = Some((frequency, left, right));
|
||||||
}
|
}
|
||||||
|
@ -1,4 +1,3 @@
|
|||||||
use heed::BytesDecode;
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::{ComputedCondition, RankingRuleGraphTrait};
|
use super::{ComputedCondition, RankingRuleGraphTrait};
|
||||||
@ -28,7 +27,7 @@ fn compute_docids(
|
|||||||
ExactTerm::Phrase(phrase) => ctx.get_phrase_docids(phrase)?.clone(),
|
ExactTerm::Phrase(phrase) => ctx.get_phrase_docids(phrase)?.clone(),
|
||||||
ExactTerm::Word(word) => {
|
ExactTerm::Word(word) => {
|
||||||
if let Some(word_candidates) = ctx.get_db_word_docids(word)? {
|
if let Some(word_candidates) = ctx.get_db_word_docids(word)? {
|
||||||
RoaringBitmapCodec::bytes_decode(word_candidates).ok_or(heed::Error::Decoding)?
|
word_candidates
|
||||||
} else {
|
} else {
|
||||||
return Ok(Default::default());
|
return Ok(Default::default());
|
||||||
}
|
}
|
||||||
|
@ -2,7 +2,6 @@
|
|||||||
|
|
||||||
use std::collections::BTreeSet;
|
use std::collections::BTreeSet;
|
||||||
|
|
||||||
use heed::BytesDecode;
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::ProximityCondition;
|
use super::ProximityCondition;
|
||||||
@ -11,7 +10,7 @@ use crate::search::new::query_term::{Phrase, QueryTermSubset};
|
|||||||
use crate::search::new::ranking_rule_graph::ComputedCondition;
|
use crate::search::new::ranking_rule_graph::ComputedCondition;
|
||||||
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
|
use crate::search::new::resolve_query_graph::compute_query_term_subset_docids;
|
||||||
use crate::search::new::SearchContext;
|
use crate::search::new::SearchContext;
|
||||||
use crate::{CboRoaringBitmapCodec, Result, RoaringBitmapCodec};
|
use crate::Result;
|
||||||
|
|
||||||
pub fn compute_docids(
|
pub fn compute_docids(
|
||||||
ctx: &mut SearchContext,
|
ctx: &mut SearchContext,
|
||||||
@ -92,9 +91,7 @@ pub fn compute_docids(
|
|||||||
if universe.is_disjoint(ctx.get_phrase_docids(left_phrase)?) {
|
if universe.is_disjoint(ctx.get_phrase_docids(left_phrase)?) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
} else if let Some(lw_bytes) = ctx.get_db_word_docids(left_word)? {
|
} else if let Some(left_word_docids) = ctx.get_db_word_docids(left_word)? {
|
||||||
let left_word_docids =
|
|
||||||
RoaringBitmapCodec::bytes_decode(lw_bytes).ok_or(heed::Error::Decoding)?;
|
|
||||||
if universe.is_disjoint(&left_word_docids) {
|
if universe.is_disjoint(&left_word_docids) {
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
@ -155,7 +152,7 @@ fn compute_prefix_edges(
|
|||||||
if let Some(new_docids) =
|
if let Some(new_docids) =
|
||||||
ctx.get_db_word_prefix_pair_proximity_docids(left_word, right_prefix, forward_proximity)?
|
ctx.get_db_word_prefix_pair_proximity_docids(left_word, right_prefix, forward_proximity)?
|
||||||
{
|
{
|
||||||
let new_docids = &universe & CboRoaringBitmapCodec::deserialize_from(new_docids)?;
|
let new_docids = &universe & new_docids;
|
||||||
if !new_docids.is_empty() {
|
if !new_docids.is_empty() {
|
||||||
used_left_words.insert(left_word);
|
used_left_words.insert(left_word);
|
||||||
used_right_prefix.insert(right_prefix);
|
used_right_prefix.insert(right_prefix);
|
||||||
@ -170,7 +167,7 @@ fn compute_prefix_edges(
|
|||||||
left_word,
|
left_word,
|
||||||
backward_proximity,
|
backward_proximity,
|
||||||
)? {
|
)? {
|
||||||
let new_docids = &universe & CboRoaringBitmapCodec::deserialize_from(new_docids)?;
|
let new_docids = &universe & new_docids;
|
||||||
if !new_docids.is_empty() {
|
if !new_docids.is_empty() {
|
||||||
used_left_words.insert(left_word);
|
used_left_words.insert(left_word);
|
||||||
used_right_prefix.insert(right_prefix);
|
used_right_prefix.insert(right_prefix);
|
||||||
@ -217,7 +214,7 @@ fn compute_non_prefix_edges(
|
|||||||
if let Some(new_docids) =
|
if let Some(new_docids) =
|
||||||
ctx.get_db_word_pair_proximity_docids(word1, word2, forward_proximity)?
|
ctx.get_db_word_pair_proximity_docids(word1, word2, forward_proximity)?
|
||||||
{
|
{
|
||||||
let new_docids = &universe & CboRoaringBitmapCodec::deserialize_from(new_docids)?;
|
let new_docids = &universe & new_docids;
|
||||||
if !new_docids.is_empty() {
|
if !new_docids.is_empty() {
|
||||||
used_left_words.insert(word1);
|
used_left_words.insert(word1);
|
||||||
used_right_words.insert(word2);
|
used_right_words.insert(word2);
|
||||||
@ -231,7 +228,7 @@ fn compute_non_prefix_edges(
|
|||||||
if let Some(new_docids) =
|
if let Some(new_docids) =
|
||||||
ctx.get_db_word_pair_proximity_docids(word2, word1, backward_proximity)?
|
ctx.get_db_word_pair_proximity_docids(word2, word1, backward_proximity)?
|
||||||
{
|
{
|
||||||
let new_docids = &universe & CboRoaringBitmapCodec::deserialize_from(new_docids)?;
|
let new_docids = &universe & new_docids;
|
||||||
if !new_docids.is_empty() {
|
if !new_docids.is_empty() {
|
||||||
used_left_words.insert(word2);
|
used_left_words.insert(word2);
|
||||||
used_right_words.insert(word1);
|
used_right_words.insert(word1);
|
||||||
|
@ -3,7 +3,6 @@
|
|||||||
use std::collections::VecDeque;
|
use std::collections::VecDeque;
|
||||||
|
|
||||||
use fxhash::FxHashMap;
|
use fxhash::FxHashMap;
|
||||||
use heed::BytesDecode;
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::interner::Interned;
|
use super::interner::Interned;
|
||||||
@ -12,7 +11,7 @@ use super::query_term::{Phrase, QueryTermSubset};
|
|||||||
use super::small_bitmap::SmallBitmap;
|
use super::small_bitmap::SmallBitmap;
|
||||||
use super::{QueryGraph, SearchContext};
|
use super::{QueryGraph, SearchContext};
|
||||||
use crate::search::new::query_term::LocatedQueryTermSubset;
|
use crate::search::new::query_term::LocatedQueryTermSubset;
|
||||||
use crate::{CboRoaringBitmapCodec, Result, RoaringBitmapCodec};
|
use crate::Result;
|
||||||
|
|
||||||
#[derive(Default)]
|
#[derive(Default)]
|
||||||
pub struct PhraseDocIdsCache {
|
pub struct PhraseDocIdsCache {
|
||||||
@ -37,7 +36,7 @@ pub fn compute_query_term_subset_docids(
|
|||||||
let mut docids = RoaringBitmap::new();
|
let mut docids = RoaringBitmap::new();
|
||||||
for word in term.all_single_words_except_prefix_db(ctx)? {
|
for word in term.all_single_words_except_prefix_db(ctx)? {
|
||||||
if let Some(word_docids) = ctx.get_db_word_docids(word)? {
|
if let Some(word_docids) = ctx.get_db_word_docids(word)? {
|
||||||
docids |= RoaringBitmapCodec::bytes_decode(word_docids).ok_or(heed::Error::Decoding)?;
|
docids |= word_docids;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
for phrase in term.all_phrases(ctx)? {
|
for phrase in term.all_phrases(ctx)? {
|
||||||
@ -46,8 +45,7 @@ pub fn compute_query_term_subset_docids(
|
|||||||
|
|
||||||
if let Some(prefix) = term.use_prefix_db(ctx) {
|
if let Some(prefix) = term.use_prefix_db(ctx) {
|
||||||
if let Some(prefix_docids) = ctx.get_db_word_prefix_docids(prefix)? {
|
if let Some(prefix_docids) = ctx.get_db_word_prefix_docids(prefix)? {
|
||||||
docids |=
|
docids |= prefix_docids;
|
||||||
RoaringBitmapCodec::bytes_decode(prefix_docids).ok_or(heed::Error::Decoding)?;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -128,8 +126,7 @@ pub fn compute_phrase_docids(
|
|||||||
if words.len() == 1 {
|
if words.len() == 1 {
|
||||||
if let Some(word) = &words[0] {
|
if let Some(word) = &words[0] {
|
||||||
if let Some(word_docids) = ctx.get_db_word_docids(*word)? {
|
if let Some(word_docids) = ctx.get_db_word_docids(*word)? {
|
||||||
return RoaringBitmapCodec::bytes_decode(word_docids)
|
return Ok(word_docids);
|
||||||
.ok_or(heed::Error::Decoding.into());
|
|
||||||
} else {
|
} else {
|
||||||
return Ok(RoaringBitmap::new());
|
return Ok(RoaringBitmap::new());
|
||||||
}
|
}
|
||||||
@ -158,7 +155,7 @@ pub fn compute_phrase_docids(
|
|||||||
{
|
{
|
||||||
if dist == 0 {
|
if dist == 0 {
|
||||||
match ctx.get_db_word_pair_proximity_docids(s1, s2, 1)? {
|
match ctx.get_db_word_pair_proximity_docids(s1, s2, 1)? {
|
||||||
Some(m) => bitmaps.push(CboRoaringBitmapCodec::deserialize_from(m)?),
|
Some(m) => bitmaps.push(m),
|
||||||
// If there are no documents for this pair, there will be no
|
// If there are no documents for this pair, there will be no
|
||||||
// results for the phrase query.
|
// results for the phrase query.
|
||||||
None => return Ok(RoaringBitmap::new()),
|
None => return Ok(RoaringBitmap::new()),
|
||||||
@ -169,7 +166,7 @@ pub fn compute_phrase_docids(
|
|||||||
if let Some(m) =
|
if let Some(m) =
|
||||||
ctx.get_db_word_pair_proximity_docids(s1, s2, dist as u8 + 1)?
|
ctx.get_db_word_pair_proximity_docids(s1, s2, dist as u8 + 1)?
|
||||||
{
|
{
|
||||||
bitmap |= CboRoaringBitmapCodec::deserialize_from(m)?;
|
bitmap |= m;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if bitmap.is_empty() {
|
if bitmap.is_empty() {
|
||||||
|
Loading…
Reference in New Issue
Block a user