mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 14:54:27 +01:00
Do less useless intersections
This commit is contained in:
parent
0ca1a4e805
commit
41f51adbec
@ -1,3 +1,4 @@
|
|||||||
|
use heed::types::Bytes;
|
||||||
use roaring::{MultiOps, RoaringBitmap};
|
use roaring::{MultiOps, RoaringBitmap};
|
||||||
|
|
||||||
use super::query_graph::QueryGraph;
|
use super::query_graph::QueryGraph;
|
||||||
@ -5,7 +6,7 @@ use super::ranking_rules::{RankingRule, RankingRuleOutput};
|
|||||||
use crate::score_details::{self, ScoreDetails};
|
use crate::score_details::{self, ScoreDetails};
|
||||||
use crate::search::new::query_graph::QueryNodeData;
|
use crate::search::new::query_graph::QueryNodeData;
|
||||||
use crate::search::new::query_term::ExactTerm;
|
use crate::search::new::query_term::ExactTerm;
|
||||||
use crate::{Result, SearchContext, SearchLogger};
|
use crate::{CboRoaringBitmapCodec, Result, SearchContext, SearchLogger};
|
||||||
|
|
||||||
/// A ranking rule that produces 3 disjoint buckets:
|
/// A ranking rule that produces 3 disjoint buckets:
|
||||||
///
|
///
|
||||||
@ -173,8 +174,7 @@ impl State {
|
|||||||
let bucketed_position = crate::bucketed_position(position + offset);
|
let bucketed_position = crate::bucketed_position(position + offset);
|
||||||
let word_position_docids = ctx
|
let word_position_docids = ctx
|
||||||
.get_db_word_position_docids(Some(universe), *word, bucketed_position)?
|
.get_db_word_position_docids(Some(universe), *word, bucketed_position)?
|
||||||
.unwrap_or_default()
|
.unwrap_or_default();
|
||||||
& universe;
|
|
||||||
candidates &= word_position_docids;
|
candidates &= word_position_docids;
|
||||||
if candidates.is_empty() {
|
if candidates.is_empty() {
|
||||||
return Ok(State::Empty(query_graph.clone()));
|
return Ok(State::Empty(query_graph.clone()));
|
||||||
@ -205,16 +205,24 @@ impl State {
|
|||||||
.unwrap_or_default())
|
.unwrap_or_default())
|
||||||
}),
|
}),
|
||||||
)?;
|
)?;
|
||||||
|
// TODO Why not doing this intersection in the MultiOps above?
|
||||||
intersection &= &candidates;
|
intersection &= &candidates;
|
||||||
if !intersection.is_empty() {
|
if !intersection.is_empty() {
|
||||||
// Although not really worth it in terms of performance,
|
// Although not really worth it in terms of performance,
|
||||||
// if would be good to put this in cache for the sake of consistency
|
// if would be good to put this in cache for the sake of consistency
|
||||||
let candidates_with_exact_word_count = if count_all_positions < u8::MAX as usize {
|
let candidates_with_exact_word_count = if count_all_positions < u8::MAX as usize {
|
||||||
ctx.index
|
let bitmap_bytes = ctx
|
||||||
|
.index
|
||||||
.field_id_word_count_docids
|
.field_id_word_count_docids
|
||||||
.get(ctx.txn, &(fid, count_all_positions as u8))?
|
.remap_data_type::<Bytes>()
|
||||||
.unwrap_or_default()
|
.get(ctx.txn, &(fid, count_all_positions as u8))?;
|
||||||
& universe
|
|
||||||
|
match bitmap_bytes {
|
||||||
|
Some(bytes) => {
|
||||||
|
CboRoaringBitmapCodec::intersection_with_serialized(bytes, universe)?
|
||||||
|
}
|
||||||
|
None => RoaringBitmap::default(),
|
||||||
|
}
|
||||||
} else {
|
} else {
|
||||||
RoaringBitmap::default()
|
RoaringBitmap::default()
|
||||||
};
|
};
|
||||||
@ -237,6 +245,8 @@ impl State {
|
|||||||
let (state, output) = match state {
|
let (state, output) = match state {
|
||||||
State::Uninitialized => (state, None),
|
State::Uninitialized => (state, None),
|
||||||
State::ExactAttribute(query_graph, candidates_per_attribute) => {
|
State::ExactAttribute(query_graph, candidates_per_attribute) => {
|
||||||
|
// TODO it can be much faster to do the intersections before the unions...
|
||||||
|
// or maybe the candidates_per_attribute are not containing anything outside universe
|
||||||
let mut candidates = MultiOps::union(candidates_per_attribute.iter().map(
|
let mut candidates = MultiOps::union(candidates_per_attribute.iter().map(
|
||||||
|FieldCandidates { start_with_exact, exact_word_count }| {
|
|FieldCandidates { start_with_exact, exact_word_count }| {
|
||||||
start_with_exact & exact_word_count
|
start_with_exact & exact_word_count
|
||||||
@ -255,6 +265,8 @@ impl State {
|
|||||||
)
|
)
|
||||||
}
|
}
|
||||||
State::AttributeStarts(query_graph, candidates_per_attribute) => {
|
State::AttributeStarts(query_graph, candidates_per_attribute) => {
|
||||||
|
// TODO it can be much faster to do the intersections before the unions...
|
||||||
|
// or maybe the candidates_per_attribute are not containing anything outside universe
|
||||||
let mut candidates = MultiOps::union(candidates_per_attribute.into_iter().map(
|
let mut candidates = MultiOps::union(candidates_per_attribute.into_iter().map(
|
||||||
|FieldCandidates { mut start_with_exact, exact_word_count }| {
|
|FieldCandidates { mut start_with_exact, exact_word_count }| {
|
||||||
start_with_exact -= exact_word_count;
|
start_with_exact -= exact_word_count;
|
||||||
|
@ -29,14 +29,12 @@ impl RankingRuleGraphTrait for FidGraph {
|
|||||||
let FidCondition { term, .. } = condition;
|
let FidCondition { term, .. } = condition;
|
||||||
|
|
||||||
let docids = if let Some(fid) = condition.fid {
|
let docids = if let Some(fid) = condition.fid {
|
||||||
// maybe compute_query_term_subset_docids_within_field_id should accept a universe as argument
|
compute_query_term_subset_docids_within_field_id(
|
||||||
let docids = compute_query_term_subset_docids_within_field_id(
|
|
||||||
ctx,
|
ctx,
|
||||||
Some(universe),
|
Some(universe),
|
||||||
&term.term_subset,
|
&term.term_subset,
|
||||||
fid,
|
fid,
|
||||||
)?;
|
)?
|
||||||
docids & universe
|
|
||||||
} else {
|
} else {
|
||||||
RoaringBitmap::new()
|
RoaringBitmap::new()
|
||||||
};
|
};
|
||||||
|
@ -28,15 +28,15 @@ impl RankingRuleGraphTrait for PositionGraph {
|
|||||||
) -> Result<ComputedCondition> {
|
) -> Result<ComputedCondition> {
|
||||||
let PositionCondition { term, positions } = condition;
|
let PositionCondition { term, positions } = condition;
|
||||||
let mut docids = RoaringBitmap::new();
|
let mut docids = RoaringBitmap::new();
|
||||||
|
// TODO use MultiOps to do the big union
|
||||||
for position in positions {
|
for position in positions {
|
||||||
// maybe compute_query_term_subset_docids_within_position should accept a universe as argument
|
// maybe compute_query_term_subset_docids_within_position should accept a universe as argument
|
||||||
docids |= universe
|
docids |= compute_query_term_subset_docids_within_position(
|
||||||
& compute_query_term_subset_docids_within_position(
|
ctx,
|
||||||
ctx,
|
Some(universe),
|
||||||
Some(universe),
|
&term.term_subset,
|
||||||
&term.term_subset,
|
*position,
|
||||||
*position,
|
)?;
|
||||||
)?;
|
|
||||||
}
|
}
|
||||||
Ok(ComputedCondition {
|
Ok(ComputedCondition {
|
||||||
docids,
|
docids,
|
||||||
|
@ -143,7 +143,6 @@ fn compute_prefix_edges(
|
|||||||
right_prefix,
|
right_prefix,
|
||||||
forward_proximity,
|
forward_proximity,
|
||||||
)? {
|
)? {
|
||||||
let new_docids = &universe & new_docids;
|
|
||||||
if !new_docids.is_empty() {
|
if !new_docids.is_empty() {
|
||||||
used_left_words.insert(left_word);
|
used_left_words.insert(left_word);
|
||||||
used_right_prefix.insert(right_prefix);
|
used_right_prefix.insert(right_prefix);
|
||||||
@ -153,13 +152,13 @@ fn compute_prefix_edges(
|
|||||||
|
|
||||||
// No swapping when computing the proximity between a phrase and a word
|
// No swapping when computing the proximity between a phrase and a word
|
||||||
if left_phrase.is_none() {
|
if left_phrase.is_none() {
|
||||||
|
// TODO check that the fact that the universe always changes is not an issue, e.g. caching stuff.
|
||||||
if let Some(new_docids) = ctx.get_db_prefix_word_pair_proximity_docids(
|
if let Some(new_docids) = ctx.get_db_prefix_word_pair_proximity_docids(
|
||||||
Some(&universe),
|
Some(&universe),
|
||||||
right_prefix,
|
right_prefix,
|
||||||
left_word,
|
left_word,
|
||||||
backward_proximity,
|
backward_proximity,
|
||||||
)? {
|
)? {
|
||||||
let new_docids = &universe & new_docids;
|
|
||||||
if !new_docids.is_empty() {
|
if !new_docids.is_empty() {
|
||||||
used_left_words.insert(left_word);
|
used_left_words.insert(left_word);
|
||||||
used_right_prefix.insert(right_prefix);
|
used_right_prefix.insert(right_prefix);
|
||||||
@ -185,9 +184,7 @@ fn compute_non_prefix_edges(
|
|||||||
let mut universe = universe.clone();
|
let mut universe = universe.clone();
|
||||||
|
|
||||||
for phrase in left_phrase.iter().chain(right_phrase.iter()).copied() {
|
for phrase in left_phrase.iter().chain(right_phrase.iter()).copied() {
|
||||||
// TODO do the intersection in the method, again!
|
universe &= ctx.get_phrase_docids(Some(&universe), phrase)?;
|
||||||
let phrase_docids = ctx.get_phrase_docids(Some(&universe), phrase)?;
|
|
||||||
universe &= phrase_docids;
|
|
||||||
if universe.is_empty() {
|
if universe.is_empty() {
|
||||||
return Ok(());
|
return Ok(());
|
||||||
}
|
}
|
||||||
|
@ -251,6 +251,7 @@ pub fn compute_phrase_docids(
|
|||||||
// We sort the bitmaps so that we perform the small intersections first, which is faster.
|
// We sort the bitmaps so that we perform the small intersections first, which is faster.
|
||||||
bitmaps.sort_unstable_by_key(|a| a.len());
|
bitmaps.sort_unstable_by_key(|a| a.len());
|
||||||
|
|
||||||
|
// TODO use MultiOps intersection which and remove the above sort
|
||||||
for bitmap in bitmaps {
|
for bitmap in bitmaps {
|
||||||
candidates &= bitmap;
|
candidates &= bitmap;
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user