729: Fix distincted exhaustive hits r=Kerollmops a=ManyTheFish

This PR changes the name and behavior of `bucket_candidates`:
- `bucket_candidates` become `initial_candidates` that is less confusing
- `initial_candidates` is no more a simple `RoaringBitmap` but an enum allowing us to precise if the candidates are exhaustive or not
- this enum ensures that any modification is allowed only if the candidates are not already exhaustive.

The bug occurred because `initial_candidates` are modified during the bucket sort allowing the estimation to be more and more precise along the search, and this was an issue when the `initial_candidates` were already exhaustive, now, if candidates are exhaustive, then no modifications are made.

Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
bors[bot] 2022-12-08 09:26:34 +00:00 committed by GitHub
commit 1f1beae077
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
12 changed files with 285 additions and 113 deletions

View File

@ -9,7 +9,7 @@ use super::{Criterion, CriterionParameters, CriterionResult};
use crate::facet::FacetType;
use crate::heed_codec::facet::FacetGroupKeyCodec;
use crate::heed_codec::ByteSliceRefCodec;
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder, InitialCandidates};
use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
use crate::search::query_tree::Operation;
use crate::{FieldId, Index, Result};
@ -27,7 +27,7 @@ pub struct AscDesc<'t> {
query_tree: Option<Operation>,
candidates: Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>,
allowed_candidates: RoaringBitmap,
bucket_candidates: RoaringBitmap,
initial_candidates: InitialCandidates,
faceted_candidates: RoaringBitmap,
parent: Box<dyn Criterion + 't>,
}
@ -81,7 +81,7 @@ impl<'t> AscDesc<'t> {
candidates: Box::new(std::iter::empty()),
allowed_candidates: RoaringBitmap::new(),
faceted_candidates,
bucket_candidates: RoaringBitmap::new(),
initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
parent,
})
}
@ -106,7 +106,7 @@ impl<'t> Criterion for AscDesc<'t> {
query_tree: self.query_tree.clone(),
candidates: Some(take(&mut self.allowed_candidates)),
filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)),
initial_candidates: Some(self.initial_candidates.take()),
}));
}
None => match self.parent.next(params)? {
@ -114,7 +114,7 @@ impl<'t> Criterion for AscDesc<'t> {
query_tree,
candidates,
filtered_candidates,
bucket_candidates,
initial_candidates,
}) => {
self.query_tree = query_tree;
let mut candidates = match (&self.query_tree, candidates) {
@ -130,9 +130,11 @@ impl<'t> Criterion for AscDesc<'t> {
candidates &= filtered_candidates;
}
match bucket_candidates {
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
None => self.bucket_candidates |= &candidates,
match initial_candidates {
Some(initial_candidates) => {
self.initial_candidates |= initial_candidates
}
None => self.initial_candidates.map_inplace(|c| c | &candidates),
}
if candidates.is_empty() {
@ -160,7 +162,7 @@ impl<'t> Criterion for AscDesc<'t> {
query_tree: self.query_tree.clone(),
candidates: Some(candidates),
filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)),
initial_candidates: Some(self.initial_candidates.take()),
}));
}
}

View File

@ -7,7 +7,7 @@ use std::mem::take;
use roaring::RoaringBitmap;
use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult};
use crate::search::criteria::Query;
use crate::search::criteria::{InitialCandidates, Query};
use crate::search::query_tree::{Operation, QueryKind};
use crate::search::{build_dfa, word_derivations, WordDerivationsCache};
use crate::Result;
@ -26,7 +26,7 @@ type FlattenedQueryTree = Vec<Vec<Vec<Query>>>;
pub struct Attribute<'t> {
ctx: &'t dyn Context<'t>,
state: Option<(Operation, FlattenedQueryTree, RoaringBitmap)>,
bucket_candidates: RoaringBitmap,
initial_candidates: InitialCandidates,
parent: Box<dyn Criterion + 't>,
linear_buckets: Option<btree_map::IntoIter<u64, RoaringBitmap>>,
set_buckets: Option<BinaryHeap<Branch<'t>>>,
@ -37,7 +37,7 @@ impl<'t> Attribute<'t> {
Attribute {
ctx,
state: None,
bucket_candidates: RoaringBitmap::new(),
initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
parent,
linear_buckets: None,
set_buckets: None,
@ -60,7 +60,7 @@ impl<'t> Criterion for Attribute<'t> {
query_tree: Some(query_tree),
candidates: Some(RoaringBitmap::new()),
filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)),
initial_candidates: Some(self.initial_candidates.take()),
}));
}
Some((query_tree, flattened_query_tree, mut allowed_candidates)) => {
@ -84,7 +84,7 @@ impl<'t> Criterion for Attribute<'t> {
query_tree: Some(query_tree),
candidates: Some(RoaringBitmap::new()),
filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)),
initial_candidates: Some(self.initial_candidates.take()),
}));
}
}
@ -109,7 +109,7 @@ impl<'t> Criterion for Attribute<'t> {
query_tree: Some(query_tree),
candidates: Some(RoaringBitmap::new()),
filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)),
initial_candidates: Some(self.initial_candidates.take()),
}));
}
}
@ -124,7 +124,7 @@ impl<'t> Criterion for Attribute<'t> {
query_tree: Some(query_tree),
candidates: Some(found_candidates),
filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)),
initial_candidates: Some(self.initial_candidates.take()),
}));
}
None => match self.parent.next(params)? {
@ -132,7 +132,7 @@ impl<'t> Criterion for Attribute<'t> {
query_tree: Some(query_tree),
candidates,
filtered_candidates,
bucket_candidates,
initial_candidates,
}) => {
let mut candidates = match candidates {
Some(candidates) => candidates,
@ -148,9 +148,11 @@ impl<'t> Criterion for Attribute<'t> {
let flattened_query_tree = flatten_query_tree(&query_tree);
match bucket_candidates {
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
None => self.bucket_candidates |= &candidates,
match initial_candidates {
Some(initial_candidates) => {
self.initial_candidates |= initial_candidates
}
None => self.initial_candidates.map_inplace(|c| c | &candidates),
}
self.state = Some((query_tree, flattened_query_tree, candidates));
@ -160,13 +162,13 @@ impl<'t> Criterion for Attribute<'t> {
query_tree: None,
candidates,
filtered_candidates,
bucket_candidates,
initial_candidates,
}) => {
return Ok(Some(CriterionResult {
query_tree: None,
candidates,
filtered_candidates,
bucket_candidates,
initial_candidates,
}));
}
None => return Ok(None),

View File

@ -8,6 +8,7 @@ use roaring::RoaringBitmap;
use crate::search::criteria::{
resolve_phrase, resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult,
InitialCandidates,
};
use crate::search::query_tree::{Operation, PrimitiveQueryPart};
use crate::{absolute_from_relative_position, FieldId, Result};
@ -16,7 +17,7 @@ pub struct Exactness<'t> {
ctx: &'t dyn Context<'t>,
query_tree: Option<Operation>,
state: Option<State>,
bucket_candidates: RoaringBitmap,
initial_candidates: InitialCandidates,
parent: Box<dyn Criterion + 't>,
query: Vec<ExactQueryPart>,
}
@ -36,7 +37,7 @@ impl<'t> Exactness<'t> {
ctx,
query_tree: None,
state: None,
bucket_candidates: RoaringBitmap::new(),
initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
parent,
query,
})
@ -68,7 +69,7 @@ impl<'t> Criterion for Exactness<'t> {
query_tree: self.query_tree.clone(),
candidates: Some(candidates),
filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)),
initial_candidates: Some(self.initial_candidates.take()),
}));
}
None => match self.parent.next(params)? {
@ -76,7 +77,7 @@ impl<'t> Criterion for Exactness<'t> {
query_tree: Some(query_tree),
candidates,
filtered_candidates,
bucket_candidates,
initial_candidates,
}) => {
let mut candidates = match candidates {
Some(candidates) => candidates,
@ -90,9 +91,11 @@ impl<'t> Criterion for Exactness<'t> {
candidates &= filtered_candidates;
}
match bucket_candidates {
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
None => self.bucket_candidates |= &candidates,
match initial_candidates {
Some(initial_candidates) => {
self.initial_candidates |= initial_candidates
}
None => self.initial_candidates.map_inplace(|c| c | &candidates),
}
self.state = Some(State::new(candidates));
@ -102,13 +105,13 @@ impl<'t> Criterion for Exactness<'t> {
query_tree: None,
candidates,
filtered_candidates,
bucket_candidates,
initial_candidates,
}) => {
return Ok(Some(CriterionResult {
query_tree: None,
candidates,
filtered_candidates,
bucket_candidates,
initial_candidates,
}));
}
None => return Ok(None),

View File

@ -2,6 +2,7 @@ use log::debug;
use roaring::RoaringBitmap;
use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult};
use crate::search::criteria::InitialCandidates;
use crate::search::query_tree::Operation;
use crate::search::WordDerivationsCache;
use crate::Result;
@ -14,7 +15,7 @@ pub struct FinalResult {
/// The candidates of the current bucket of the last criterion.
pub candidates: RoaringBitmap,
/// Candidates that comes from the current bucket of the initial criterion.
pub bucket_candidates: RoaringBitmap,
pub initial_candidates: InitialCandidates,
}
pub struct Final<'t> {
@ -49,7 +50,7 @@ impl<'t> Final<'t> {
query_tree,
candidates,
filtered_candidates,
bucket_candidates,
initial_candidates,
}) => {
let mut candidates = match (candidates, query_tree.as_ref()) {
(Some(candidates), _) => candidates,
@ -63,11 +64,12 @@ impl<'t> Final<'t> {
candidates &= filtered_candidates;
}
let bucket_candidates = bucket_candidates.unwrap_or_else(|| candidates.clone());
let initial_candidates = initial_candidates
.unwrap_or_else(|| InitialCandidates::Estimated(candidates.clone()));
self.returned_candidates |= &candidates;
Ok(Some(FinalResult { query_tree, candidates, bucket_candidates }))
Ok(Some(FinalResult { query_tree, candidates, initial_candidates }))
}
None => Ok(None),
}

View File

@ -4,7 +4,7 @@ use roaring::RoaringBitmap;
use rstar::RTree;
use super::{Criterion, CriterionParameters, CriterionResult};
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder, InitialCandidates};
use crate::{lat_lng_to_xyz, GeoPoint, Index, Result};
pub struct Geo<'t> {
@ -14,7 +14,7 @@ pub struct Geo<'t> {
parent: Box<dyn Criterion + 't>,
candidates: Box<dyn Iterator<Item = RoaringBitmap>>,
allowed_candidates: RoaringBitmap,
bucket_candidates: RoaringBitmap,
initial_candidates: InitialCandidates,
rtree: Option<RTree<GeoPoint>>,
point: [f64; 2],
}
@ -47,7 +47,7 @@ impl<'t> Geo<'t> {
) -> Result<Self> {
let candidates = Box::new(iter::empty());
let allowed_candidates = index.geo_faceted_documents_ids(rtxn)?;
let bucket_candidates = RoaringBitmap::new();
let initial_candidates = InitialCandidates::Estimated(RoaringBitmap::new());
let rtree = index.geo_rtree(rtxn)?;
Ok(Self {
@ -57,7 +57,7 @@ impl<'t> Geo<'t> {
parent,
candidates,
allowed_candidates,
bucket_candidates,
initial_candidates,
rtree,
point,
})
@ -77,7 +77,7 @@ impl Criterion for Geo<'_> {
query_tree: None,
candidates: Some(candidates),
filtered_candidates: None,
bucket_candidates: Some(self.bucket_candidates.clone()),
initial_candidates: Some(self.initial_candidates.clone()),
}));
}
None => match self.parent.next(params)? {
@ -85,7 +85,7 @@ impl Criterion for Geo<'_> {
query_tree,
candidates,
filtered_candidates,
bucket_candidates,
initial_candidates,
}) => {
let mut candidates = match (&query_tree, candidates) {
(_, Some(candidates)) => candidates,
@ -100,9 +100,11 @@ impl Criterion for Geo<'_> {
candidates &= filtered_candidates;
}
match bucket_candidates {
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
None => self.bucket_candidates |= &candidates,
match initial_candidates {
Some(initial_candidates) => {
self.initial_candidates |= initial_candidates
}
None => self.initial_candidates.map_inplace(|c| c | &candidates),
}
if candidates.is_empty() {

View File

@ -1,7 +1,7 @@
use roaring::RoaringBitmap;
use super::{Criterion, CriterionParameters, CriterionResult};
use crate::search::criteria::{resolve_query_tree, Context};
use crate::search::criteria::{resolve_query_tree, Context, InitialCandidates};
use crate::search::query_tree::Operation;
use crate::search::Distinct;
use crate::Result;
@ -27,7 +27,7 @@ impl<'t, D> Initial<'t, D> {
query_tree,
candidates: None,
filtered_candidates,
bucket_candidates: None,
initial_candidates: None,
};
Initial { ctx, answer: Some(answer), exhaustive_number_hits, distinct }
}
@ -41,32 +41,34 @@ impl<D: Distinct> Criterion for Initial<'_, D> {
.map(|mut answer| {
if self.exhaustive_number_hits && answer.query_tree.is_some() {
// resolve the whole query tree to retrieve an exhaustive list of documents matching the query.
// then remove the potential soft deleted documents.
let mut candidates = resolve_query_tree(
self.ctx,
answer.query_tree.as_ref().unwrap(),
params.wdcache,
)?;
)? - params.excluded_candidates;
// Apply the filters on the documents retrieved with the query tree.
if let Some(ref filtered_candidates) = answer.filtered_candidates {
candidates &= filtered_candidates;
}
// because the bucket_candidates should be an exhaustive count of the matching documents,
// because the initial_candidates should be an exhaustive count of the matching documents,
// we precompute the distinct attributes.
let bucket_candidates = match &mut self.distinct {
let initial_candidates = match &mut self.distinct {
Some(distinct) => {
let mut bucket_candidates = RoaringBitmap::new();
let mut initial_candidates = RoaringBitmap::new();
for c in distinct.distinct(candidates.clone(), RoaringBitmap::new()) {
bucket_candidates.insert(c?);
initial_candidates.insert(c?);
}
bucket_candidates
initial_candidates
}
None => candidates.clone(),
};
answer.candidates = Some(candidates);
answer.bucket_candidates = Some(bucket_candidates);
answer.initial_candidates =
Some(InitialCandidates::Exhaustive(initial_candidates));
}
Ok(answer)
})

View File

@ -1,5 +1,7 @@
use std::borrow::Cow;
use std::collections::HashMap;
use std::mem::take;
use std::ops::{BitOr, BitOrAssign};
use roaring::RoaringBitmap;
@ -41,7 +43,7 @@ pub struct CriterionResult {
/// The candidates, coming from facet filters, that this criterion is allowed to return subsets of.
filtered_candidates: Option<RoaringBitmap>,
/// Candidates that comes from the current bucket of the initial criterion.
bucket_candidates: Option<RoaringBitmap>,
initial_candidates: Option<InitialCandidates>,
}
#[derive(Debug, PartialEq)]
@ -65,6 +67,71 @@ impl Default for Candidates {
}
}
/// Either a set of candidates that defines the estimated set of candidates
/// that could be returned,
/// or the Exhaustive set of candidates that will be returned if all possible results are fetched.
#[derive(Debug, Clone, PartialEq)]
pub enum InitialCandidates {
Estimated(RoaringBitmap),
Exhaustive(RoaringBitmap),
}
impl InitialCandidates {
fn take(&mut self) -> Self {
match self {
Self::Estimated(c) => Self::Estimated(take(c)),
Self::Exhaustive(c) => Self::Exhaustive(take(c)),
}
}
/// modify the containing roaring bitmap inplace if the set isn't already Exhaustive.
pub fn map_inplace<F>(&mut self, f: F)
where
F: FnOnce(RoaringBitmap) -> RoaringBitmap,
{
if let Self::Estimated(c) = self {
*c = f(take(c))
}
}
pub fn into_inner(self) -> RoaringBitmap {
match self {
Self::Estimated(c) => c,
Self::Exhaustive(c) => c,
}
}
}
impl BitOrAssign for InitialCandidates {
/// Make an union between the containing roaring bitmaps if the set isn't already Exhaustive.
/// In the case of rhs is Exhaustive and not self, then rhs replaces self.
fn bitor_assign(&mut self, rhs: Self) {
if let Self::Estimated(c) = self {
*self = match rhs {
Self::Estimated(rhs) => Self::Estimated(rhs | &*c),
Self::Exhaustive(rhs) => Self::Exhaustive(rhs),
}
}
}
}
impl BitOr for InitialCandidates {
type Output = Self;
/// Make an union between the containing roaring bitmaps if the set isn't already Exhaustive.
/// In the case of rhs is Exhaustive and not self, then rhs replaces self.
fn bitor(self, rhs: Self) -> Self::Output {
if let Self::Estimated(c) = self {
match rhs {
Self::Estimated(rhs) => Self::Estimated(rhs | c),
Self::Exhaustive(rhs) => Self::Exhaustive(rhs),
}
} else {
self.clone()
}
}
}
pub trait Context<'c> {
fn documents_ids(&self) -> heed::Result<RoaringBitmap>;
fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;

View File

@ -1,6 +1,5 @@
use std::collections::btree_map::{self, BTreeMap};
use std::collections::hash_map::HashMap;
use std::mem::take;
use log::debug;
use roaring::RoaringBitmap;
@ -10,6 +9,7 @@ use super::{
query_docids, query_pair_proximity_docids, resolve_phrase, resolve_query_tree, Context,
Criterion, CriterionParameters, CriterionResult,
};
use crate::search::criteria::InitialCandidates;
use crate::search::query_tree::{maximum_proximity, Operation, Query, QueryKind};
use crate::search::{build_dfa, WordDerivationsCache};
use crate::{Position, Result};
@ -29,7 +29,7 @@ pub struct Proximity<'t> {
/// (max_proximity, query_tree, allowed_candidates)
state: Option<(u8, Operation, RoaringBitmap)>,
proximity: u8,
bucket_candidates: RoaringBitmap,
initial_candidates: InitialCandidates,
parent: Box<dyn Criterion + 't>,
candidates_cache: Cache,
plane_sweep_cache: Option<btree_map::IntoIter<u8, RoaringBitmap>>,
@ -41,7 +41,7 @@ impl<'t> Proximity<'t> {
ctx,
state: None,
proximity: 0,
bucket_candidates: RoaringBitmap::new(),
initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
parent,
candidates_cache: Cache::new(),
plane_sweep_cache: None,
@ -115,7 +115,7 @@ impl<'t> Criterion for Proximity<'t> {
query_tree: Some(query_tree.clone()),
candidates: Some(new_candidates),
filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)),
initial_candidates: Some(self.initial_candidates.take()),
}));
}
None => match self.parent.next(params)? {
@ -123,7 +123,7 @@ impl<'t> Criterion for Proximity<'t> {
query_tree: Some(query_tree),
candidates,
filtered_candidates,
bucket_candidates,
initial_candidates,
}) => {
let mut candidates = match candidates {
Some(candidates) => candidates,
@ -137,9 +137,11 @@ impl<'t> Criterion for Proximity<'t> {
candidates &= filtered_candidates;
}
match bucket_candidates {
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
None => self.bucket_candidates |= &candidates,
match initial_candidates {
Some(initial_candidates) => {
self.initial_candidates |= initial_candidates
}
None => self.initial_candidates.map_inplace(|c| c | &candidates),
}
let maximum_proximity = maximum_proximity(&query_tree);
@ -151,13 +153,13 @@ impl<'t> Criterion for Proximity<'t> {
query_tree: None,
candidates,
filtered_candidates,
bucket_candidates,
initial_candidates,
}) => {
return Ok(Some(CriterionResult {
query_tree: None,
candidates,
filtered_candidates,
bucket_candidates,
initial_candidates,
}));
}
None => return Ok(None),

View File

@ -9,7 +9,7 @@ use super::{
query_docids, resolve_query_tree, Candidates, Context, Criterion, CriterionParameters,
CriterionResult,
};
use crate::search::criteria::resolve_phrase;
use crate::search::criteria::{resolve_phrase, InitialCandidates};
use crate::search::query_tree::{maximum_typo, Operation, Query, QueryKind};
use crate::search::{word_derivations, WordDerivationsCache};
use crate::Result;
@ -22,7 +22,7 @@ pub struct Typo<'t> {
/// (max_typos, query_tree, candidates)
state: Option<(u8, Operation, Candidates)>,
typos: u8,
bucket_candidates: Option<RoaringBitmap>,
initial_candidates: Option<InitialCandidates>,
parent: Box<dyn Criterion + 't>,
candidates_cache: HashMap<(Operation, u8), RoaringBitmap>,
}
@ -33,7 +33,7 @@ impl<'t> Typo<'t> {
ctx,
state: None,
typos: 0,
bucket_candidates: None,
initial_candidates: None,
parent,
candidates_cache: HashMap::new(),
}
@ -120,9 +120,9 @@ impl<'t> Criterion for Typo<'t> {
}
}
let bucket_candidates = match self.bucket_candidates.as_mut() {
Some(bucket_candidates) => take(bucket_candidates),
None => candidates.clone(),
let initial_candidates = match self.initial_candidates.as_mut() {
Some(initial_candidates) => initial_candidates.take(),
None => InitialCandidates::Estimated(candidates.clone()),
};
self.typos += 1;
@ -131,7 +131,7 @@ impl<'t> Criterion for Typo<'t> {
query_tree: Some(new_query_tree),
candidates: Some(candidates),
filtered_candidates: None,
bucket_candidates: Some(bucket_candidates),
initial_candidates: Some(initial_candidates),
}));
}
None => match self.parent.next(params)? {
@ -139,14 +139,9 @@ impl<'t> Criterion for Typo<'t> {
query_tree: Some(query_tree),
candidates,
filtered_candidates,
bucket_candidates,
initial_candidates,
}) => {
self.bucket_candidates =
match (self.bucket_candidates.take(), bucket_candidates) {
(Some(self_bc), Some(parent_bc)) => Some(self_bc | parent_bc),
(self_bc, parent_bc) => self_bc.or(parent_bc),
};
self.initial_candidates = initial_candidates;
let candidates = match candidates.or(filtered_candidates) {
Some(candidates) => {
Candidates::Allowed(candidates - params.excluded_candidates)
@ -162,13 +157,13 @@ impl<'t> Criterion for Typo<'t> {
query_tree: None,
candidates,
filtered_candidates,
bucket_candidates,
initial_candidates,
}) => {
return Ok(Some(CriterionResult {
query_tree: None,
candidates,
filtered_candidates,
bucket_candidates,
initial_candidates,
}));
}
None => return Ok(None),
@ -356,7 +351,7 @@ mod test {
let result = display_criteria(criteria, criterion_parameters);
insta::assert_snapshot!(result, @r###"
CriterionResult { query_tree: None, candidates: None, filtered_candidates: None, bucket_candidates: None }
CriterionResult { query_tree: None, candidates: None, filtered_candidates: None, initial_candidates: None }
"###);
}
@ -399,7 +394,7 @@ mod test {
Exact { word: "split" }
Exact { word: "this" }
Exact { word: "world" }
), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, bucket_candidates: Some(RoaringBitmap<[]>) }
), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, initial_candidates: Some(Estimated(RoaringBitmap<[]>)) }
CriterionResult { query_tree: Some(OR
AND
@ -408,7 +403,7 @@ mod test {
OR
Exact { word: "word" }
Exact { word: "world" }
), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, bucket_candidates: Some(RoaringBitmap<[]>) }
), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, initial_candidates: Some(Estimated(RoaringBitmap<[]>)) }
"###);
}
@ -434,7 +429,7 @@ mod test {
let result = display_criteria(criteria, criterion_parameters);
insta::assert_snapshot!(result, @r###"
CriterionResult { query_tree: None, candidates: None, filtered_candidates: Some(RoaringBitmap<8000 values between 986424 and 4294786076>), bucket_candidates: None }
CriterionResult { query_tree: None, candidates: None, filtered_candidates: Some(RoaringBitmap<8000 values between 986424 and 4294786076>), initial_candidates: None }
"###);
}
@ -482,7 +477,7 @@ mod test {
Exact { word: "split" }
Exact { word: "this" }
Exact { word: "world" }
), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, bucket_candidates: Some(RoaringBitmap<[]>) }
), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, initial_candidates: Some(Estimated(RoaringBitmap<[]>)) }
CriterionResult { query_tree: Some(OR
AND
@ -491,7 +486,7 @@ mod test {
OR
Exact { word: "word" }
Exact { word: "world" }
), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, bucket_candidates: Some(RoaringBitmap<[]>) }
), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, initial_candidates: Some(Estimated(RoaringBitmap<[]>)) }
"###);
}

View File

@ -1,9 +1,8 @@
use std::mem::take;
use log::debug;
use roaring::RoaringBitmap;
use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult};
use crate::search::criteria::InitialCandidates;
use crate::search::query_tree::Operation;
use crate::Result;
@ -11,7 +10,7 @@ pub struct Words<'t> {
ctx: &'t dyn Context<'t>,
query_trees: Vec<Operation>,
candidates: Option<RoaringBitmap>,
bucket_candidates: Option<RoaringBitmap>,
initial_candidates: Option<InitialCandidates>,
filtered_candidates: Option<RoaringBitmap>,
parent: Box<dyn Criterion + 't>,
}
@ -22,7 +21,7 @@ impl<'t> Words<'t> {
ctx,
query_trees: Vec::default(),
candidates: None,
bucket_candidates: None,
initial_candidates: None,
parent,
filtered_candidates: None,
}
@ -53,13 +52,13 @@ impl<'t> Criterion for Words<'t> {
None => None,
};
let bucket_candidates = self.bucket_candidates.as_mut().map(take);
let initial_candidates = self.initial_candidates.clone();
return Ok(Some(CriterionResult {
query_tree: Some(query_tree),
candidates,
filtered_candidates: self.filtered_candidates.clone(),
bucket_candidates,
initial_candidates,
}));
}
None => match self.parent.next(params)? {
@ -67,14 +66,14 @@ impl<'t> Criterion for Words<'t> {
query_tree: Some(query_tree),
candidates,
filtered_candidates,
bucket_candidates,
initial_candidates,
}) => {
self.query_trees = explode_query_tree(query_tree);
self.candidates = candidates;
self.filtered_candidates = filtered_candidates;
self.bucket_candidates =
match (self.bucket_candidates.take(), bucket_candidates) {
self.initial_candidates =
match (self.initial_candidates.take(), initial_candidates) {
(Some(self_bc), Some(parent_bc)) => Some(self_bc | parent_bc),
(self_bc, parent_bc) => self_bc.or(parent_bc),
};
@ -83,13 +82,13 @@ impl<'t> Criterion for Words<'t> {
query_tree: None,
candidates,
filtered_candidates,
bucket_candidates,
initial_candidates,
}) => {
return Ok(Some(CriterionResult {
query_tree: None,
candidates,
filtered_candidates,
bucket_candidates,
initial_candidates,
}));
}
None => return Ok(None),

View File

@ -23,6 +23,7 @@ pub use self::matches::{
use self::query_tree::QueryTreeBuilder;
use crate::error::UserError;
use crate::search::criteria::r#final::{Final, FinalResult};
use crate::search::criteria::InitialCandidates;
use crate::{AscDesc, Criterion, DocumentId, Index, Member, Result};
// Building these factories is not free.
@ -235,11 +236,11 @@ impl<'a> Search<'a> {
mut criteria: Final,
) -> Result<SearchResult> {
let mut offset = self.offset;
let mut initial_candidates = RoaringBitmap::new();
let mut initial_candidates = InitialCandidates::Estimated(RoaringBitmap::new());
let mut excluded_candidates = self.index.soft_deleted_documents_ids(self.rtxn)?;
let mut documents_ids = Vec::new();
while let Some(FinalResult { candidates, bucket_candidates, .. }) =
while let Some(FinalResult { candidates, initial_candidates: ic, .. }) =
criteria.next(&excluded_candidates)?
{
debug!("Number of candidates found {}", candidates.len());
@ -247,7 +248,7 @@ impl<'a> Search<'a> {
let excluded = take(&mut excluded_candidates);
let mut candidates = distinct.distinct(candidates, excluded);
initial_candidates |= bucket_candidates;
initial_candidates |= ic;
if offset != 0 {
let discarded = candidates.by_ref().take(offset).count();
@ -265,9 +266,11 @@ impl<'a> Search<'a> {
}
}
initial_candidates.map_inplace(|c| c - excluded_candidates);
Ok(SearchResult {
matching_words,
candidates: initial_candidates - excluded_candidates,
candidates: initial_candidates.into_inner(),
documents_ids,
})
}

View File

@ -8,7 +8,7 @@ use Criterion::*;
use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
macro_rules! test_distinct {
($func:ident, $distinct:ident, $criteria:expr, $n_res:expr) => {
($func:ident, $distinct:ident, $exhaustive:ident, $limit:expr, $criteria:expr, $n_res:expr) => {
#[test]
fn $func() {
let criteria = $criteria;
@ -26,7 +26,8 @@ macro_rules! test_distinct {
let mut search = Search::new(&rtxn, &index);
search.query(search::TEST_QUERY);
search.limit(EXTERNAL_DOCUMENTS_IDS.len());
search.limit($limit);
search.exhaustive_number_hits($exhaustive);
search.authorize_typos(true);
search.terms_matching_strategy(TermsMatchingStrategy::default());
@ -46,6 +47,7 @@ macro_rules! test_distinct {
Some(d.id)
}
})
.take($limit)
.collect();
let documents_ids = search::internal_to_external_ids(&index, &documents_ids);
@ -54,25 +56,116 @@ macro_rules! test_distinct {
};
}
test_distinct!(
exhaustive_distinct_string_default_criteria,
tag,
true,
1,
vec![Words, Typo, Proximity, Attribute, Exactness],
3
);
test_distinct!(
exhaustive_distinct_number_default_criteria,
asc_desc_rank,
true,
1,
vec![Words, Typo, Proximity, Attribute, Exactness],
7
);
test_distinct!(
distinct_string_default_criteria,
tag,
false,
EXTERNAL_DOCUMENTS_IDS.len(),
vec![Words, Typo, Proximity, Attribute, Exactness],
3
);
test_distinct!(
distinct_number_default_criteria,
asc_desc_rank,
false,
EXTERNAL_DOCUMENTS_IDS.len(),
vec![Words, Typo, Proximity, Attribute, Exactness],
7
);
test_distinct!(distinct_string_criterion_words, tag, vec![Words], 3);
test_distinct!(distinct_number_criterion_words, asc_desc_rank, vec![Words], 7);
test_distinct!(distinct_string_criterion_words_typo, tag, vec![Words, Typo], 3);
test_distinct!(distinct_number_criterion_words_typo, asc_desc_rank, vec![Words, Typo], 7);
test_distinct!(distinct_string_criterion_words_proximity, tag, vec![Words, Proximity], 3);
test_distinct!(distinct_number_criterion_words_proximity, asc_desc_rank, vec![Words, Proximity], 7);
test_distinct!(distinct_string_criterion_words_attribute, tag, vec![Words, Attribute], 3);
test_distinct!(distinct_number_criterion_words_attribute, asc_desc_rank, vec![Words, Attribute], 7);
test_distinct!(distinct_string_criterion_words_exactness, tag, vec![Words, Exactness], 3);
test_distinct!(distinct_number_criterion_words_exactness, asc_desc_rank, vec![Words, Exactness], 7);
test_distinct!(
distinct_string_criterion_words,
tag,
false,
EXTERNAL_DOCUMENTS_IDS.len(),
vec![Words],
3
);
test_distinct!(
distinct_number_criterion_words,
asc_desc_rank,
false,
EXTERNAL_DOCUMENTS_IDS.len(),
vec![Words],
7
);
test_distinct!(
distinct_string_criterion_words_typo,
tag,
false,
EXTERNAL_DOCUMENTS_IDS.len(),
vec![Words, Typo],
3
);
test_distinct!(
distinct_number_criterion_words_typo,
asc_desc_rank,
false,
EXTERNAL_DOCUMENTS_IDS.len(),
vec![Words, Typo],
7
);
test_distinct!(
distinct_string_criterion_words_proximity,
tag,
false,
EXTERNAL_DOCUMENTS_IDS.len(),
vec![Words, Proximity],
3
);
test_distinct!(
distinct_number_criterion_words_proximity,
asc_desc_rank,
false,
EXTERNAL_DOCUMENTS_IDS.len(),
vec![Words, Proximity],
7
);
test_distinct!(
distinct_string_criterion_words_attribute,
tag,
false,
EXTERNAL_DOCUMENTS_IDS.len(),
vec![Words, Attribute],
3
);
test_distinct!(
distinct_number_criterion_words_attribute,
asc_desc_rank,
false,
EXTERNAL_DOCUMENTS_IDS.len(),
vec![Words, Attribute],
7
);
test_distinct!(
distinct_string_criterion_words_exactness,
tag,
false,
EXTERNAL_DOCUMENTS_IDS.len(),
vec![Words, Exactness],
3
);
test_distinct!(
distinct_number_criterion_words_exactness,
asc_desc_rank,
false,
EXTERNAL_DOCUMENTS_IDS.len(),
vec![Words, Exactness],
7
);