mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-30 00:34:26 +01:00
Merge #729
729: Fix distincted exhaustive hits r=Kerollmops a=ManyTheFish This PR changes the name and behavior of `bucket_candidates`: - `bucket_candidates` become `initial_candidates` that is less confusing - `initial_candidates` is no more a simple `RoaringBitmap` but an enum allowing us to precise if the candidates are exhaustive or not - this enum ensures that any modification is allowed only if the candidates are not already exhaustive. The bug occurred because `initial_candidates` are modified during the bucket sort allowing the estimation to be more and more precise along the search, and this was an issue when the `initial_candidates` were already exhaustive, now, if candidates are exhaustive, then no modifications are made. Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
commit
1f1beae077
@ -9,7 +9,7 @@ use super::{Criterion, CriterionParameters, CriterionResult};
|
||||
use crate::facet::FacetType;
|
||||
use crate::heed_codec::facet::FacetGroupKeyCodec;
|
||||
use crate::heed_codec::ByteSliceRefCodec;
|
||||
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
|
||||
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder, InitialCandidates};
|
||||
use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
|
||||
use crate::search::query_tree::Operation;
|
||||
use crate::{FieldId, Index, Result};
|
||||
@ -27,7 +27,7 @@ pub struct AscDesc<'t> {
|
||||
query_tree: Option<Operation>,
|
||||
candidates: Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>,
|
||||
allowed_candidates: RoaringBitmap,
|
||||
bucket_candidates: RoaringBitmap,
|
||||
initial_candidates: InitialCandidates,
|
||||
faceted_candidates: RoaringBitmap,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
}
|
||||
@ -81,7 +81,7 @@ impl<'t> AscDesc<'t> {
|
||||
candidates: Box::new(std::iter::empty()),
|
||||
allowed_candidates: RoaringBitmap::new(),
|
||||
faceted_candidates,
|
||||
bucket_candidates: RoaringBitmap::new(),
|
||||
initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
|
||||
parent,
|
||||
})
|
||||
}
|
||||
@ -106,7 +106,7 @@ impl<'t> Criterion for AscDesc<'t> {
|
||||
query_tree: self.query_tree.clone(),
|
||||
candidates: Some(take(&mut self.allowed_candidates)),
|
||||
filtered_candidates: None,
|
||||
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
||||
initial_candidates: Some(self.initial_candidates.take()),
|
||||
}));
|
||||
}
|
||||
None => match self.parent.next(params)? {
|
||||
@ -114,7 +114,7 @@ impl<'t> Criterion for AscDesc<'t> {
|
||||
query_tree,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
initial_candidates,
|
||||
}) => {
|
||||
self.query_tree = query_tree;
|
||||
let mut candidates = match (&self.query_tree, candidates) {
|
||||
@ -130,9 +130,11 @@ impl<'t> Criterion for AscDesc<'t> {
|
||||
candidates &= filtered_candidates;
|
||||
}
|
||||
|
||||
match bucket_candidates {
|
||||
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
|
||||
None => self.bucket_candidates |= &candidates,
|
||||
match initial_candidates {
|
||||
Some(initial_candidates) => {
|
||||
self.initial_candidates |= initial_candidates
|
||||
}
|
||||
None => self.initial_candidates.map_inplace(|c| c | &candidates),
|
||||
}
|
||||
|
||||
if candidates.is_empty() {
|
||||
@ -160,7 +162,7 @@ impl<'t> Criterion for AscDesc<'t> {
|
||||
query_tree: self.query_tree.clone(),
|
||||
candidates: Some(candidates),
|
||||
filtered_candidates: None,
|
||||
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
||||
initial_candidates: Some(self.initial_candidates.take()),
|
||||
}));
|
||||
}
|
||||
}
|
||||
|
@ -7,7 +7,7 @@ use std::mem::take;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult};
|
||||
use crate::search::criteria::Query;
|
||||
use crate::search::criteria::{InitialCandidates, Query};
|
||||
use crate::search::query_tree::{Operation, QueryKind};
|
||||
use crate::search::{build_dfa, word_derivations, WordDerivationsCache};
|
||||
use crate::Result;
|
||||
@ -26,7 +26,7 @@ type FlattenedQueryTree = Vec<Vec<Vec<Query>>>;
|
||||
pub struct Attribute<'t> {
|
||||
ctx: &'t dyn Context<'t>,
|
||||
state: Option<(Operation, FlattenedQueryTree, RoaringBitmap)>,
|
||||
bucket_candidates: RoaringBitmap,
|
||||
initial_candidates: InitialCandidates,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
linear_buckets: Option<btree_map::IntoIter<u64, RoaringBitmap>>,
|
||||
set_buckets: Option<BinaryHeap<Branch<'t>>>,
|
||||
@ -37,7 +37,7 @@ impl<'t> Attribute<'t> {
|
||||
Attribute {
|
||||
ctx,
|
||||
state: None,
|
||||
bucket_candidates: RoaringBitmap::new(),
|
||||
initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
|
||||
parent,
|
||||
linear_buckets: None,
|
||||
set_buckets: None,
|
||||
@ -60,7 +60,7 @@ impl<'t> Criterion for Attribute<'t> {
|
||||
query_tree: Some(query_tree),
|
||||
candidates: Some(RoaringBitmap::new()),
|
||||
filtered_candidates: None,
|
||||
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
||||
initial_candidates: Some(self.initial_candidates.take()),
|
||||
}));
|
||||
}
|
||||
Some((query_tree, flattened_query_tree, mut allowed_candidates)) => {
|
||||
@ -84,7 +84,7 @@ impl<'t> Criterion for Attribute<'t> {
|
||||
query_tree: Some(query_tree),
|
||||
candidates: Some(RoaringBitmap::new()),
|
||||
filtered_candidates: None,
|
||||
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
||||
initial_candidates: Some(self.initial_candidates.take()),
|
||||
}));
|
||||
}
|
||||
}
|
||||
@ -109,7 +109,7 @@ impl<'t> Criterion for Attribute<'t> {
|
||||
query_tree: Some(query_tree),
|
||||
candidates: Some(RoaringBitmap::new()),
|
||||
filtered_candidates: None,
|
||||
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
||||
initial_candidates: Some(self.initial_candidates.take()),
|
||||
}));
|
||||
}
|
||||
}
|
||||
@ -124,7 +124,7 @@ impl<'t> Criterion for Attribute<'t> {
|
||||
query_tree: Some(query_tree),
|
||||
candidates: Some(found_candidates),
|
||||
filtered_candidates: None,
|
||||
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
||||
initial_candidates: Some(self.initial_candidates.take()),
|
||||
}));
|
||||
}
|
||||
None => match self.parent.next(params)? {
|
||||
@ -132,7 +132,7 @@ impl<'t> Criterion for Attribute<'t> {
|
||||
query_tree: Some(query_tree),
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
initial_candidates,
|
||||
}) => {
|
||||
let mut candidates = match candidates {
|
||||
Some(candidates) => candidates,
|
||||
@ -148,9 +148,11 @@ impl<'t> Criterion for Attribute<'t> {
|
||||
|
||||
let flattened_query_tree = flatten_query_tree(&query_tree);
|
||||
|
||||
match bucket_candidates {
|
||||
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
|
||||
None => self.bucket_candidates |= &candidates,
|
||||
match initial_candidates {
|
||||
Some(initial_candidates) => {
|
||||
self.initial_candidates |= initial_candidates
|
||||
}
|
||||
None => self.initial_candidates.map_inplace(|c| c | &candidates),
|
||||
}
|
||||
|
||||
self.state = Some((query_tree, flattened_query_tree, candidates));
|
||||
@ -160,13 +162,13 @@ impl<'t> Criterion for Attribute<'t> {
|
||||
query_tree: None,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
initial_candidates,
|
||||
}) => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
initial_candidates,
|
||||
}));
|
||||
}
|
||||
None => return Ok(None),
|
||||
|
@ -8,6 +8,7 @@ use roaring::RoaringBitmap;
|
||||
|
||||
use crate::search::criteria::{
|
||||
resolve_phrase, resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult,
|
||||
InitialCandidates,
|
||||
};
|
||||
use crate::search::query_tree::{Operation, PrimitiveQueryPart};
|
||||
use crate::{absolute_from_relative_position, FieldId, Result};
|
||||
@ -16,7 +17,7 @@ pub struct Exactness<'t> {
|
||||
ctx: &'t dyn Context<'t>,
|
||||
query_tree: Option<Operation>,
|
||||
state: Option<State>,
|
||||
bucket_candidates: RoaringBitmap,
|
||||
initial_candidates: InitialCandidates,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
query: Vec<ExactQueryPart>,
|
||||
}
|
||||
@ -36,7 +37,7 @@ impl<'t> Exactness<'t> {
|
||||
ctx,
|
||||
query_tree: None,
|
||||
state: None,
|
||||
bucket_candidates: RoaringBitmap::new(),
|
||||
initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
|
||||
parent,
|
||||
query,
|
||||
})
|
||||
@ -68,7 +69,7 @@ impl<'t> Criterion for Exactness<'t> {
|
||||
query_tree: self.query_tree.clone(),
|
||||
candidates: Some(candidates),
|
||||
filtered_candidates: None,
|
||||
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
||||
initial_candidates: Some(self.initial_candidates.take()),
|
||||
}));
|
||||
}
|
||||
None => match self.parent.next(params)? {
|
||||
@ -76,7 +77,7 @@ impl<'t> Criterion for Exactness<'t> {
|
||||
query_tree: Some(query_tree),
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
initial_candidates,
|
||||
}) => {
|
||||
let mut candidates = match candidates {
|
||||
Some(candidates) => candidates,
|
||||
@ -90,9 +91,11 @@ impl<'t> Criterion for Exactness<'t> {
|
||||
candidates &= filtered_candidates;
|
||||
}
|
||||
|
||||
match bucket_candidates {
|
||||
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
|
||||
None => self.bucket_candidates |= &candidates,
|
||||
match initial_candidates {
|
||||
Some(initial_candidates) => {
|
||||
self.initial_candidates |= initial_candidates
|
||||
}
|
||||
None => self.initial_candidates.map_inplace(|c| c | &candidates),
|
||||
}
|
||||
|
||||
self.state = Some(State::new(candidates));
|
||||
@ -102,13 +105,13 @@ impl<'t> Criterion for Exactness<'t> {
|
||||
query_tree: None,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
initial_candidates,
|
||||
}) => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
initial_candidates,
|
||||
}));
|
||||
}
|
||||
None => return Ok(None),
|
||||
|
@ -2,6 +2,7 @@ use log::debug;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult};
|
||||
use crate::search::criteria::InitialCandidates;
|
||||
use crate::search::query_tree::Operation;
|
||||
use crate::search::WordDerivationsCache;
|
||||
use crate::Result;
|
||||
@ -14,7 +15,7 @@ pub struct FinalResult {
|
||||
/// The candidates of the current bucket of the last criterion.
|
||||
pub candidates: RoaringBitmap,
|
||||
/// Candidates that comes from the current bucket of the initial criterion.
|
||||
pub bucket_candidates: RoaringBitmap,
|
||||
pub initial_candidates: InitialCandidates,
|
||||
}
|
||||
|
||||
pub struct Final<'t> {
|
||||
@ -49,7 +50,7 @@ impl<'t> Final<'t> {
|
||||
query_tree,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
initial_candidates,
|
||||
}) => {
|
||||
let mut candidates = match (candidates, query_tree.as_ref()) {
|
||||
(Some(candidates), _) => candidates,
|
||||
@ -63,11 +64,12 @@ impl<'t> Final<'t> {
|
||||
candidates &= filtered_candidates;
|
||||
}
|
||||
|
||||
let bucket_candidates = bucket_candidates.unwrap_or_else(|| candidates.clone());
|
||||
let initial_candidates = initial_candidates
|
||||
.unwrap_or_else(|| InitialCandidates::Estimated(candidates.clone()));
|
||||
|
||||
self.returned_candidates |= &candidates;
|
||||
|
||||
Ok(Some(FinalResult { query_tree, candidates, bucket_candidates }))
|
||||
Ok(Some(FinalResult { query_tree, candidates, initial_candidates }))
|
||||
}
|
||||
None => Ok(None),
|
||||
}
|
||||
|
@ -4,7 +4,7 @@ use roaring::RoaringBitmap;
|
||||
use rstar::RTree;
|
||||
|
||||
use super::{Criterion, CriterionParameters, CriterionResult};
|
||||
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
|
||||
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder, InitialCandidates};
|
||||
use crate::{lat_lng_to_xyz, GeoPoint, Index, Result};
|
||||
|
||||
pub struct Geo<'t> {
|
||||
@ -14,7 +14,7 @@ pub struct Geo<'t> {
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
candidates: Box<dyn Iterator<Item = RoaringBitmap>>,
|
||||
allowed_candidates: RoaringBitmap,
|
||||
bucket_candidates: RoaringBitmap,
|
||||
initial_candidates: InitialCandidates,
|
||||
rtree: Option<RTree<GeoPoint>>,
|
||||
point: [f64; 2],
|
||||
}
|
||||
@ -47,7 +47,7 @@ impl<'t> Geo<'t> {
|
||||
) -> Result<Self> {
|
||||
let candidates = Box::new(iter::empty());
|
||||
let allowed_candidates = index.geo_faceted_documents_ids(rtxn)?;
|
||||
let bucket_candidates = RoaringBitmap::new();
|
||||
let initial_candidates = InitialCandidates::Estimated(RoaringBitmap::new());
|
||||
let rtree = index.geo_rtree(rtxn)?;
|
||||
|
||||
Ok(Self {
|
||||
@ -57,7 +57,7 @@ impl<'t> Geo<'t> {
|
||||
parent,
|
||||
candidates,
|
||||
allowed_candidates,
|
||||
bucket_candidates,
|
||||
initial_candidates,
|
||||
rtree,
|
||||
point,
|
||||
})
|
||||
@ -77,7 +77,7 @@ impl Criterion for Geo<'_> {
|
||||
query_tree: None,
|
||||
candidates: Some(candidates),
|
||||
filtered_candidates: None,
|
||||
bucket_candidates: Some(self.bucket_candidates.clone()),
|
||||
initial_candidates: Some(self.initial_candidates.clone()),
|
||||
}));
|
||||
}
|
||||
None => match self.parent.next(params)? {
|
||||
@ -85,7 +85,7 @@ impl Criterion for Geo<'_> {
|
||||
query_tree,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
initial_candidates,
|
||||
}) => {
|
||||
let mut candidates = match (&query_tree, candidates) {
|
||||
(_, Some(candidates)) => candidates,
|
||||
@ -100,9 +100,11 @@ impl Criterion for Geo<'_> {
|
||||
candidates &= filtered_candidates;
|
||||
}
|
||||
|
||||
match bucket_candidates {
|
||||
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
|
||||
None => self.bucket_candidates |= &candidates,
|
||||
match initial_candidates {
|
||||
Some(initial_candidates) => {
|
||||
self.initial_candidates |= initial_candidates
|
||||
}
|
||||
None => self.initial_candidates.map_inplace(|c| c | &candidates),
|
||||
}
|
||||
|
||||
if candidates.is_empty() {
|
||||
|
@ -1,7 +1,7 @@
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{Criterion, CriterionParameters, CriterionResult};
|
||||
use crate::search::criteria::{resolve_query_tree, Context};
|
||||
use crate::search::criteria::{resolve_query_tree, Context, InitialCandidates};
|
||||
use crate::search::query_tree::Operation;
|
||||
use crate::search::Distinct;
|
||||
use crate::Result;
|
||||
@ -27,7 +27,7 @@ impl<'t, D> Initial<'t, D> {
|
||||
query_tree,
|
||||
candidates: None,
|
||||
filtered_candidates,
|
||||
bucket_candidates: None,
|
||||
initial_candidates: None,
|
||||
};
|
||||
Initial { ctx, answer: Some(answer), exhaustive_number_hits, distinct }
|
||||
}
|
||||
@ -41,32 +41,34 @@ impl<D: Distinct> Criterion for Initial<'_, D> {
|
||||
.map(|mut answer| {
|
||||
if self.exhaustive_number_hits && answer.query_tree.is_some() {
|
||||
// resolve the whole query tree to retrieve an exhaustive list of documents matching the query.
|
||||
// then remove the potential soft deleted documents.
|
||||
let mut candidates = resolve_query_tree(
|
||||
self.ctx,
|
||||
answer.query_tree.as_ref().unwrap(),
|
||||
params.wdcache,
|
||||
)?;
|
||||
)? - params.excluded_candidates;
|
||||
|
||||
// Apply the filters on the documents retrieved with the query tree.
|
||||
if let Some(ref filtered_candidates) = answer.filtered_candidates {
|
||||
candidates &= filtered_candidates;
|
||||
}
|
||||
|
||||
// because the bucket_candidates should be an exhaustive count of the matching documents,
|
||||
// because the initial_candidates should be an exhaustive count of the matching documents,
|
||||
// we precompute the distinct attributes.
|
||||
let bucket_candidates = match &mut self.distinct {
|
||||
let initial_candidates = match &mut self.distinct {
|
||||
Some(distinct) => {
|
||||
let mut bucket_candidates = RoaringBitmap::new();
|
||||
let mut initial_candidates = RoaringBitmap::new();
|
||||
for c in distinct.distinct(candidates.clone(), RoaringBitmap::new()) {
|
||||
bucket_candidates.insert(c?);
|
||||
initial_candidates.insert(c?);
|
||||
}
|
||||
bucket_candidates
|
||||
initial_candidates
|
||||
}
|
||||
None => candidates.clone(),
|
||||
};
|
||||
|
||||
answer.candidates = Some(candidates);
|
||||
answer.bucket_candidates = Some(bucket_candidates);
|
||||
answer.initial_candidates =
|
||||
Some(InitialCandidates::Exhaustive(initial_candidates));
|
||||
}
|
||||
Ok(answer)
|
||||
})
|
||||
|
@ -1,5 +1,7 @@
|
||||
use std::borrow::Cow;
|
||||
use std::collections::HashMap;
|
||||
use std::mem::take;
|
||||
use std::ops::{BitOr, BitOrAssign};
|
||||
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
@ -41,7 +43,7 @@ pub struct CriterionResult {
|
||||
/// The candidates, coming from facet filters, that this criterion is allowed to return subsets of.
|
||||
filtered_candidates: Option<RoaringBitmap>,
|
||||
/// Candidates that comes from the current bucket of the initial criterion.
|
||||
bucket_candidates: Option<RoaringBitmap>,
|
||||
initial_candidates: Option<InitialCandidates>,
|
||||
}
|
||||
|
||||
#[derive(Debug, PartialEq)]
|
||||
@ -65,6 +67,71 @@ impl Default for Candidates {
|
||||
}
|
||||
}
|
||||
|
||||
/// Either a set of candidates that defines the estimated set of candidates
|
||||
/// that could be returned,
|
||||
/// or the Exhaustive set of candidates that will be returned if all possible results are fetched.
|
||||
#[derive(Debug, Clone, PartialEq)]
|
||||
pub enum InitialCandidates {
|
||||
Estimated(RoaringBitmap),
|
||||
Exhaustive(RoaringBitmap),
|
||||
}
|
||||
|
||||
impl InitialCandidates {
|
||||
fn take(&mut self) -> Self {
|
||||
match self {
|
||||
Self::Estimated(c) => Self::Estimated(take(c)),
|
||||
Self::Exhaustive(c) => Self::Exhaustive(take(c)),
|
||||
}
|
||||
}
|
||||
|
||||
/// modify the containing roaring bitmap inplace if the set isn't already Exhaustive.
|
||||
pub fn map_inplace<F>(&mut self, f: F)
|
||||
where
|
||||
F: FnOnce(RoaringBitmap) -> RoaringBitmap,
|
||||
{
|
||||
if let Self::Estimated(c) = self {
|
||||
*c = f(take(c))
|
||||
}
|
||||
}
|
||||
|
||||
pub fn into_inner(self) -> RoaringBitmap {
|
||||
match self {
|
||||
Self::Estimated(c) => c,
|
||||
Self::Exhaustive(c) => c,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl BitOrAssign for InitialCandidates {
|
||||
/// Make an union between the containing roaring bitmaps if the set isn't already Exhaustive.
|
||||
/// In the case of rhs is Exhaustive and not self, then rhs replaces self.
|
||||
fn bitor_assign(&mut self, rhs: Self) {
|
||||
if let Self::Estimated(c) = self {
|
||||
*self = match rhs {
|
||||
Self::Estimated(rhs) => Self::Estimated(rhs | &*c),
|
||||
Self::Exhaustive(rhs) => Self::Exhaustive(rhs),
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
impl BitOr for InitialCandidates {
|
||||
type Output = Self;
|
||||
|
||||
/// Make an union between the containing roaring bitmaps if the set isn't already Exhaustive.
|
||||
/// In the case of rhs is Exhaustive and not self, then rhs replaces self.
|
||||
fn bitor(self, rhs: Self) -> Self::Output {
|
||||
if let Self::Estimated(c) = self {
|
||||
match rhs {
|
||||
Self::Estimated(rhs) => Self::Estimated(rhs | c),
|
||||
Self::Exhaustive(rhs) => Self::Exhaustive(rhs),
|
||||
}
|
||||
} else {
|
||||
self.clone()
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
pub trait Context<'c> {
|
||||
fn documents_ids(&self) -> heed::Result<RoaringBitmap>;
|
||||
fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;
|
||||
|
@ -1,6 +1,5 @@
|
||||
use std::collections::btree_map::{self, BTreeMap};
|
||||
use std::collections::hash_map::HashMap;
|
||||
use std::mem::take;
|
||||
|
||||
use log::debug;
|
||||
use roaring::RoaringBitmap;
|
||||
@ -10,6 +9,7 @@ use super::{
|
||||
query_docids, query_pair_proximity_docids, resolve_phrase, resolve_query_tree, Context,
|
||||
Criterion, CriterionParameters, CriterionResult,
|
||||
};
|
||||
use crate::search::criteria::InitialCandidates;
|
||||
use crate::search::query_tree::{maximum_proximity, Operation, Query, QueryKind};
|
||||
use crate::search::{build_dfa, WordDerivationsCache};
|
||||
use crate::{Position, Result};
|
||||
@ -29,7 +29,7 @@ pub struct Proximity<'t> {
|
||||
/// (max_proximity, query_tree, allowed_candidates)
|
||||
state: Option<(u8, Operation, RoaringBitmap)>,
|
||||
proximity: u8,
|
||||
bucket_candidates: RoaringBitmap,
|
||||
initial_candidates: InitialCandidates,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
candidates_cache: Cache,
|
||||
plane_sweep_cache: Option<btree_map::IntoIter<u8, RoaringBitmap>>,
|
||||
@ -41,7 +41,7 @@ impl<'t> Proximity<'t> {
|
||||
ctx,
|
||||
state: None,
|
||||
proximity: 0,
|
||||
bucket_candidates: RoaringBitmap::new(),
|
||||
initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
|
||||
parent,
|
||||
candidates_cache: Cache::new(),
|
||||
plane_sweep_cache: None,
|
||||
@ -115,7 +115,7 @@ impl<'t> Criterion for Proximity<'t> {
|
||||
query_tree: Some(query_tree.clone()),
|
||||
candidates: Some(new_candidates),
|
||||
filtered_candidates: None,
|
||||
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
||||
initial_candidates: Some(self.initial_candidates.take()),
|
||||
}));
|
||||
}
|
||||
None => match self.parent.next(params)? {
|
||||
@ -123,7 +123,7 @@ impl<'t> Criterion for Proximity<'t> {
|
||||
query_tree: Some(query_tree),
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
initial_candidates,
|
||||
}) => {
|
||||
let mut candidates = match candidates {
|
||||
Some(candidates) => candidates,
|
||||
@ -137,9 +137,11 @@ impl<'t> Criterion for Proximity<'t> {
|
||||
candidates &= filtered_candidates;
|
||||
}
|
||||
|
||||
match bucket_candidates {
|
||||
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
|
||||
None => self.bucket_candidates |= &candidates,
|
||||
match initial_candidates {
|
||||
Some(initial_candidates) => {
|
||||
self.initial_candidates |= initial_candidates
|
||||
}
|
||||
None => self.initial_candidates.map_inplace(|c| c | &candidates),
|
||||
}
|
||||
|
||||
let maximum_proximity = maximum_proximity(&query_tree);
|
||||
@ -151,13 +153,13 @@ impl<'t> Criterion for Proximity<'t> {
|
||||
query_tree: None,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
initial_candidates,
|
||||
}) => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
initial_candidates,
|
||||
}));
|
||||
}
|
||||
None => return Ok(None),
|
||||
|
@ -9,7 +9,7 @@ use super::{
|
||||
query_docids, resolve_query_tree, Candidates, Context, Criterion, CriterionParameters,
|
||||
CriterionResult,
|
||||
};
|
||||
use crate::search::criteria::resolve_phrase;
|
||||
use crate::search::criteria::{resolve_phrase, InitialCandidates};
|
||||
use crate::search::query_tree::{maximum_typo, Operation, Query, QueryKind};
|
||||
use crate::search::{word_derivations, WordDerivationsCache};
|
||||
use crate::Result;
|
||||
@ -22,7 +22,7 @@ pub struct Typo<'t> {
|
||||
/// (max_typos, query_tree, candidates)
|
||||
state: Option<(u8, Operation, Candidates)>,
|
||||
typos: u8,
|
||||
bucket_candidates: Option<RoaringBitmap>,
|
||||
initial_candidates: Option<InitialCandidates>,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
candidates_cache: HashMap<(Operation, u8), RoaringBitmap>,
|
||||
}
|
||||
@ -33,7 +33,7 @@ impl<'t> Typo<'t> {
|
||||
ctx,
|
||||
state: None,
|
||||
typos: 0,
|
||||
bucket_candidates: None,
|
||||
initial_candidates: None,
|
||||
parent,
|
||||
candidates_cache: HashMap::new(),
|
||||
}
|
||||
@ -120,9 +120,9 @@ impl<'t> Criterion for Typo<'t> {
|
||||
}
|
||||
}
|
||||
|
||||
let bucket_candidates = match self.bucket_candidates.as_mut() {
|
||||
Some(bucket_candidates) => take(bucket_candidates),
|
||||
None => candidates.clone(),
|
||||
let initial_candidates = match self.initial_candidates.as_mut() {
|
||||
Some(initial_candidates) => initial_candidates.take(),
|
||||
None => InitialCandidates::Estimated(candidates.clone()),
|
||||
};
|
||||
|
||||
self.typos += 1;
|
||||
@ -131,7 +131,7 @@ impl<'t> Criterion for Typo<'t> {
|
||||
query_tree: Some(new_query_tree),
|
||||
candidates: Some(candidates),
|
||||
filtered_candidates: None,
|
||||
bucket_candidates: Some(bucket_candidates),
|
||||
initial_candidates: Some(initial_candidates),
|
||||
}));
|
||||
}
|
||||
None => match self.parent.next(params)? {
|
||||
@ -139,14 +139,9 @@ impl<'t> Criterion for Typo<'t> {
|
||||
query_tree: Some(query_tree),
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
initial_candidates,
|
||||
}) => {
|
||||
self.bucket_candidates =
|
||||
match (self.bucket_candidates.take(), bucket_candidates) {
|
||||
(Some(self_bc), Some(parent_bc)) => Some(self_bc | parent_bc),
|
||||
(self_bc, parent_bc) => self_bc.or(parent_bc),
|
||||
};
|
||||
|
||||
self.initial_candidates = initial_candidates;
|
||||
let candidates = match candidates.or(filtered_candidates) {
|
||||
Some(candidates) => {
|
||||
Candidates::Allowed(candidates - params.excluded_candidates)
|
||||
@ -162,13 +157,13 @@ impl<'t> Criterion for Typo<'t> {
|
||||
query_tree: None,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
initial_candidates,
|
||||
}) => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
initial_candidates,
|
||||
}));
|
||||
}
|
||||
None => return Ok(None),
|
||||
@ -356,7 +351,7 @@ mod test {
|
||||
|
||||
let result = display_criteria(criteria, criterion_parameters);
|
||||
insta::assert_snapshot!(result, @r###"
|
||||
CriterionResult { query_tree: None, candidates: None, filtered_candidates: None, bucket_candidates: None }
|
||||
CriterionResult { query_tree: None, candidates: None, filtered_candidates: None, initial_candidates: None }
|
||||
|
||||
"###);
|
||||
}
|
||||
@ -399,7 +394,7 @@ mod test {
|
||||
Exact { word: "split" }
|
||||
Exact { word: "this" }
|
||||
Exact { word: "world" }
|
||||
), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, bucket_candidates: Some(RoaringBitmap<[]>) }
|
||||
), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, initial_candidates: Some(Estimated(RoaringBitmap<[]>)) }
|
||||
|
||||
CriterionResult { query_tree: Some(OR
|
||||
AND
|
||||
@ -408,7 +403,7 @@ mod test {
|
||||
OR
|
||||
Exact { word: "word" }
|
||||
Exact { word: "world" }
|
||||
), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, bucket_candidates: Some(RoaringBitmap<[]>) }
|
||||
), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, initial_candidates: Some(Estimated(RoaringBitmap<[]>)) }
|
||||
|
||||
"###);
|
||||
}
|
||||
@ -434,7 +429,7 @@ mod test {
|
||||
|
||||
let result = display_criteria(criteria, criterion_parameters);
|
||||
insta::assert_snapshot!(result, @r###"
|
||||
CriterionResult { query_tree: None, candidates: None, filtered_candidates: Some(RoaringBitmap<8000 values between 986424 and 4294786076>), bucket_candidates: None }
|
||||
CriterionResult { query_tree: None, candidates: None, filtered_candidates: Some(RoaringBitmap<8000 values between 986424 and 4294786076>), initial_candidates: None }
|
||||
|
||||
"###);
|
||||
}
|
||||
@ -482,7 +477,7 @@ mod test {
|
||||
Exact { word: "split" }
|
||||
Exact { word: "this" }
|
||||
Exact { word: "world" }
|
||||
), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, bucket_candidates: Some(RoaringBitmap<[]>) }
|
||||
), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, initial_candidates: Some(Estimated(RoaringBitmap<[]>)) }
|
||||
|
||||
CriterionResult { query_tree: Some(OR
|
||||
AND
|
||||
@ -491,7 +486,7 @@ mod test {
|
||||
OR
|
||||
Exact { word: "word" }
|
||||
Exact { word: "world" }
|
||||
), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, bucket_candidates: Some(RoaringBitmap<[]>) }
|
||||
), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, initial_candidates: Some(Estimated(RoaringBitmap<[]>)) }
|
||||
|
||||
"###);
|
||||
}
|
||||
|
@ -1,9 +1,8 @@
|
||||
use std::mem::take;
|
||||
|
||||
use log::debug;
|
||||
use roaring::RoaringBitmap;
|
||||
|
||||
use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult};
|
||||
use crate::search::criteria::InitialCandidates;
|
||||
use crate::search::query_tree::Operation;
|
||||
use crate::Result;
|
||||
|
||||
@ -11,7 +10,7 @@ pub struct Words<'t> {
|
||||
ctx: &'t dyn Context<'t>,
|
||||
query_trees: Vec<Operation>,
|
||||
candidates: Option<RoaringBitmap>,
|
||||
bucket_candidates: Option<RoaringBitmap>,
|
||||
initial_candidates: Option<InitialCandidates>,
|
||||
filtered_candidates: Option<RoaringBitmap>,
|
||||
parent: Box<dyn Criterion + 't>,
|
||||
}
|
||||
@ -22,7 +21,7 @@ impl<'t> Words<'t> {
|
||||
ctx,
|
||||
query_trees: Vec::default(),
|
||||
candidates: None,
|
||||
bucket_candidates: None,
|
||||
initial_candidates: None,
|
||||
parent,
|
||||
filtered_candidates: None,
|
||||
}
|
||||
@ -53,13 +52,13 @@ impl<'t> Criterion for Words<'t> {
|
||||
None => None,
|
||||
};
|
||||
|
||||
let bucket_candidates = self.bucket_candidates.as_mut().map(take);
|
||||
let initial_candidates = self.initial_candidates.clone();
|
||||
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: Some(query_tree),
|
||||
candidates,
|
||||
filtered_candidates: self.filtered_candidates.clone(),
|
||||
bucket_candidates,
|
||||
initial_candidates,
|
||||
}));
|
||||
}
|
||||
None => match self.parent.next(params)? {
|
||||
@ -67,14 +66,14 @@ impl<'t> Criterion for Words<'t> {
|
||||
query_tree: Some(query_tree),
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
initial_candidates,
|
||||
}) => {
|
||||
self.query_trees = explode_query_tree(query_tree);
|
||||
self.candidates = candidates;
|
||||
self.filtered_candidates = filtered_candidates;
|
||||
|
||||
self.bucket_candidates =
|
||||
match (self.bucket_candidates.take(), bucket_candidates) {
|
||||
self.initial_candidates =
|
||||
match (self.initial_candidates.take(), initial_candidates) {
|
||||
(Some(self_bc), Some(parent_bc)) => Some(self_bc | parent_bc),
|
||||
(self_bc, parent_bc) => self_bc.or(parent_bc),
|
||||
};
|
||||
@ -83,13 +82,13 @@ impl<'t> Criterion for Words<'t> {
|
||||
query_tree: None,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
initial_candidates,
|
||||
}) => {
|
||||
return Ok(Some(CriterionResult {
|
||||
query_tree: None,
|
||||
candidates,
|
||||
filtered_candidates,
|
||||
bucket_candidates,
|
||||
initial_candidates,
|
||||
}));
|
||||
}
|
||||
None => return Ok(None),
|
||||
|
@ -23,6 +23,7 @@ pub use self::matches::{
|
||||
use self::query_tree::QueryTreeBuilder;
|
||||
use crate::error::UserError;
|
||||
use crate::search::criteria::r#final::{Final, FinalResult};
|
||||
use crate::search::criteria::InitialCandidates;
|
||||
use crate::{AscDesc, Criterion, DocumentId, Index, Member, Result};
|
||||
|
||||
// Building these factories is not free.
|
||||
@ -235,11 +236,11 @@ impl<'a> Search<'a> {
|
||||
mut criteria: Final,
|
||||
) -> Result<SearchResult> {
|
||||
let mut offset = self.offset;
|
||||
let mut initial_candidates = RoaringBitmap::new();
|
||||
let mut initial_candidates = InitialCandidates::Estimated(RoaringBitmap::new());
|
||||
let mut excluded_candidates = self.index.soft_deleted_documents_ids(self.rtxn)?;
|
||||
let mut documents_ids = Vec::new();
|
||||
|
||||
while let Some(FinalResult { candidates, bucket_candidates, .. }) =
|
||||
while let Some(FinalResult { candidates, initial_candidates: ic, .. }) =
|
||||
criteria.next(&excluded_candidates)?
|
||||
{
|
||||
debug!("Number of candidates found {}", candidates.len());
|
||||
@ -247,7 +248,7 @@ impl<'a> Search<'a> {
|
||||
let excluded = take(&mut excluded_candidates);
|
||||
let mut candidates = distinct.distinct(candidates, excluded);
|
||||
|
||||
initial_candidates |= bucket_candidates;
|
||||
initial_candidates |= ic;
|
||||
|
||||
if offset != 0 {
|
||||
let discarded = candidates.by_ref().take(offset).count();
|
||||
@ -265,9 +266,11 @@ impl<'a> Search<'a> {
|
||||
}
|
||||
}
|
||||
|
||||
initial_candidates.map_inplace(|c| c - excluded_candidates);
|
||||
|
||||
Ok(SearchResult {
|
||||
matching_words,
|
||||
candidates: initial_candidates - excluded_candidates,
|
||||
candidates: initial_candidates.into_inner(),
|
||||
documents_ids,
|
||||
})
|
||||
}
|
||||
|
@ -8,7 +8,7 @@ use Criterion::*;
|
||||
use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
|
||||
|
||||
macro_rules! test_distinct {
|
||||
($func:ident, $distinct:ident, $criteria:expr, $n_res:expr) => {
|
||||
($func:ident, $distinct:ident, $exhaustive:ident, $limit:expr, $criteria:expr, $n_res:expr) => {
|
||||
#[test]
|
||||
fn $func() {
|
||||
let criteria = $criteria;
|
||||
@ -26,7 +26,8 @@ macro_rules! test_distinct {
|
||||
|
||||
let mut search = Search::new(&rtxn, &index);
|
||||
search.query(search::TEST_QUERY);
|
||||
search.limit(EXTERNAL_DOCUMENTS_IDS.len());
|
||||
search.limit($limit);
|
||||
search.exhaustive_number_hits($exhaustive);
|
||||
search.authorize_typos(true);
|
||||
search.terms_matching_strategy(TermsMatchingStrategy::default());
|
||||
|
||||
@ -46,6 +47,7 @@ macro_rules! test_distinct {
|
||||
Some(d.id)
|
||||
}
|
||||
})
|
||||
.take($limit)
|
||||
.collect();
|
||||
|
||||
let documents_ids = search::internal_to_external_ids(&index, &documents_ids);
|
||||
@ -54,25 +56,116 @@ macro_rules! test_distinct {
|
||||
};
|
||||
}
|
||||
|
||||
test_distinct!(
|
||||
exhaustive_distinct_string_default_criteria,
|
||||
tag,
|
||||
true,
|
||||
1,
|
||||
vec![Words, Typo, Proximity, Attribute, Exactness],
|
||||
3
|
||||
);
|
||||
test_distinct!(
|
||||
exhaustive_distinct_number_default_criteria,
|
||||
asc_desc_rank,
|
||||
true,
|
||||
1,
|
||||
vec![Words, Typo, Proximity, Attribute, Exactness],
|
||||
7
|
||||
);
|
||||
|
||||
test_distinct!(
|
||||
distinct_string_default_criteria,
|
||||
tag,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
vec![Words, Typo, Proximity, Attribute, Exactness],
|
||||
3
|
||||
);
|
||||
test_distinct!(
|
||||
distinct_number_default_criteria,
|
||||
asc_desc_rank,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
vec![Words, Typo, Proximity, Attribute, Exactness],
|
||||
7
|
||||
);
|
||||
test_distinct!(distinct_string_criterion_words, tag, vec![Words], 3);
|
||||
test_distinct!(distinct_number_criterion_words, asc_desc_rank, vec![Words], 7);
|
||||
test_distinct!(distinct_string_criterion_words_typo, tag, vec![Words, Typo], 3);
|
||||
test_distinct!(distinct_number_criterion_words_typo, asc_desc_rank, vec![Words, Typo], 7);
|
||||
test_distinct!(distinct_string_criterion_words_proximity, tag, vec![Words, Proximity], 3);
|
||||
test_distinct!(distinct_number_criterion_words_proximity, asc_desc_rank, vec![Words, Proximity], 7);
|
||||
test_distinct!(distinct_string_criterion_words_attribute, tag, vec![Words, Attribute], 3);
|
||||
test_distinct!(distinct_number_criterion_words_attribute, asc_desc_rank, vec![Words, Attribute], 7);
|
||||
test_distinct!(distinct_string_criterion_words_exactness, tag, vec![Words, Exactness], 3);
|
||||
test_distinct!(distinct_number_criterion_words_exactness, asc_desc_rank, vec![Words, Exactness], 7);
|
||||
test_distinct!(
|
||||
distinct_string_criterion_words,
|
||||
tag,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
vec![Words],
|
||||
3
|
||||
);
|
||||
test_distinct!(
|
||||
distinct_number_criterion_words,
|
||||
asc_desc_rank,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
vec![Words],
|
||||
7
|
||||
);
|
||||
test_distinct!(
|
||||
distinct_string_criterion_words_typo,
|
||||
tag,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
vec![Words, Typo],
|
||||
3
|
||||
);
|
||||
test_distinct!(
|
||||
distinct_number_criterion_words_typo,
|
||||
asc_desc_rank,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
vec![Words, Typo],
|
||||
7
|
||||
);
|
||||
test_distinct!(
|
||||
distinct_string_criterion_words_proximity,
|
||||
tag,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
vec![Words, Proximity],
|
||||
3
|
||||
);
|
||||
test_distinct!(
|
||||
distinct_number_criterion_words_proximity,
|
||||
asc_desc_rank,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
vec![Words, Proximity],
|
||||
7
|
||||
);
|
||||
test_distinct!(
|
||||
distinct_string_criterion_words_attribute,
|
||||
tag,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
vec![Words, Attribute],
|
||||
3
|
||||
);
|
||||
test_distinct!(
|
||||
distinct_number_criterion_words_attribute,
|
||||
asc_desc_rank,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
vec![Words, Attribute],
|
||||
7
|
||||
);
|
||||
test_distinct!(
|
||||
distinct_string_criterion_words_exactness,
|
||||
tag,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
vec![Words, Exactness],
|
||||
3
|
||||
);
|
||||
test_distinct!(
|
||||
distinct_number_criterion_words_exactness,
|
||||
asc_desc_rank,
|
||||
false,
|
||||
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||
vec![Words, Exactness],
|
||||
7
|
||||
);
|
||||
|
Loading…
Reference in New Issue
Block a user