Introduce an initial candidates set that distinguishes an exhaustive count from an estimation

This commit is contained in:
ManyTheFish 2022-12-07 18:29:25 +01:00
parent 6d50ea0830
commit 55724f2412
11 changed files with 180 additions and 101 deletions

View File

@ -9,7 +9,7 @@ use super::{Criterion, CriterionParameters, CriterionResult};
use crate::facet::FacetType; use crate::facet::FacetType;
use crate::heed_codec::facet::FacetGroupKeyCodec; use crate::heed_codec::facet::FacetGroupKeyCodec;
use crate::heed_codec::ByteSliceRefCodec; use crate::heed_codec::ByteSliceRefCodec;
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder}; use crate::search::criteria::{resolve_query_tree, CriteriaBuilder, InitialCandidates};
use crate::search::facet::{ascending_facet_sort, descending_facet_sort}; use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
use crate::search::query_tree::Operation; use crate::search::query_tree::Operation;
use crate::{FieldId, Index, Result}; use crate::{FieldId, Index, Result};
@ -27,7 +27,7 @@ pub struct AscDesc<'t> {
query_tree: Option<Operation>, query_tree: Option<Operation>,
candidates: Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>, candidates: Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>,
allowed_candidates: RoaringBitmap, allowed_candidates: RoaringBitmap,
bucket_candidates: RoaringBitmap, initial_candidates: InitialCandidates,
faceted_candidates: RoaringBitmap, faceted_candidates: RoaringBitmap,
parent: Box<dyn Criterion + 't>, parent: Box<dyn Criterion + 't>,
} }
@ -81,7 +81,7 @@ impl<'t> AscDesc<'t> {
candidates: Box::new(std::iter::empty()), candidates: Box::new(std::iter::empty()),
allowed_candidates: RoaringBitmap::new(), allowed_candidates: RoaringBitmap::new(),
faceted_candidates, faceted_candidates,
bucket_candidates: RoaringBitmap::new(), initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
parent, parent,
}) })
} }
@ -106,7 +106,7 @@ impl<'t> Criterion for AscDesc<'t> {
query_tree: self.query_tree.clone(), query_tree: self.query_tree.clone(),
candidates: Some(take(&mut self.allowed_candidates)), candidates: Some(take(&mut self.allowed_candidates)),
filtered_candidates: None, filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)), initial_candidates: Some(self.initial_candidates.take()),
})); }));
} }
None => match self.parent.next(params)? { None => match self.parent.next(params)? {
@ -114,7 +114,7 @@ impl<'t> Criterion for AscDesc<'t> {
query_tree, query_tree,
candidates, candidates,
filtered_candidates, filtered_candidates,
bucket_candidates, initial_candidates,
}) => { }) => {
self.query_tree = query_tree; self.query_tree = query_tree;
let mut candidates = match (&self.query_tree, candidates) { let mut candidates = match (&self.query_tree, candidates) {
@ -130,9 +130,11 @@ impl<'t> Criterion for AscDesc<'t> {
candidates &= filtered_candidates; candidates &= filtered_candidates;
} }
match bucket_candidates { match initial_candidates {
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates, Some(initial_candidates) => {
None => self.bucket_candidates |= &candidates, self.initial_candidates |= initial_candidates
}
None => self.initial_candidates.map_inplace(|c| c | &candidates),
} }
if candidates.is_empty() { if candidates.is_empty() {
@ -160,7 +162,7 @@ impl<'t> Criterion for AscDesc<'t> {
query_tree: self.query_tree.clone(), query_tree: self.query_tree.clone(),
candidates: Some(candidates), candidates: Some(candidates),
filtered_candidates: None, filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)), initial_candidates: Some(self.initial_candidates.take()),
})); }));
} }
} }

View File

@ -7,7 +7,7 @@ use std::mem::take;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult}; use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult};
use crate::search::criteria::Query; use crate::search::criteria::{InitialCandidates, Query};
use crate::search::query_tree::{Operation, QueryKind}; use crate::search::query_tree::{Operation, QueryKind};
use crate::search::{build_dfa, word_derivations, WordDerivationsCache}; use crate::search::{build_dfa, word_derivations, WordDerivationsCache};
use crate::Result; use crate::Result;
@ -26,7 +26,7 @@ type FlattenedQueryTree = Vec<Vec<Vec<Query>>>;
pub struct Attribute<'t> { pub struct Attribute<'t> {
ctx: &'t dyn Context<'t>, ctx: &'t dyn Context<'t>,
state: Option<(Operation, FlattenedQueryTree, RoaringBitmap)>, state: Option<(Operation, FlattenedQueryTree, RoaringBitmap)>,
bucket_candidates: RoaringBitmap, initial_candidates: InitialCandidates,
parent: Box<dyn Criterion + 't>, parent: Box<dyn Criterion + 't>,
linear_buckets: Option<btree_map::IntoIter<u64, RoaringBitmap>>, linear_buckets: Option<btree_map::IntoIter<u64, RoaringBitmap>>,
set_buckets: Option<BinaryHeap<Branch<'t>>>, set_buckets: Option<BinaryHeap<Branch<'t>>>,
@ -37,7 +37,7 @@ impl<'t> Attribute<'t> {
Attribute { Attribute {
ctx, ctx,
state: None, state: None,
bucket_candidates: RoaringBitmap::new(), initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
parent, parent,
linear_buckets: None, linear_buckets: None,
set_buckets: None, set_buckets: None,
@ -60,7 +60,7 @@ impl<'t> Criterion for Attribute<'t> {
query_tree: Some(query_tree), query_tree: Some(query_tree),
candidates: Some(RoaringBitmap::new()), candidates: Some(RoaringBitmap::new()),
filtered_candidates: None, filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)), initial_candidates: Some(self.initial_candidates.take()),
})); }));
} }
Some((query_tree, flattened_query_tree, mut allowed_candidates)) => { Some((query_tree, flattened_query_tree, mut allowed_candidates)) => {
@ -84,7 +84,7 @@ impl<'t> Criterion for Attribute<'t> {
query_tree: Some(query_tree), query_tree: Some(query_tree),
candidates: Some(RoaringBitmap::new()), candidates: Some(RoaringBitmap::new()),
filtered_candidates: None, filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)), initial_candidates: Some(self.initial_candidates.take()),
})); }));
} }
} }
@ -109,7 +109,7 @@ impl<'t> Criterion for Attribute<'t> {
query_tree: Some(query_tree), query_tree: Some(query_tree),
candidates: Some(RoaringBitmap::new()), candidates: Some(RoaringBitmap::new()),
filtered_candidates: None, filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)), initial_candidates: Some(self.initial_candidates.take()),
})); }));
} }
} }
@ -124,7 +124,7 @@ impl<'t> Criterion for Attribute<'t> {
query_tree: Some(query_tree), query_tree: Some(query_tree),
candidates: Some(found_candidates), candidates: Some(found_candidates),
filtered_candidates: None, filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)), initial_candidates: Some(self.initial_candidates.take()),
})); }));
} }
None => match self.parent.next(params)? { None => match self.parent.next(params)? {
@ -132,7 +132,7 @@ impl<'t> Criterion for Attribute<'t> {
query_tree: Some(query_tree), query_tree: Some(query_tree),
candidates, candidates,
filtered_candidates, filtered_candidates,
bucket_candidates, initial_candidates,
}) => { }) => {
let mut candidates = match candidates { let mut candidates = match candidates {
Some(candidates) => candidates, Some(candidates) => candidates,
@ -148,9 +148,11 @@ impl<'t> Criterion for Attribute<'t> {
let flattened_query_tree = flatten_query_tree(&query_tree); let flattened_query_tree = flatten_query_tree(&query_tree);
match bucket_candidates { match initial_candidates {
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates, Some(initial_candidates) => {
None => self.bucket_candidates |= &candidates, self.initial_candidates |= initial_candidates
}
None => self.initial_candidates.map_inplace(|c| c | &candidates),
} }
self.state = Some((query_tree, flattened_query_tree, candidates)); self.state = Some((query_tree, flattened_query_tree, candidates));
@ -160,13 +162,13 @@ impl<'t> Criterion for Attribute<'t> {
query_tree: None, query_tree: None,
candidates, candidates,
filtered_candidates, filtered_candidates,
bucket_candidates, initial_candidates,
}) => { }) => {
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: None, query_tree: None,
candidates, candidates,
filtered_candidates, filtered_candidates,
bucket_candidates, initial_candidates,
})); }));
} }
None => return Ok(None), None => return Ok(None),

View File

@ -8,6 +8,7 @@ use roaring::RoaringBitmap;
use crate::search::criteria::{ use crate::search::criteria::{
resolve_phrase, resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult, resolve_phrase, resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult,
InitialCandidates,
}; };
use crate::search::query_tree::{Operation, PrimitiveQueryPart}; use crate::search::query_tree::{Operation, PrimitiveQueryPart};
use crate::{absolute_from_relative_position, FieldId, Result}; use crate::{absolute_from_relative_position, FieldId, Result};
@ -16,7 +17,7 @@ pub struct Exactness<'t> {
ctx: &'t dyn Context<'t>, ctx: &'t dyn Context<'t>,
query_tree: Option<Operation>, query_tree: Option<Operation>,
state: Option<State>, state: Option<State>,
bucket_candidates: RoaringBitmap, initial_candidates: InitialCandidates,
parent: Box<dyn Criterion + 't>, parent: Box<dyn Criterion + 't>,
query: Vec<ExactQueryPart>, query: Vec<ExactQueryPart>,
} }
@ -36,7 +37,7 @@ impl<'t> Exactness<'t> {
ctx, ctx,
query_tree: None, query_tree: None,
state: None, state: None,
bucket_candidates: RoaringBitmap::new(), initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
parent, parent,
query, query,
}) })
@ -68,7 +69,7 @@ impl<'t> Criterion for Exactness<'t> {
query_tree: self.query_tree.clone(), query_tree: self.query_tree.clone(),
candidates: Some(candidates), candidates: Some(candidates),
filtered_candidates: None, filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)), initial_candidates: Some(self.initial_candidates.take()),
})); }));
} }
None => match self.parent.next(params)? { None => match self.parent.next(params)? {
@ -76,7 +77,7 @@ impl<'t> Criterion for Exactness<'t> {
query_tree: Some(query_tree), query_tree: Some(query_tree),
candidates, candidates,
filtered_candidates, filtered_candidates,
bucket_candidates, initial_candidates,
}) => { }) => {
let mut candidates = match candidates { let mut candidates = match candidates {
Some(candidates) => candidates, Some(candidates) => candidates,
@ -90,9 +91,11 @@ impl<'t> Criterion for Exactness<'t> {
candidates &= filtered_candidates; candidates &= filtered_candidates;
} }
match bucket_candidates { match initial_candidates {
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates, Some(initial_candidates) => {
None => self.bucket_candidates |= &candidates, self.initial_candidates |= initial_candidates
}
None => self.initial_candidates.map_inplace(|c| c | &candidates),
} }
self.state = Some(State::new(candidates)); self.state = Some(State::new(candidates));
@ -102,13 +105,13 @@ impl<'t> Criterion for Exactness<'t> {
query_tree: None, query_tree: None,
candidates, candidates,
filtered_candidates, filtered_candidates,
bucket_candidates, initial_candidates,
}) => { }) => {
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: None, query_tree: None,
candidates, candidates,
filtered_candidates, filtered_candidates,
bucket_candidates, initial_candidates,
})); }));
} }
None => return Ok(None), None => return Ok(None),

View File

@ -2,6 +2,7 @@ use log::debug;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult}; use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult};
use crate::search::criteria::InitialCandidates;
use crate::search::query_tree::Operation; use crate::search::query_tree::Operation;
use crate::search::WordDerivationsCache; use crate::search::WordDerivationsCache;
use crate::Result; use crate::Result;
@ -14,7 +15,7 @@ pub struct FinalResult {
/// The candidates of the current bucket of the last criterion. /// The candidates of the current bucket of the last criterion.
pub candidates: RoaringBitmap, pub candidates: RoaringBitmap,
/// Candidates that come from the current bucket of the initial criterion. /// Candidates that come from the current bucket of the initial criterion.
pub bucket_candidates: RoaringBitmap, pub initial_candidates: InitialCandidates,
} }
pub struct Final<'t> { pub struct Final<'t> {
@ -49,7 +50,7 @@ impl<'t> Final<'t> {
query_tree, query_tree,
candidates, candidates,
filtered_candidates, filtered_candidates,
bucket_candidates, initial_candidates,
}) => { }) => {
let mut candidates = match (candidates, query_tree.as_ref()) { let mut candidates = match (candidates, query_tree.as_ref()) {
(Some(candidates), _) => candidates, (Some(candidates), _) => candidates,
@ -63,11 +64,12 @@ impl<'t> Final<'t> {
candidates &= filtered_candidates; candidates &= filtered_candidates;
} }
let bucket_candidates = bucket_candidates.unwrap_or_else(|| candidates.clone()); let initial_candidates = initial_candidates
.unwrap_or_else(|| InitialCandidates::Estimated(candidates.clone()));
self.returned_candidates |= &candidates; self.returned_candidates |= &candidates;
Ok(Some(FinalResult { query_tree, candidates, bucket_candidates })) Ok(Some(FinalResult { query_tree, candidates, initial_candidates }))
} }
None => Ok(None), None => Ok(None),
} }

View File

@ -4,7 +4,7 @@ use roaring::RoaringBitmap;
use rstar::RTree; use rstar::RTree;
use super::{Criterion, CriterionParameters, CriterionResult}; use super::{Criterion, CriterionParameters, CriterionResult};
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder}; use crate::search::criteria::{resolve_query_tree, CriteriaBuilder, InitialCandidates};
use crate::{lat_lng_to_xyz, GeoPoint, Index, Result}; use crate::{lat_lng_to_xyz, GeoPoint, Index, Result};
pub struct Geo<'t> { pub struct Geo<'t> {
@ -14,7 +14,7 @@ pub struct Geo<'t> {
parent: Box<dyn Criterion + 't>, parent: Box<dyn Criterion + 't>,
candidates: Box<dyn Iterator<Item = RoaringBitmap>>, candidates: Box<dyn Iterator<Item = RoaringBitmap>>,
allowed_candidates: RoaringBitmap, allowed_candidates: RoaringBitmap,
bucket_candidates: RoaringBitmap, initial_candidates: InitialCandidates,
rtree: Option<RTree<GeoPoint>>, rtree: Option<RTree<GeoPoint>>,
point: [f64; 2], point: [f64; 2],
} }
@ -47,7 +47,7 @@ impl<'t> Geo<'t> {
) -> Result<Self> { ) -> Result<Self> {
let candidates = Box::new(iter::empty()); let candidates = Box::new(iter::empty());
let allowed_candidates = index.geo_faceted_documents_ids(rtxn)?; let allowed_candidates = index.geo_faceted_documents_ids(rtxn)?;
let bucket_candidates = RoaringBitmap::new(); let initial_candidates = InitialCandidates::Estimated(RoaringBitmap::new());
let rtree = index.geo_rtree(rtxn)?; let rtree = index.geo_rtree(rtxn)?;
Ok(Self { Ok(Self {
@ -57,7 +57,7 @@ impl<'t> Geo<'t> {
parent, parent,
candidates, candidates,
allowed_candidates, allowed_candidates,
bucket_candidates, initial_candidates,
rtree, rtree,
point, point,
}) })
@ -77,7 +77,7 @@ impl Criterion for Geo<'_> {
query_tree: None, query_tree: None,
candidates: Some(candidates), candidates: Some(candidates),
filtered_candidates: None, filtered_candidates: None,
bucket_candidates: Some(self.bucket_candidates.clone()), initial_candidates: Some(self.initial_candidates.clone()),
})); }));
} }
None => match self.parent.next(params)? { None => match self.parent.next(params)? {
@ -85,7 +85,7 @@ impl Criterion for Geo<'_> {
query_tree, query_tree,
candidates, candidates,
filtered_candidates, filtered_candidates,
bucket_candidates, initial_candidates,
}) => { }) => {
let mut candidates = match (&query_tree, candidates) { let mut candidates = match (&query_tree, candidates) {
(_, Some(candidates)) => candidates, (_, Some(candidates)) => candidates,
@ -100,9 +100,11 @@ impl Criterion for Geo<'_> {
candidates &= filtered_candidates; candidates &= filtered_candidates;
} }
match bucket_candidates { match initial_candidates {
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates, Some(initial_candidates) => {
None => self.bucket_candidates |= &candidates, self.initial_candidates |= initial_candidates
}
None => self.initial_candidates.map_inplace(|c| c | &candidates),
} }
if candidates.is_empty() { if candidates.is_empty() {

View File

@ -1,7 +1,7 @@
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::{Criterion, CriterionParameters, CriterionResult}; use super::{Criterion, CriterionParameters, CriterionResult};
use crate::search::criteria::{resolve_query_tree, Context}; use crate::search::criteria::{resolve_query_tree, Context, InitialCandidates};
use crate::search::query_tree::Operation; use crate::search::query_tree::Operation;
use crate::search::Distinct; use crate::search::Distinct;
use crate::Result; use crate::Result;
@ -27,7 +27,7 @@ impl<'t, D> Initial<'t, D> {
query_tree, query_tree,
candidates: None, candidates: None,
filtered_candidates, filtered_candidates,
bucket_candidates: None, initial_candidates: None,
}; };
Initial { ctx, answer: Some(answer), exhaustive_number_hits, distinct } Initial { ctx, answer: Some(answer), exhaustive_number_hits, distinct }
} }
@ -41,32 +41,34 @@ impl<D: Distinct> Criterion for Initial<'_, D> {
.map(|mut answer| { .map(|mut answer| {
if self.exhaustive_number_hits && answer.query_tree.is_some() { if self.exhaustive_number_hits && answer.query_tree.is_some() {
// resolve the whole query tree to retrieve an exhaustive list of documents matching the query. // resolve the whole query tree to retrieve an exhaustive list of documents matching the query.
// then remove the potential soft deleted documents.
let mut candidates = resolve_query_tree( let mut candidates = resolve_query_tree(
self.ctx, self.ctx,
answer.query_tree.as_ref().unwrap(), answer.query_tree.as_ref().unwrap(),
params.wdcache, params.wdcache,
)?; )? - params.excluded_candidates;
// Apply the filters on the documents retrieved with the query tree. // Apply the filters on the documents retrieved with the query tree.
if let Some(ref filtered_candidates) = answer.filtered_candidates { if let Some(ref filtered_candidates) = answer.filtered_candidates {
candidates &= filtered_candidates; candidates &= filtered_candidates;
} }
// because the bucket_candidates should be an exhaustive count of the matching documents, // because the initial_candidates should be an exhaustive count of the matching documents,
// we precompute the distinct attributes. // we precompute the distinct attributes.
let bucket_candidates = match &mut self.distinct { let initial_candidates = match &mut self.distinct {
Some(distinct) => { Some(distinct) => {
let mut bucket_candidates = RoaringBitmap::new(); let mut initial_candidates = RoaringBitmap::new();
for c in distinct.distinct(candidates.clone(), RoaringBitmap::new()) { for c in distinct.distinct(candidates.clone(), RoaringBitmap::new()) {
bucket_candidates.insert(c?); initial_candidates.insert(c?);
} }
bucket_candidates initial_candidates
} }
None => candidates.clone(), None => candidates.clone(),
}; };
answer.candidates = Some(candidates); answer.candidates = Some(candidates);
answer.bucket_candidates = Some(bucket_candidates); answer.initial_candidates =
Some(InitialCandidates::Exhaustive(initial_candidates));
} }
Ok(answer) Ok(answer)
}) })

View File

@ -1,5 +1,7 @@
use std::borrow::Cow; use std::borrow::Cow;
use std::collections::HashMap; use std::collections::HashMap;
use std::mem::take;
use std::ops::{BitOr, BitOrAssign};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
@ -41,7 +43,7 @@ pub struct CriterionResult {
/// The candidates, coming from facet filters, that this criterion is allowed to return subsets of. /// The candidates, coming from facet filters, that this criterion is allowed to return subsets of.
filtered_candidates: Option<RoaringBitmap>, filtered_candidates: Option<RoaringBitmap>,
/// Candidates that come from the current bucket of the initial criterion. /// Candidates that come from the current bucket of the initial criterion.
bucket_candidates: Option<RoaringBitmap>, initial_candidates: Option<InitialCandidates>,
} }
#[derive(Debug, PartialEq)] #[derive(Debug, PartialEq)]
@ -65,6 +67,71 @@ impl Default for Candidates {
} }
} }
/// A set of candidates tagged with how complete it is.
///
/// It is either an estimation of the candidates that could be returned,
/// or the exhaustive set of candidates that will be returned
/// if all possible results are fetched.
#[derive(Debug, Clone, PartialEq)]
pub enum InitialCandidates {
/// An estimated set of the candidates that could be returned.
Estimated(RoaringBitmap),
/// The exhaustive set of candidates that will be returned if all possible results are fetched.
Exhaustive(RoaringBitmap),
}
impl InitialCandidates {
fn take(&mut self) -> Self {
match self {
Self::Estimated(c) => Self::Estimated(take(c)),
Self::Exhaustive(c) => Self::Exhaustive(take(c)),
}
}
/// modify the containing roaring bitmap inplace if the set isn't already Exhaustive.
pub fn map_inplace<F>(&mut self, f: F)
where
F: FnOnce(RoaringBitmap) -> RoaringBitmap,
{
if let Self::Estimated(c) = self {
*c = f(take(c))
}
}
pub fn into_inner(self) -> RoaringBitmap {
match self {
Self::Estimated(c) => c,
Self::Exhaustive(c) => c,
}
}
}
impl BitOrAssign for InitialCandidates {
    /// Merges `rhs` into `self` unless `self` is already `Exhaustive`.
    ///
    /// - `Estimated |= Estimated` stores the union of both bitmaps.
    /// - `Estimated |= Exhaustive` replaces `self` with `rhs`.
    /// - `Exhaustive |= _` leaves `self` unchanged.
    fn bitor_assign(&mut self, rhs: Self) {
        match self {
            Self::Exhaustive(_) => {}
            Self::Estimated(current) => {
                let merged = match rhs {
                    Self::Estimated(other) => Self::Estimated(other | &*current),
                    Self::Exhaustive(other) => Self::Exhaustive(other),
                };
                *self = merged;
            }
        }
    }
}
impl BitOr for InitialCandidates {
    type Output = Self;

    /// Combines two sets, giving priority to an `Exhaustive` left-hand side.
    ///
    /// - `Estimated | Estimated` returns the union of both bitmaps as `Estimated`.
    /// - `Estimated | Exhaustive` returns `rhs`.
    /// - `Exhaustive | _` returns `self` unchanged.
    fn bitor(self, rhs: Self) -> Self::Output {
        match (self, rhs) {
            (Self::Estimated(lhs), Self::Estimated(rhs)) => Self::Estimated(rhs | lhs),
            (Self::Estimated(_), Self::Exhaustive(rhs)) => Self::Exhaustive(rhs),
            // `self` is owned here: move it out instead of cloning the whole bitmap.
            (exhaustive @ Self::Exhaustive(_), _) => exhaustive,
        }
    }
}
pub trait Context<'c> { pub trait Context<'c> {
fn documents_ids(&self) -> heed::Result<RoaringBitmap>; fn documents_ids(&self) -> heed::Result<RoaringBitmap>;
fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>; fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;

View File

@ -1,6 +1,5 @@
use std::collections::btree_map::{self, BTreeMap}; use std::collections::btree_map::{self, BTreeMap};
use std::collections::hash_map::HashMap; use std::collections::hash_map::HashMap;
use std::mem::take;
use log::debug; use log::debug;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
@ -10,6 +9,7 @@ use super::{
query_docids, query_pair_proximity_docids, resolve_phrase, resolve_query_tree, Context, query_docids, query_pair_proximity_docids, resolve_phrase, resolve_query_tree, Context,
Criterion, CriterionParameters, CriterionResult, Criterion, CriterionParameters, CriterionResult,
}; };
use crate::search::criteria::InitialCandidates;
use crate::search::query_tree::{maximum_proximity, Operation, Query, QueryKind}; use crate::search::query_tree::{maximum_proximity, Operation, Query, QueryKind};
use crate::search::{build_dfa, WordDerivationsCache}; use crate::search::{build_dfa, WordDerivationsCache};
use crate::{Position, Result}; use crate::{Position, Result};
@ -29,7 +29,7 @@ pub struct Proximity<'t> {
/// (max_proximity, query_tree, allowed_candidates) /// (max_proximity, query_tree, allowed_candidates)
state: Option<(u8, Operation, RoaringBitmap)>, state: Option<(u8, Operation, RoaringBitmap)>,
proximity: u8, proximity: u8,
bucket_candidates: RoaringBitmap, initial_candidates: InitialCandidates,
parent: Box<dyn Criterion + 't>, parent: Box<dyn Criterion + 't>,
candidates_cache: Cache, candidates_cache: Cache,
plane_sweep_cache: Option<btree_map::IntoIter<u8, RoaringBitmap>>, plane_sweep_cache: Option<btree_map::IntoIter<u8, RoaringBitmap>>,
@ -41,7 +41,7 @@ impl<'t> Proximity<'t> {
ctx, ctx,
state: None, state: None,
proximity: 0, proximity: 0,
bucket_candidates: RoaringBitmap::new(), initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
parent, parent,
candidates_cache: Cache::new(), candidates_cache: Cache::new(),
plane_sweep_cache: None, plane_sweep_cache: None,
@ -115,7 +115,7 @@ impl<'t> Criterion for Proximity<'t> {
query_tree: Some(query_tree.clone()), query_tree: Some(query_tree.clone()),
candidates: Some(new_candidates), candidates: Some(new_candidates),
filtered_candidates: None, filtered_candidates: None,
bucket_candidates: Some(take(&mut self.bucket_candidates)), initial_candidates: Some(self.initial_candidates.take()),
})); }));
} }
None => match self.parent.next(params)? { None => match self.parent.next(params)? {
@ -123,7 +123,7 @@ impl<'t> Criterion for Proximity<'t> {
query_tree: Some(query_tree), query_tree: Some(query_tree),
candidates, candidates,
filtered_candidates, filtered_candidates,
bucket_candidates, initial_candidates,
}) => { }) => {
let mut candidates = match candidates { let mut candidates = match candidates {
Some(candidates) => candidates, Some(candidates) => candidates,
@ -137,9 +137,11 @@ impl<'t> Criterion for Proximity<'t> {
candidates &= filtered_candidates; candidates &= filtered_candidates;
} }
match bucket_candidates { match initial_candidates {
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates, Some(initial_candidates) => {
None => self.bucket_candidates |= &candidates, self.initial_candidates |= initial_candidates
}
None => self.initial_candidates.map_inplace(|c| c | &candidates),
} }
let maximum_proximity = maximum_proximity(&query_tree); let maximum_proximity = maximum_proximity(&query_tree);
@ -151,13 +153,13 @@ impl<'t> Criterion for Proximity<'t> {
query_tree: None, query_tree: None,
candidates, candidates,
filtered_candidates, filtered_candidates,
bucket_candidates, initial_candidates,
}) => { }) => {
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: None, query_tree: None,
candidates, candidates,
filtered_candidates, filtered_candidates,
bucket_candidates, initial_candidates,
})); }));
} }
None => return Ok(None), None => return Ok(None),

View File

@ -9,7 +9,7 @@ use super::{
query_docids, resolve_query_tree, Candidates, Context, Criterion, CriterionParameters, query_docids, resolve_query_tree, Candidates, Context, Criterion, CriterionParameters,
CriterionResult, CriterionResult,
}; };
use crate::search::criteria::resolve_phrase; use crate::search::criteria::{resolve_phrase, InitialCandidates};
use crate::search::query_tree::{maximum_typo, Operation, Query, QueryKind}; use crate::search::query_tree::{maximum_typo, Operation, Query, QueryKind};
use crate::search::{word_derivations, WordDerivationsCache}; use crate::search::{word_derivations, WordDerivationsCache};
use crate::Result; use crate::Result;
@ -22,7 +22,7 @@ pub struct Typo<'t> {
/// (max_typos, query_tree, candidates) /// (max_typos, query_tree, candidates)
state: Option<(u8, Operation, Candidates)>, state: Option<(u8, Operation, Candidates)>,
typos: u8, typos: u8,
bucket_candidates: Option<RoaringBitmap>, initial_candidates: Option<InitialCandidates>,
parent: Box<dyn Criterion + 't>, parent: Box<dyn Criterion + 't>,
candidates_cache: HashMap<(Operation, u8), RoaringBitmap>, candidates_cache: HashMap<(Operation, u8), RoaringBitmap>,
} }
@ -33,7 +33,7 @@ impl<'t> Typo<'t> {
ctx, ctx,
state: None, state: None,
typos: 0, typos: 0,
bucket_candidates: None, initial_candidates: None,
parent, parent,
candidates_cache: HashMap::new(), candidates_cache: HashMap::new(),
} }
@ -120,9 +120,9 @@ impl<'t> Criterion for Typo<'t> {
} }
} }
let bucket_candidates = match self.bucket_candidates.as_mut() { let initial_candidates = match self.initial_candidates.as_mut() {
Some(bucket_candidates) => take(bucket_candidates), Some(initial_candidates) => initial_candidates.take(),
None => candidates.clone(), None => InitialCandidates::Estimated(candidates.clone()),
}; };
self.typos += 1; self.typos += 1;
@ -131,7 +131,7 @@ impl<'t> Criterion for Typo<'t> {
query_tree: Some(new_query_tree), query_tree: Some(new_query_tree),
candidates: Some(candidates), candidates: Some(candidates),
filtered_candidates: None, filtered_candidates: None,
bucket_candidates: Some(bucket_candidates), initial_candidates: Some(initial_candidates),
})); }));
} }
None => match self.parent.next(params)? { None => match self.parent.next(params)? {
@ -139,14 +139,9 @@ impl<'t> Criterion for Typo<'t> {
query_tree: Some(query_tree), query_tree: Some(query_tree),
candidates, candidates,
filtered_candidates, filtered_candidates,
bucket_candidates, initial_candidates,
}) => { }) => {
self.bucket_candidates = self.initial_candidates = initial_candidates;
match (self.bucket_candidates.take(), bucket_candidates) {
(Some(self_bc), Some(parent_bc)) => Some(self_bc | parent_bc),
(self_bc, parent_bc) => self_bc.or(parent_bc),
};
let candidates = match candidates.or(filtered_candidates) { let candidates = match candidates.or(filtered_candidates) {
Some(candidates) => { Some(candidates) => {
Candidates::Allowed(candidates - params.excluded_candidates) Candidates::Allowed(candidates - params.excluded_candidates)
@ -162,13 +157,13 @@ impl<'t> Criterion for Typo<'t> {
query_tree: None, query_tree: None,
candidates, candidates,
filtered_candidates, filtered_candidates,
bucket_candidates, initial_candidates,
}) => { }) => {
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: None, query_tree: None,
candidates, candidates,
filtered_candidates, filtered_candidates,
bucket_candidates, initial_candidates,
})); }));
} }
None => return Ok(None), None => return Ok(None),
@ -356,7 +351,7 @@ mod test {
let result = display_criteria(criteria, criterion_parameters); let result = display_criteria(criteria, criterion_parameters);
insta::assert_snapshot!(result, @r###" insta::assert_snapshot!(result, @r###"
CriterionResult { query_tree: None, candidates: None, filtered_candidates: None, bucket_candidates: None } CriterionResult { query_tree: None, candidates: None, filtered_candidates: None, initial_candidates: None }
"###); "###);
} }
@ -399,7 +394,7 @@ mod test {
Exact { word: "split" } Exact { word: "split" }
Exact { word: "this" } Exact { word: "this" }
Exact { word: "world" } Exact { word: "world" }
), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, bucket_candidates: Some(RoaringBitmap<[]>) } ), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, initial_candidates: Some(Estimated(RoaringBitmap<[]>)) }
CriterionResult { query_tree: Some(OR CriterionResult { query_tree: Some(OR
AND AND
@ -408,7 +403,7 @@ mod test {
OR OR
Exact { word: "word" } Exact { word: "word" }
Exact { word: "world" } Exact { word: "world" }
), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, bucket_candidates: Some(RoaringBitmap<[]>) } ), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, initial_candidates: Some(Estimated(RoaringBitmap<[]>)) }
"###); "###);
} }
@ -434,7 +429,7 @@ mod test {
let result = display_criteria(criteria, criterion_parameters); let result = display_criteria(criteria, criterion_parameters);
insta::assert_snapshot!(result, @r###" insta::assert_snapshot!(result, @r###"
CriterionResult { query_tree: None, candidates: None, filtered_candidates: Some(RoaringBitmap<8000 values between 986424 and 4294786076>), bucket_candidates: None } CriterionResult { query_tree: None, candidates: None, filtered_candidates: Some(RoaringBitmap<8000 values between 986424 and 4294786076>), initial_candidates: None }
"###); "###);
} }
@ -482,7 +477,7 @@ mod test {
Exact { word: "split" } Exact { word: "split" }
Exact { word: "this" } Exact { word: "this" }
Exact { word: "world" } Exact { word: "world" }
), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, bucket_candidates: Some(RoaringBitmap<[]>) } ), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, initial_candidates: Some(Estimated(RoaringBitmap<[]>)) }
CriterionResult { query_tree: Some(OR CriterionResult { query_tree: Some(OR
AND AND
@ -491,7 +486,7 @@ mod test {
OR OR
Exact { word: "word" } Exact { word: "word" }
Exact { word: "world" } Exact { word: "world" }
), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, bucket_candidates: Some(RoaringBitmap<[]>) } ), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, initial_candidates: Some(Estimated(RoaringBitmap<[]>)) }
"###); "###);
} }

View File

@ -1,9 +1,8 @@
use std::mem::take;
use log::debug; use log::debug;
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult}; use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult};
use crate::search::criteria::InitialCandidates;
use crate::search::query_tree::Operation; use crate::search::query_tree::Operation;
use crate::Result; use crate::Result;
@ -11,7 +10,7 @@ pub struct Words<'t> {
ctx: &'t dyn Context<'t>, ctx: &'t dyn Context<'t>,
query_trees: Vec<Operation>, query_trees: Vec<Operation>,
candidates: Option<RoaringBitmap>, candidates: Option<RoaringBitmap>,
bucket_candidates: Option<RoaringBitmap>, initial_candidates: Option<InitialCandidates>,
filtered_candidates: Option<RoaringBitmap>, filtered_candidates: Option<RoaringBitmap>,
parent: Box<dyn Criterion + 't>, parent: Box<dyn Criterion + 't>,
} }
@ -22,7 +21,7 @@ impl<'t> Words<'t> {
ctx, ctx,
query_trees: Vec::default(), query_trees: Vec::default(),
candidates: None, candidates: None,
bucket_candidates: None, initial_candidates: None,
parent, parent,
filtered_candidates: None, filtered_candidates: None,
} }
@ -53,13 +52,13 @@ impl<'t> Criterion for Words<'t> {
None => None, None => None,
}; };
let bucket_candidates = self.bucket_candidates.as_mut().map(take); let initial_candidates = self.initial_candidates.clone();
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: Some(query_tree), query_tree: Some(query_tree),
candidates, candidates,
filtered_candidates: self.filtered_candidates.clone(), filtered_candidates: self.filtered_candidates.clone(),
bucket_candidates, initial_candidates,
})); }));
} }
None => match self.parent.next(params)? { None => match self.parent.next(params)? {
@ -67,14 +66,14 @@ impl<'t> Criterion for Words<'t> {
query_tree: Some(query_tree), query_tree: Some(query_tree),
candidates, candidates,
filtered_candidates, filtered_candidates,
bucket_candidates, initial_candidates,
}) => { }) => {
self.query_trees = explode_query_tree(query_tree); self.query_trees = explode_query_tree(query_tree);
self.candidates = candidates; self.candidates = candidates;
self.filtered_candidates = filtered_candidates; self.filtered_candidates = filtered_candidates;
self.bucket_candidates = self.initial_candidates =
match (self.bucket_candidates.take(), bucket_candidates) { match (self.initial_candidates.take(), initial_candidates) {
(Some(self_bc), Some(parent_bc)) => Some(self_bc | parent_bc), (Some(self_bc), Some(parent_bc)) => Some(self_bc | parent_bc),
(self_bc, parent_bc) => self_bc.or(parent_bc), (self_bc, parent_bc) => self_bc.or(parent_bc),
}; };
@ -83,13 +82,13 @@ impl<'t> Criterion for Words<'t> {
query_tree: None, query_tree: None,
candidates, candidates,
filtered_candidates, filtered_candidates,
bucket_candidates, initial_candidates,
}) => { }) => {
return Ok(Some(CriterionResult { return Ok(Some(CriterionResult {
query_tree: None, query_tree: None,
candidates, candidates,
filtered_candidates, filtered_candidates,
bucket_candidates, initial_candidates,
})); }));
} }
None => return Ok(None), None => return Ok(None),

View File

@ -23,6 +23,7 @@ pub use self::matches::{
use self::query_tree::QueryTreeBuilder; use self::query_tree::QueryTreeBuilder;
use crate::error::UserError; use crate::error::UserError;
use crate::search::criteria::r#final::{Final, FinalResult}; use crate::search::criteria::r#final::{Final, FinalResult};
use crate::search::criteria::InitialCandidates;
use crate::{AscDesc, Criterion, DocumentId, Index, Member, Result}; use crate::{AscDesc, Criterion, DocumentId, Index, Member, Result};
// Building these factories is not free. // Building these factories is not free.
@ -235,11 +236,11 @@ impl<'a> Search<'a> {
mut criteria: Final, mut criteria: Final,
) -> Result<SearchResult> { ) -> Result<SearchResult> {
let mut offset = self.offset; let mut offset = self.offset;
let mut initial_candidates = RoaringBitmap::new(); let mut initial_candidates = InitialCandidates::Estimated(RoaringBitmap::new());
let mut excluded_candidates = self.index.soft_deleted_documents_ids(self.rtxn)?; let mut excluded_candidates = self.index.soft_deleted_documents_ids(self.rtxn)?;
let mut documents_ids = Vec::new(); let mut documents_ids = Vec::new();
while let Some(FinalResult { candidates, bucket_candidates, .. }) = while let Some(FinalResult { candidates, initial_candidates: ic, .. }) =
criteria.next(&excluded_candidates)? criteria.next(&excluded_candidates)?
{ {
debug!("Number of candidates found {}", candidates.len()); debug!("Number of candidates found {}", candidates.len());
@ -247,7 +248,7 @@ impl<'a> Search<'a> {
let excluded = take(&mut excluded_candidates); let excluded = take(&mut excluded_candidates);
let mut candidates = distinct.distinct(candidates, excluded); let mut candidates = distinct.distinct(candidates, excluded);
initial_candidates |= bucket_candidates; initial_candidates |= ic;
if offset != 0 { if offset != 0 {
let discarded = candidates.by_ref().take(offset).count(); let discarded = candidates.by_ref().take(offset).count();
@ -265,9 +266,11 @@ impl<'a> Search<'a> {
} }
} }
initial_candidates.map_inplace(|c| c - excluded_candidates);
Ok(SearchResult { Ok(SearchResult {
matching_words, matching_words,
candidates: initial_candidates - excluded_candidates, candidates: initial_candidates.into_inner(),
documents_ids, documents_ids,
}) })
} }