mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-26 14:54:27 +01:00
Merge #729
729: Fix distincted exhaustive hits r=Kerollmops a=ManyTheFish This PR changes the name and behavior of `bucket_candidates`: - `bucket_candidates` becomes `initial_candidates`, which is less confusing - `initial_candidates` is no longer a simple `RoaringBitmap` but an enum that lets us specify whether the candidates are exhaustive or not - this enum ensures that a modification is applied only if the candidates are not already exhaustive. The bug occurred because `initial_candidates` is modified during the bucket sort so that the estimation becomes more and more precise as the search progresses; this was an issue when the `initial_candidates` were already exhaustive. Now, if the candidates are exhaustive, no modifications are made. Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
commit
1f1beae077
@ -9,7 +9,7 @@ use super::{Criterion, CriterionParameters, CriterionResult};
|
|||||||
use crate::facet::FacetType;
|
use crate::facet::FacetType;
|
||||||
use crate::heed_codec::facet::FacetGroupKeyCodec;
|
use crate::heed_codec::facet::FacetGroupKeyCodec;
|
||||||
use crate::heed_codec::ByteSliceRefCodec;
|
use crate::heed_codec::ByteSliceRefCodec;
|
||||||
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
|
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder, InitialCandidates};
|
||||||
use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
|
use crate::search::facet::{ascending_facet_sort, descending_facet_sort};
|
||||||
use crate::search::query_tree::Operation;
|
use crate::search::query_tree::Operation;
|
||||||
use crate::{FieldId, Index, Result};
|
use crate::{FieldId, Index, Result};
|
||||||
@ -27,7 +27,7 @@ pub struct AscDesc<'t> {
|
|||||||
query_tree: Option<Operation>,
|
query_tree: Option<Operation>,
|
||||||
candidates: Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>,
|
candidates: Box<dyn Iterator<Item = heed::Result<RoaringBitmap>> + 't>,
|
||||||
allowed_candidates: RoaringBitmap,
|
allowed_candidates: RoaringBitmap,
|
||||||
bucket_candidates: RoaringBitmap,
|
initial_candidates: InitialCandidates,
|
||||||
faceted_candidates: RoaringBitmap,
|
faceted_candidates: RoaringBitmap,
|
||||||
parent: Box<dyn Criterion + 't>,
|
parent: Box<dyn Criterion + 't>,
|
||||||
}
|
}
|
||||||
@ -81,7 +81,7 @@ impl<'t> AscDesc<'t> {
|
|||||||
candidates: Box::new(std::iter::empty()),
|
candidates: Box::new(std::iter::empty()),
|
||||||
allowed_candidates: RoaringBitmap::new(),
|
allowed_candidates: RoaringBitmap::new(),
|
||||||
faceted_candidates,
|
faceted_candidates,
|
||||||
bucket_candidates: RoaringBitmap::new(),
|
initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
|
||||||
parent,
|
parent,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
@ -106,7 +106,7 @@ impl<'t> Criterion for AscDesc<'t> {
|
|||||||
query_tree: self.query_tree.clone(),
|
query_tree: self.query_tree.clone(),
|
||||||
candidates: Some(take(&mut self.allowed_candidates)),
|
candidates: Some(take(&mut self.allowed_candidates)),
|
||||||
filtered_candidates: None,
|
filtered_candidates: None,
|
||||||
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
initial_candidates: Some(self.initial_candidates.take()),
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
None => match self.parent.next(params)? {
|
None => match self.parent.next(params)? {
|
||||||
@ -114,7 +114,7 @@ impl<'t> Criterion for AscDesc<'t> {
|
|||||||
query_tree,
|
query_tree,
|
||||||
candidates,
|
candidates,
|
||||||
filtered_candidates,
|
filtered_candidates,
|
||||||
bucket_candidates,
|
initial_candidates,
|
||||||
}) => {
|
}) => {
|
||||||
self.query_tree = query_tree;
|
self.query_tree = query_tree;
|
||||||
let mut candidates = match (&self.query_tree, candidates) {
|
let mut candidates = match (&self.query_tree, candidates) {
|
||||||
@ -130,9 +130,11 @@ impl<'t> Criterion for AscDesc<'t> {
|
|||||||
candidates &= filtered_candidates;
|
candidates &= filtered_candidates;
|
||||||
}
|
}
|
||||||
|
|
||||||
match bucket_candidates {
|
match initial_candidates {
|
||||||
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
|
Some(initial_candidates) => {
|
||||||
None => self.bucket_candidates |= &candidates,
|
self.initial_candidates |= initial_candidates
|
||||||
|
}
|
||||||
|
None => self.initial_candidates.map_inplace(|c| c | &candidates),
|
||||||
}
|
}
|
||||||
|
|
||||||
if candidates.is_empty() {
|
if candidates.is_empty() {
|
||||||
@ -160,7 +162,7 @@ impl<'t> Criterion for AscDesc<'t> {
|
|||||||
query_tree: self.query_tree.clone(),
|
query_tree: self.query_tree.clone(),
|
||||||
candidates: Some(candidates),
|
candidates: Some(candidates),
|
||||||
filtered_candidates: None,
|
filtered_candidates: None,
|
||||||
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
initial_candidates: Some(self.initial_candidates.take()),
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -7,7 +7,7 @@ use std::mem::take;
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult};
|
use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult};
|
||||||
use crate::search::criteria::Query;
|
use crate::search::criteria::{InitialCandidates, Query};
|
||||||
use crate::search::query_tree::{Operation, QueryKind};
|
use crate::search::query_tree::{Operation, QueryKind};
|
||||||
use crate::search::{build_dfa, word_derivations, WordDerivationsCache};
|
use crate::search::{build_dfa, word_derivations, WordDerivationsCache};
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
@ -26,7 +26,7 @@ type FlattenedQueryTree = Vec<Vec<Vec<Query>>>;
|
|||||||
pub struct Attribute<'t> {
|
pub struct Attribute<'t> {
|
||||||
ctx: &'t dyn Context<'t>,
|
ctx: &'t dyn Context<'t>,
|
||||||
state: Option<(Operation, FlattenedQueryTree, RoaringBitmap)>,
|
state: Option<(Operation, FlattenedQueryTree, RoaringBitmap)>,
|
||||||
bucket_candidates: RoaringBitmap,
|
initial_candidates: InitialCandidates,
|
||||||
parent: Box<dyn Criterion + 't>,
|
parent: Box<dyn Criterion + 't>,
|
||||||
linear_buckets: Option<btree_map::IntoIter<u64, RoaringBitmap>>,
|
linear_buckets: Option<btree_map::IntoIter<u64, RoaringBitmap>>,
|
||||||
set_buckets: Option<BinaryHeap<Branch<'t>>>,
|
set_buckets: Option<BinaryHeap<Branch<'t>>>,
|
||||||
@ -37,7 +37,7 @@ impl<'t> Attribute<'t> {
|
|||||||
Attribute {
|
Attribute {
|
||||||
ctx,
|
ctx,
|
||||||
state: None,
|
state: None,
|
||||||
bucket_candidates: RoaringBitmap::new(),
|
initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
|
||||||
parent,
|
parent,
|
||||||
linear_buckets: None,
|
linear_buckets: None,
|
||||||
set_buckets: None,
|
set_buckets: None,
|
||||||
@ -60,7 +60,7 @@ impl<'t> Criterion for Attribute<'t> {
|
|||||||
query_tree: Some(query_tree),
|
query_tree: Some(query_tree),
|
||||||
candidates: Some(RoaringBitmap::new()),
|
candidates: Some(RoaringBitmap::new()),
|
||||||
filtered_candidates: None,
|
filtered_candidates: None,
|
||||||
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
initial_candidates: Some(self.initial_candidates.take()),
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
Some((query_tree, flattened_query_tree, mut allowed_candidates)) => {
|
Some((query_tree, flattened_query_tree, mut allowed_candidates)) => {
|
||||||
@ -84,7 +84,7 @@ impl<'t> Criterion for Attribute<'t> {
|
|||||||
query_tree: Some(query_tree),
|
query_tree: Some(query_tree),
|
||||||
candidates: Some(RoaringBitmap::new()),
|
candidates: Some(RoaringBitmap::new()),
|
||||||
filtered_candidates: None,
|
filtered_candidates: None,
|
||||||
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
initial_candidates: Some(self.initial_candidates.take()),
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -109,7 +109,7 @@ impl<'t> Criterion for Attribute<'t> {
|
|||||||
query_tree: Some(query_tree),
|
query_tree: Some(query_tree),
|
||||||
candidates: Some(RoaringBitmap::new()),
|
candidates: Some(RoaringBitmap::new()),
|
||||||
filtered_candidates: None,
|
filtered_candidates: None,
|
||||||
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
initial_candidates: Some(self.initial_candidates.take()),
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -124,7 +124,7 @@ impl<'t> Criterion for Attribute<'t> {
|
|||||||
query_tree: Some(query_tree),
|
query_tree: Some(query_tree),
|
||||||
candidates: Some(found_candidates),
|
candidates: Some(found_candidates),
|
||||||
filtered_candidates: None,
|
filtered_candidates: None,
|
||||||
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
initial_candidates: Some(self.initial_candidates.take()),
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
None => match self.parent.next(params)? {
|
None => match self.parent.next(params)? {
|
||||||
@ -132,7 +132,7 @@ impl<'t> Criterion for Attribute<'t> {
|
|||||||
query_tree: Some(query_tree),
|
query_tree: Some(query_tree),
|
||||||
candidates,
|
candidates,
|
||||||
filtered_candidates,
|
filtered_candidates,
|
||||||
bucket_candidates,
|
initial_candidates,
|
||||||
}) => {
|
}) => {
|
||||||
let mut candidates = match candidates {
|
let mut candidates = match candidates {
|
||||||
Some(candidates) => candidates,
|
Some(candidates) => candidates,
|
||||||
@ -148,9 +148,11 @@ impl<'t> Criterion for Attribute<'t> {
|
|||||||
|
|
||||||
let flattened_query_tree = flatten_query_tree(&query_tree);
|
let flattened_query_tree = flatten_query_tree(&query_tree);
|
||||||
|
|
||||||
match bucket_candidates {
|
match initial_candidates {
|
||||||
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
|
Some(initial_candidates) => {
|
||||||
None => self.bucket_candidates |= &candidates,
|
self.initial_candidates |= initial_candidates
|
||||||
|
}
|
||||||
|
None => self.initial_candidates.map_inplace(|c| c | &candidates),
|
||||||
}
|
}
|
||||||
|
|
||||||
self.state = Some((query_tree, flattened_query_tree, candidates));
|
self.state = Some((query_tree, flattened_query_tree, candidates));
|
||||||
@ -160,13 +162,13 @@ impl<'t> Criterion for Attribute<'t> {
|
|||||||
query_tree: None,
|
query_tree: None,
|
||||||
candidates,
|
candidates,
|
||||||
filtered_candidates,
|
filtered_candidates,
|
||||||
bucket_candidates,
|
initial_candidates,
|
||||||
}) => {
|
}) => {
|
||||||
return Ok(Some(CriterionResult {
|
return Ok(Some(CriterionResult {
|
||||||
query_tree: None,
|
query_tree: None,
|
||||||
candidates,
|
candidates,
|
||||||
filtered_candidates,
|
filtered_candidates,
|
||||||
bucket_candidates,
|
initial_candidates,
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
None => return Ok(None),
|
None => return Ok(None),
|
||||||
|
@ -8,6 +8,7 @@ use roaring::RoaringBitmap;
|
|||||||
|
|
||||||
use crate::search::criteria::{
|
use crate::search::criteria::{
|
||||||
resolve_phrase, resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult,
|
resolve_phrase, resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult,
|
||||||
|
InitialCandidates,
|
||||||
};
|
};
|
||||||
use crate::search::query_tree::{Operation, PrimitiveQueryPart};
|
use crate::search::query_tree::{Operation, PrimitiveQueryPart};
|
||||||
use crate::{absolute_from_relative_position, FieldId, Result};
|
use crate::{absolute_from_relative_position, FieldId, Result};
|
||||||
@ -16,7 +17,7 @@ pub struct Exactness<'t> {
|
|||||||
ctx: &'t dyn Context<'t>,
|
ctx: &'t dyn Context<'t>,
|
||||||
query_tree: Option<Operation>,
|
query_tree: Option<Operation>,
|
||||||
state: Option<State>,
|
state: Option<State>,
|
||||||
bucket_candidates: RoaringBitmap,
|
initial_candidates: InitialCandidates,
|
||||||
parent: Box<dyn Criterion + 't>,
|
parent: Box<dyn Criterion + 't>,
|
||||||
query: Vec<ExactQueryPart>,
|
query: Vec<ExactQueryPart>,
|
||||||
}
|
}
|
||||||
@ -36,7 +37,7 @@ impl<'t> Exactness<'t> {
|
|||||||
ctx,
|
ctx,
|
||||||
query_tree: None,
|
query_tree: None,
|
||||||
state: None,
|
state: None,
|
||||||
bucket_candidates: RoaringBitmap::new(),
|
initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
|
||||||
parent,
|
parent,
|
||||||
query,
|
query,
|
||||||
})
|
})
|
||||||
@ -68,7 +69,7 @@ impl<'t> Criterion for Exactness<'t> {
|
|||||||
query_tree: self.query_tree.clone(),
|
query_tree: self.query_tree.clone(),
|
||||||
candidates: Some(candidates),
|
candidates: Some(candidates),
|
||||||
filtered_candidates: None,
|
filtered_candidates: None,
|
||||||
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
initial_candidates: Some(self.initial_candidates.take()),
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
None => match self.parent.next(params)? {
|
None => match self.parent.next(params)? {
|
||||||
@ -76,7 +77,7 @@ impl<'t> Criterion for Exactness<'t> {
|
|||||||
query_tree: Some(query_tree),
|
query_tree: Some(query_tree),
|
||||||
candidates,
|
candidates,
|
||||||
filtered_candidates,
|
filtered_candidates,
|
||||||
bucket_candidates,
|
initial_candidates,
|
||||||
}) => {
|
}) => {
|
||||||
let mut candidates = match candidates {
|
let mut candidates = match candidates {
|
||||||
Some(candidates) => candidates,
|
Some(candidates) => candidates,
|
||||||
@ -90,9 +91,11 @@ impl<'t> Criterion for Exactness<'t> {
|
|||||||
candidates &= filtered_candidates;
|
candidates &= filtered_candidates;
|
||||||
}
|
}
|
||||||
|
|
||||||
match bucket_candidates {
|
match initial_candidates {
|
||||||
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
|
Some(initial_candidates) => {
|
||||||
None => self.bucket_candidates |= &candidates,
|
self.initial_candidates |= initial_candidates
|
||||||
|
}
|
||||||
|
None => self.initial_candidates.map_inplace(|c| c | &candidates),
|
||||||
}
|
}
|
||||||
|
|
||||||
self.state = Some(State::new(candidates));
|
self.state = Some(State::new(candidates));
|
||||||
@ -102,13 +105,13 @@ impl<'t> Criterion for Exactness<'t> {
|
|||||||
query_tree: None,
|
query_tree: None,
|
||||||
candidates,
|
candidates,
|
||||||
filtered_candidates,
|
filtered_candidates,
|
||||||
bucket_candidates,
|
initial_candidates,
|
||||||
}) => {
|
}) => {
|
||||||
return Ok(Some(CriterionResult {
|
return Ok(Some(CriterionResult {
|
||||||
query_tree: None,
|
query_tree: None,
|
||||||
candidates,
|
candidates,
|
||||||
filtered_candidates,
|
filtered_candidates,
|
||||||
bucket_candidates,
|
initial_candidates,
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
None => return Ok(None),
|
None => return Ok(None),
|
||||||
|
@ -2,6 +2,7 @@ use log::debug;
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult};
|
use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult};
|
||||||
|
use crate::search::criteria::InitialCandidates;
|
||||||
use crate::search::query_tree::Operation;
|
use crate::search::query_tree::Operation;
|
||||||
use crate::search::WordDerivationsCache;
|
use crate::search::WordDerivationsCache;
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
@ -14,7 +15,7 @@ pub struct FinalResult {
|
|||||||
/// The candidates of the current bucket of the last criterion.
|
/// The candidates of the current bucket of the last criterion.
|
||||||
pub candidates: RoaringBitmap,
|
pub candidates: RoaringBitmap,
|
||||||
/// Candidates that comes from the current bucket of the initial criterion.
|
/// Candidates that comes from the current bucket of the initial criterion.
|
||||||
pub bucket_candidates: RoaringBitmap,
|
pub initial_candidates: InitialCandidates,
|
||||||
}
|
}
|
||||||
|
|
||||||
pub struct Final<'t> {
|
pub struct Final<'t> {
|
||||||
@ -49,7 +50,7 @@ impl<'t> Final<'t> {
|
|||||||
query_tree,
|
query_tree,
|
||||||
candidates,
|
candidates,
|
||||||
filtered_candidates,
|
filtered_candidates,
|
||||||
bucket_candidates,
|
initial_candidates,
|
||||||
}) => {
|
}) => {
|
||||||
let mut candidates = match (candidates, query_tree.as_ref()) {
|
let mut candidates = match (candidates, query_tree.as_ref()) {
|
||||||
(Some(candidates), _) => candidates,
|
(Some(candidates), _) => candidates,
|
||||||
@ -63,11 +64,12 @@ impl<'t> Final<'t> {
|
|||||||
candidates &= filtered_candidates;
|
candidates &= filtered_candidates;
|
||||||
}
|
}
|
||||||
|
|
||||||
let bucket_candidates = bucket_candidates.unwrap_or_else(|| candidates.clone());
|
let initial_candidates = initial_candidates
|
||||||
|
.unwrap_or_else(|| InitialCandidates::Estimated(candidates.clone()));
|
||||||
|
|
||||||
self.returned_candidates |= &candidates;
|
self.returned_candidates |= &candidates;
|
||||||
|
|
||||||
Ok(Some(FinalResult { query_tree, candidates, bucket_candidates }))
|
Ok(Some(FinalResult { query_tree, candidates, initial_candidates }))
|
||||||
}
|
}
|
||||||
None => Ok(None),
|
None => Ok(None),
|
||||||
}
|
}
|
||||||
|
@ -4,7 +4,7 @@ use roaring::RoaringBitmap;
|
|||||||
use rstar::RTree;
|
use rstar::RTree;
|
||||||
|
|
||||||
use super::{Criterion, CriterionParameters, CriterionResult};
|
use super::{Criterion, CriterionParameters, CriterionResult};
|
||||||
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder};
|
use crate::search::criteria::{resolve_query_tree, CriteriaBuilder, InitialCandidates};
|
||||||
use crate::{lat_lng_to_xyz, GeoPoint, Index, Result};
|
use crate::{lat_lng_to_xyz, GeoPoint, Index, Result};
|
||||||
|
|
||||||
pub struct Geo<'t> {
|
pub struct Geo<'t> {
|
||||||
@ -14,7 +14,7 @@ pub struct Geo<'t> {
|
|||||||
parent: Box<dyn Criterion + 't>,
|
parent: Box<dyn Criterion + 't>,
|
||||||
candidates: Box<dyn Iterator<Item = RoaringBitmap>>,
|
candidates: Box<dyn Iterator<Item = RoaringBitmap>>,
|
||||||
allowed_candidates: RoaringBitmap,
|
allowed_candidates: RoaringBitmap,
|
||||||
bucket_candidates: RoaringBitmap,
|
initial_candidates: InitialCandidates,
|
||||||
rtree: Option<RTree<GeoPoint>>,
|
rtree: Option<RTree<GeoPoint>>,
|
||||||
point: [f64; 2],
|
point: [f64; 2],
|
||||||
}
|
}
|
||||||
@ -47,7 +47,7 @@ impl<'t> Geo<'t> {
|
|||||||
) -> Result<Self> {
|
) -> Result<Self> {
|
||||||
let candidates = Box::new(iter::empty());
|
let candidates = Box::new(iter::empty());
|
||||||
let allowed_candidates = index.geo_faceted_documents_ids(rtxn)?;
|
let allowed_candidates = index.geo_faceted_documents_ids(rtxn)?;
|
||||||
let bucket_candidates = RoaringBitmap::new();
|
let initial_candidates = InitialCandidates::Estimated(RoaringBitmap::new());
|
||||||
let rtree = index.geo_rtree(rtxn)?;
|
let rtree = index.geo_rtree(rtxn)?;
|
||||||
|
|
||||||
Ok(Self {
|
Ok(Self {
|
||||||
@ -57,7 +57,7 @@ impl<'t> Geo<'t> {
|
|||||||
parent,
|
parent,
|
||||||
candidates,
|
candidates,
|
||||||
allowed_candidates,
|
allowed_candidates,
|
||||||
bucket_candidates,
|
initial_candidates,
|
||||||
rtree,
|
rtree,
|
||||||
point,
|
point,
|
||||||
})
|
})
|
||||||
@ -77,7 +77,7 @@ impl Criterion for Geo<'_> {
|
|||||||
query_tree: None,
|
query_tree: None,
|
||||||
candidates: Some(candidates),
|
candidates: Some(candidates),
|
||||||
filtered_candidates: None,
|
filtered_candidates: None,
|
||||||
bucket_candidates: Some(self.bucket_candidates.clone()),
|
initial_candidates: Some(self.initial_candidates.clone()),
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
None => match self.parent.next(params)? {
|
None => match self.parent.next(params)? {
|
||||||
@ -85,7 +85,7 @@ impl Criterion for Geo<'_> {
|
|||||||
query_tree,
|
query_tree,
|
||||||
candidates,
|
candidates,
|
||||||
filtered_candidates,
|
filtered_candidates,
|
||||||
bucket_candidates,
|
initial_candidates,
|
||||||
}) => {
|
}) => {
|
||||||
let mut candidates = match (&query_tree, candidates) {
|
let mut candidates = match (&query_tree, candidates) {
|
||||||
(_, Some(candidates)) => candidates,
|
(_, Some(candidates)) => candidates,
|
||||||
@ -100,9 +100,11 @@ impl Criterion for Geo<'_> {
|
|||||||
candidates &= filtered_candidates;
|
candidates &= filtered_candidates;
|
||||||
}
|
}
|
||||||
|
|
||||||
match bucket_candidates {
|
match initial_candidates {
|
||||||
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
|
Some(initial_candidates) => {
|
||||||
None => self.bucket_candidates |= &candidates,
|
self.initial_candidates |= initial_candidates
|
||||||
|
}
|
||||||
|
None => self.initial_candidates.map_inplace(|c| c | &candidates),
|
||||||
}
|
}
|
||||||
|
|
||||||
if candidates.is_empty() {
|
if candidates.is_empty() {
|
||||||
|
@ -1,7 +1,7 @@
|
|||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::{Criterion, CriterionParameters, CriterionResult};
|
use super::{Criterion, CriterionParameters, CriterionResult};
|
||||||
use crate::search::criteria::{resolve_query_tree, Context};
|
use crate::search::criteria::{resolve_query_tree, Context, InitialCandidates};
|
||||||
use crate::search::query_tree::Operation;
|
use crate::search::query_tree::Operation;
|
||||||
use crate::search::Distinct;
|
use crate::search::Distinct;
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
@ -27,7 +27,7 @@ impl<'t, D> Initial<'t, D> {
|
|||||||
query_tree,
|
query_tree,
|
||||||
candidates: None,
|
candidates: None,
|
||||||
filtered_candidates,
|
filtered_candidates,
|
||||||
bucket_candidates: None,
|
initial_candidates: None,
|
||||||
};
|
};
|
||||||
Initial { ctx, answer: Some(answer), exhaustive_number_hits, distinct }
|
Initial { ctx, answer: Some(answer), exhaustive_number_hits, distinct }
|
||||||
}
|
}
|
||||||
@ -41,32 +41,34 @@ impl<D: Distinct> Criterion for Initial<'_, D> {
|
|||||||
.map(|mut answer| {
|
.map(|mut answer| {
|
||||||
if self.exhaustive_number_hits && answer.query_tree.is_some() {
|
if self.exhaustive_number_hits && answer.query_tree.is_some() {
|
||||||
// resolve the whole query tree to retrieve an exhaustive list of documents matching the query.
|
// resolve the whole query tree to retrieve an exhaustive list of documents matching the query.
|
||||||
|
// then remove the potential soft deleted documents.
|
||||||
let mut candidates = resolve_query_tree(
|
let mut candidates = resolve_query_tree(
|
||||||
self.ctx,
|
self.ctx,
|
||||||
answer.query_tree.as_ref().unwrap(),
|
answer.query_tree.as_ref().unwrap(),
|
||||||
params.wdcache,
|
params.wdcache,
|
||||||
)?;
|
)? - params.excluded_candidates;
|
||||||
|
|
||||||
// Apply the filters on the documents retrieved with the query tree.
|
// Apply the filters on the documents retrieved with the query tree.
|
||||||
if let Some(ref filtered_candidates) = answer.filtered_candidates {
|
if let Some(ref filtered_candidates) = answer.filtered_candidates {
|
||||||
candidates &= filtered_candidates;
|
candidates &= filtered_candidates;
|
||||||
}
|
}
|
||||||
|
|
||||||
// because the bucket_candidates should be an exhaustive count of the matching documents,
|
// because the initial_candidates should be an exhaustive count of the matching documents,
|
||||||
// we precompute the distinct attributes.
|
// we precompute the distinct attributes.
|
||||||
let bucket_candidates = match &mut self.distinct {
|
let initial_candidates = match &mut self.distinct {
|
||||||
Some(distinct) => {
|
Some(distinct) => {
|
||||||
let mut bucket_candidates = RoaringBitmap::new();
|
let mut initial_candidates = RoaringBitmap::new();
|
||||||
for c in distinct.distinct(candidates.clone(), RoaringBitmap::new()) {
|
for c in distinct.distinct(candidates.clone(), RoaringBitmap::new()) {
|
||||||
bucket_candidates.insert(c?);
|
initial_candidates.insert(c?);
|
||||||
}
|
}
|
||||||
bucket_candidates
|
initial_candidates
|
||||||
}
|
}
|
||||||
None => candidates.clone(),
|
None => candidates.clone(),
|
||||||
};
|
};
|
||||||
|
|
||||||
answer.candidates = Some(candidates);
|
answer.candidates = Some(candidates);
|
||||||
answer.bucket_candidates = Some(bucket_candidates);
|
answer.initial_candidates =
|
||||||
|
Some(InitialCandidates::Exhaustive(initial_candidates));
|
||||||
}
|
}
|
||||||
Ok(answer)
|
Ok(answer)
|
||||||
})
|
})
|
||||||
|
@ -1,5 +1,7 @@
|
|||||||
use std::borrow::Cow;
|
use std::borrow::Cow;
|
||||||
use std::collections::HashMap;
|
use std::collections::HashMap;
|
||||||
|
use std::mem::take;
|
||||||
|
use std::ops::{BitOr, BitOrAssign};
|
||||||
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
@ -41,7 +43,7 @@ pub struct CriterionResult {
|
|||||||
/// The candidates, coming from facet filters, that this criterion is allowed to return subsets of.
|
/// The candidates, coming from facet filters, that this criterion is allowed to return subsets of.
|
||||||
filtered_candidates: Option<RoaringBitmap>,
|
filtered_candidates: Option<RoaringBitmap>,
|
||||||
/// Candidates that comes from the current bucket of the initial criterion.
|
/// Candidates that comes from the current bucket of the initial criterion.
|
||||||
bucket_candidates: Option<RoaringBitmap>,
|
initial_candidates: Option<InitialCandidates>,
|
||||||
}
|
}
|
||||||
|
|
||||||
#[derive(Debug, PartialEq)]
|
#[derive(Debug, PartialEq)]
|
||||||
@ -65,6 +67,71 @@ impl Default for Candidates {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Either a set of candidates that defines the estimated set of candidates
|
||||||
|
/// that could be returned,
|
||||||
|
/// or the Exhaustive set of candidates that will be returned if all possible results are fetched.
|
||||||
|
#[derive(Debug, Clone, PartialEq)]
|
||||||
|
pub enum InitialCandidates {
|
||||||
|
Estimated(RoaringBitmap),
|
||||||
|
Exhaustive(RoaringBitmap),
|
||||||
|
}
|
||||||
|
|
||||||
|
impl InitialCandidates {
|
||||||
|
fn take(&mut self) -> Self {
|
||||||
|
match self {
|
||||||
|
Self::Estimated(c) => Self::Estimated(take(c)),
|
||||||
|
Self::Exhaustive(c) => Self::Exhaustive(take(c)),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Modify the contained roaring bitmap in place if the set isn't already Exhaustive.
|
||||||
|
pub fn map_inplace<F>(&mut self, f: F)
|
||||||
|
where
|
||||||
|
F: FnOnce(RoaringBitmap) -> RoaringBitmap,
|
||||||
|
{
|
||||||
|
if let Self::Estimated(c) = self {
|
||||||
|
*c = f(take(c))
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
pub fn into_inner(self) -> RoaringBitmap {
|
||||||
|
match self {
|
||||||
|
Self::Estimated(c) => c,
|
||||||
|
Self::Exhaustive(c) => c,
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl BitOrAssign for InitialCandidates {
|
||||||
|
/// Compute the union of the contained roaring bitmaps if the set isn't already Exhaustive.
|
||||||
|
/// If rhs is Exhaustive and self is not, then rhs replaces self.
|
||||||
|
fn bitor_assign(&mut self, rhs: Self) {
|
||||||
|
if let Self::Estimated(c) = self {
|
||||||
|
*self = match rhs {
|
||||||
|
Self::Estimated(rhs) => Self::Estimated(rhs | &*c),
|
||||||
|
Self::Exhaustive(rhs) => Self::Exhaustive(rhs),
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
impl BitOr for InitialCandidates {
|
||||||
|
type Output = Self;
|
||||||
|
|
||||||
|
/// Compute the union of the contained roaring bitmaps if the set isn't already Exhaustive.
|
||||||
|
/// If rhs is Exhaustive and self is not, then rhs replaces self.
|
||||||
|
fn bitor(self, rhs: Self) -> Self::Output {
|
||||||
|
if let Self::Estimated(c) = self {
|
||||||
|
match rhs {
|
||||||
|
Self::Estimated(rhs) => Self::Estimated(rhs | c),
|
||||||
|
Self::Exhaustive(rhs) => Self::Exhaustive(rhs),
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
self.clone()
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
pub trait Context<'c> {
|
pub trait Context<'c> {
|
||||||
fn documents_ids(&self) -> heed::Result<RoaringBitmap>;
|
fn documents_ids(&self) -> heed::Result<RoaringBitmap>;
|
||||||
fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;
|
fn word_docids(&self, word: &str) -> heed::Result<Option<RoaringBitmap>>;
|
||||||
|
@ -1,6 +1,5 @@
|
|||||||
use std::collections::btree_map::{self, BTreeMap};
|
use std::collections::btree_map::{self, BTreeMap};
|
||||||
use std::collections::hash_map::HashMap;
|
use std::collections::hash_map::HashMap;
|
||||||
use std::mem::take;
|
|
||||||
|
|
||||||
use log::debug;
|
use log::debug;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
@ -10,6 +9,7 @@ use super::{
|
|||||||
query_docids, query_pair_proximity_docids, resolve_phrase, resolve_query_tree, Context,
|
query_docids, query_pair_proximity_docids, resolve_phrase, resolve_query_tree, Context,
|
||||||
Criterion, CriterionParameters, CriterionResult,
|
Criterion, CriterionParameters, CriterionResult,
|
||||||
};
|
};
|
||||||
|
use crate::search::criteria::InitialCandidates;
|
||||||
use crate::search::query_tree::{maximum_proximity, Operation, Query, QueryKind};
|
use crate::search::query_tree::{maximum_proximity, Operation, Query, QueryKind};
|
||||||
use crate::search::{build_dfa, WordDerivationsCache};
|
use crate::search::{build_dfa, WordDerivationsCache};
|
||||||
use crate::{Position, Result};
|
use crate::{Position, Result};
|
||||||
@ -29,7 +29,7 @@ pub struct Proximity<'t> {
|
|||||||
/// (max_proximity, query_tree, allowed_candidates)
|
/// (max_proximity, query_tree, allowed_candidates)
|
||||||
state: Option<(u8, Operation, RoaringBitmap)>,
|
state: Option<(u8, Operation, RoaringBitmap)>,
|
||||||
proximity: u8,
|
proximity: u8,
|
||||||
bucket_candidates: RoaringBitmap,
|
initial_candidates: InitialCandidates,
|
||||||
parent: Box<dyn Criterion + 't>,
|
parent: Box<dyn Criterion + 't>,
|
||||||
candidates_cache: Cache,
|
candidates_cache: Cache,
|
||||||
plane_sweep_cache: Option<btree_map::IntoIter<u8, RoaringBitmap>>,
|
plane_sweep_cache: Option<btree_map::IntoIter<u8, RoaringBitmap>>,
|
||||||
@ -41,7 +41,7 @@ impl<'t> Proximity<'t> {
|
|||||||
ctx,
|
ctx,
|
||||||
state: None,
|
state: None,
|
||||||
proximity: 0,
|
proximity: 0,
|
||||||
bucket_candidates: RoaringBitmap::new(),
|
initial_candidates: InitialCandidates::Estimated(RoaringBitmap::new()),
|
||||||
parent,
|
parent,
|
||||||
candidates_cache: Cache::new(),
|
candidates_cache: Cache::new(),
|
||||||
plane_sweep_cache: None,
|
plane_sweep_cache: None,
|
||||||
@ -115,7 +115,7 @@ impl<'t> Criterion for Proximity<'t> {
|
|||||||
query_tree: Some(query_tree.clone()),
|
query_tree: Some(query_tree.clone()),
|
||||||
candidates: Some(new_candidates),
|
candidates: Some(new_candidates),
|
||||||
filtered_candidates: None,
|
filtered_candidates: None,
|
||||||
bucket_candidates: Some(take(&mut self.bucket_candidates)),
|
initial_candidates: Some(self.initial_candidates.take()),
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
None => match self.parent.next(params)? {
|
None => match self.parent.next(params)? {
|
||||||
@ -123,7 +123,7 @@ impl<'t> Criterion for Proximity<'t> {
|
|||||||
query_tree: Some(query_tree),
|
query_tree: Some(query_tree),
|
||||||
candidates,
|
candidates,
|
||||||
filtered_candidates,
|
filtered_candidates,
|
||||||
bucket_candidates,
|
initial_candidates,
|
||||||
}) => {
|
}) => {
|
||||||
let mut candidates = match candidates {
|
let mut candidates = match candidates {
|
||||||
Some(candidates) => candidates,
|
Some(candidates) => candidates,
|
||||||
@ -137,9 +137,11 @@ impl<'t> Criterion for Proximity<'t> {
|
|||||||
candidates &= filtered_candidates;
|
candidates &= filtered_candidates;
|
||||||
}
|
}
|
||||||
|
|
||||||
match bucket_candidates {
|
match initial_candidates {
|
||||||
Some(bucket_candidates) => self.bucket_candidates |= bucket_candidates,
|
Some(initial_candidates) => {
|
||||||
None => self.bucket_candidates |= &candidates,
|
self.initial_candidates |= initial_candidates
|
||||||
|
}
|
||||||
|
None => self.initial_candidates.map_inplace(|c| c | &candidates),
|
||||||
}
|
}
|
||||||
|
|
||||||
let maximum_proximity = maximum_proximity(&query_tree);
|
let maximum_proximity = maximum_proximity(&query_tree);
|
||||||
@ -151,13 +153,13 @@ impl<'t> Criterion for Proximity<'t> {
|
|||||||
query_tree: None,
|
query_tree: None,
|
||||||
candidates,
|
candidates,
|
||||||
filtered_candidates,
|
filtered_candidates,
|
||||||
bucket_candidates,
|
initial_candidates,
|
||||||
}) => {
|
}) => {
|
||||||
return Ok(Some(CriterionResult {
|
return Ok(Some(CriterionResult {
|
||||||
query_tree: None,
|
query_tree: None,
|
||||||
candidates,
|
candidates,
|
||||||
filtered_candidates,
|
filtered_candidates,
|
||||||
bucket_candidates,
|
initial_candidates,
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
None => return Ok(None),
|
None => return Ok(None),
|
||||||
|
@ -9,7 +9,7 @@ use super::{
|
|||||||
query_docids, resolve_query_tree, Candidates, Context, Criterion, CriterionParameters,
|
query_docids, resolve_query_tree, Candidates, Context, Criterion, CriterionParameters,
|
||||||
CriterionResult,
|
CriterionResult,
|
||||||
};
|
};
|
||||||
use crate::search::criteria::resolve_phrase;
|
use crate::search::criteria::{resolve_phrase, InitialCandidates};
|
||||||
use crate::search::query_tree::{maximum_typo, Operation, Query, QueryKind};
|
use crate::search::query_tree::{maximum_typo, Operation, Query, QueryKind};
|
||||||
use crate::search::{word_derivations, WordDerivationsCache};
|
use crate::search::{word_derivations, WordDerivationsCache};
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
@ -22,7 +22,7 @@ pub struct Typo<'t> {
|
|||||||
/// (max_typos, query_tree, candidates)
|
/// (max_typos, query_tree, candidates)
|
||||||
state: Option<(u8, Operation, Candidates)>,
|
state: Option<(u8, Operation, Candidates)>,
|
||||||
typos: u8,
|
typos: u8,
|
||||||
bucket_candidates: Option<RoaringBitmap>,
|
initial_candidates: Option<InitialCandidates>,
|
||||||
parent: Box<dyn Criterion + 't>,
|
parent: Box<dyn Criterion + 't>,
|
||||||
candidates_cache: HashMap<(Operation, u8), RoaringBitmap>,
|
candidates_cache: HashMap<(Operation, u8), RoaringBitmap>,
|
||||||
}
|
}
|
||||||
@ -33,7 +33,7 @@ impl<'t> Typo<'t> {
|
|||||||
ctx,
|
ctx,
|
||||||
state: None,
|
state: None,
|
||||||
typos: 0,
|
typos: 0,
|
||||||
bucket_candidates: None,
|
initial_candidates: None,
|
||||||
parent,
|
parent,
|
||||||
candidates_cache: HashMap::new(),
|
candidates_cache: HashMap::new(),
|
||||||
}
|
}
|
||||||
@ -120,9 +120,9 @@ impl<'t> Criterion for Typo<'t> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let bucket_candidates = match self.bucket_candidates.as_mut() {
|
let initial_candidates = match self.initial_candidates.as_mut() {
|
||||||
Some(bucket_candidates) => take(bucket_candidates),
|
Some(initial_candidates) => initial_candidates.take(),
|
||||||
None => candidates.clone(),
|
None => InitialCandidates::Estimated(candidates.clone()),
|
||||||
};
|
};
|
||||||
|
|
||||||
self.typos += 1;
|
self.typos += 1;
|
||||||
@ -131,7 +131,7 @@ impl<'t> Criterion for Typo<'t> {
|
|||||||
query_tree: Some(new_query_tree),
|
query_tree: Some(new_query_tree),
|
||||||
candidates: Some(candidates),
|
candidates: Some(candidates),
|
||||||
filtered_candidates: None,
|
filtered_candidates: None,
|
||||||
bucket_candidates: Some(bucket_candidates),
|
initial_candidates: Some(initial_candidates),
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
None => match self.parent.next(params)? {
|
None => match self.parent.next(params)? {
|
||||||
@ -139,14 +139,9 @@ impl<'t> Criterion for Typo<'t> {
|
|||||||
query_tree: Some(query_tree),
|
query_tree: Some(query_tree),
|
||||||
candidates,
|
candidates,
|
||||||
filtered_candidates,
|
filtered_candidates,
|
||||||
bucket_candidates,
|
initial_candidates,
|
||||||
}) => {
|
}) => {
|
||||||
self.bucket_candidates =
|
self.initial_candidates = initial_candidates;
|
||||||
match (self.bucket_candidates.take(), bucket_candidates) {
|
|
||||||
(Some(self_bc), Some(parent_bc)) => Some(self_bc | parent_bc),
|
|
||||||
(self_bc, parent_bc) => self_bc.or(parent_bc),
|
|
||||||
};
|
|
||||||
|
|
||||||
let candidates = match candidates.or(filtered_candidates) {
|
let candidates = match candidates.or(filtered_candidates) {
|
||||||
Some(candidates) => {
|
Some(candidates) => {
|
||||||
Candidates::Allowed(candidates - params.excluded_candidates)
|
Candidates::Allowed(candidates - params.excluded_candidates)
|
||||||
@ -162,13 +157,13 @@ impl<'t> Criterion for Typo<'t> {
|
|||||||
query_tree: None,
|
query_tree: None,
|
||||||
candidates,
|
candidates,
|
||||||
filtered_candidates,
|
filtered_candidates,
|
||||||
bucket_candidates,
|
initial_candidates,
|
||||||
}) => {
|
}) => {
|
||||||
return Ok(Some(CriterionResult {
|
return Ok(Some(CriterionResult {
|
||||||
query_tree: None,
|
query_tree: None,
|
||||||
candidates,
|
candidates,
|
||||||
filtered_candidates,
|
filtered_candidates,
|
||||||
bucket_candidates,
|
initial_candidates,
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
None => return Ok(None),
|
None => return Ok(None),
|
||||||
@ -356,7 +351,7 @@ mod test {
|
|||||||
|
|
||||||
let result = display_criteria(criteria, criterion_parameters);
|
let result = display_criteria(criteria, criterion_parameters);
|
||||||
insta::assert_snapshot!(result, @r###"
|
insta::assert_snapshot!(result, @r###"
|
||||||
CriterionResult { query_tree: None, candidates: None, filtered_candidates: None, bucket_candidates: None }
|
CriterionResult { query_tree: None, candidates: None, filtered_candidates: None, initial_candidates: None }
|
||||||
|
|
||||||
"###);
|
"###);
|
||||||
}
|
}
|
||||||
@ -399,7 +394,7 @@ mod test {
|
|||||||
Exact { word: "split" }
|
Exact { word: "split" }
|
||||||
Exact { word: "this" }
|
Exact { word: "this" }
|
||||||
Exact { word: "world" }
|
Exact { word: "world" }
|
||||||
), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, bucket_candidates: Some(RoaringBitmap<[]>) }
|
), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, initial_candidates: Some(Estimated(RoaringBitmap<[]>)) }
|
||||||
|
|
||||||
CriterionResult { query_tree: Some(OR
|
CriterionResult { query_tree: Some(OR
|
||||||
AND
|
AND
|
||||||
@ -408,7 +403,7 @@ mod test {
|
|||||||
OR
|
OR
|
||||||
Exact { word: "word" }
|
Exact { word: "word" }
|
||||||
Exact { word: "world" }
|
Exact { word: "world" }
|
||||||
), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, bucket_candidates: Some(RoaringBitmap<[]>) }
|
), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, initial_candidates: Some(Estimated(RoaringBitmap<[]>)) }
|
||||||
|
|
||||||
"###);
|
"###);
|
||||||
}
|
}
|
||||||
@ -434,7 +429,7 @@ mod test {
|
|||||||
|
|
||||||
let result = display_criteria(criteria, criterion_parameters);
|
let result = display_criteria(criteria, criterion_parameters);
|
||||||
insta::assert_snapshot!(result, @r###"
|
insta::assert_snapshot!(result, @r###"
|
||||||
CriterionResult { query_tree: None, candidates: None, filtered_candidates: Some(RoaringBitmap<8000 values between 986424 and 4294786076>), bucket_candidates: None }
|
CriterionResult { query_tree: None, candidates: None, filtered_candidates: Some(RoaringBitmap<8000 values between 986424 and 4294786076>), initial_candidates: None }
|
||||||
|
|
||||||
"###);
|
"###);
|
||||||
}
|
}
|
||||||
@ -482,7 +477,7 @@ mod test {
|
|||||||
Exact { word: "split" }
|
Exact { word: "split" }
|
||||||
Exact { word: "this" }
|
Exact { word: "this" }
|
||||||
Exact { word: "world" }
|
Exact { word: "world" }
|
||||||
), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, bucket_candidates: Some(RoaringBitmap<[]>) }
|
), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, initial_candidates: Some(Estimated(RoaringBitmap<[]>)) }
|
||||||
|
|
||||||
CriterionResult { query_tree: Some(OR
|
CriterionResult { query_tree: Some(OR
|
||||||
AND
|
AND
|
||||||
@ -491,7 +486,7 @@ mod test {
|
|||||||
OR
|
OR
|
||||||
Exact { word: "word" }
|
Exact { word: "word" }
|
||||||
Exact { word: "world" }
|
Exact { word: "world" }
|
||||||
), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, bucket_candidates: Some(RoaringBitmap<[]>) }
|
), candidates: Some(RoaringBitmap<[]>), filtered_candidates: None, initial_candidates: Some(Estimated(RoaringBitmap<[]>)) }
|
||||||
|
|
||||||
"###);
|
"###);
|
||||||
}
|
}
|
||||||
|
@ -1,9 +1,8 @@
|
|||||||
use std::mem::take;
|
|
||||||
|
|
||||||
use log::debug;
|
use log::debug;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult};
|
use super::{resolve_query_tree, Context, Criterion, CriterionParameters, CriterionResult};
|
||||||
|
use crate::search::criteria::InitialCandidates;
|
||||||
use crate::search::query_tree::Operation;
|
use crate::search::query_tree::Operation;
|
||||||
use crate::Result;
|
use crate::Result;
|
||||||
|
|
||||||
@ -11,7 +10,7 @@ pub struct Words<'t> {
|
|||||||
ctx: &'t dyn Context<'t>,
|
ctx: &'t dyn Context<'t>,
|
||||||
query_trees: Vec<Operation>,
|
query_trees: Vec<Operation>,
|
||||||
candidates: Option<RoaringBitmap>,
|
candidates: Option<RoaringBitmap>,
|
||||||
bucket_candidates: Option<RoaringBitmap>,
|
initial_candidates: Option<InitialCandidates>,
|
||||||
filtered_candidates: Option<RoaringBitmap>,
|
filtered_candidates: Option<RoaringBitmap>,
|
||||||
parent: Box<dyn Criterion + 't>,
|
parent: Box<dyn Criterion + 't>,
|
||||||
}
|
}
|
||||||
@ -22,7 +21,7 @@ impl<'t> Words<'t> {
|
|||||||
ctx,
|
ctx,
|
||||||
query_trees: Vec::default(),
|
query_trees: Vec::default(),
|
||||||
candidates: None,
|
candidates: None,
|
||||||
bucket_candidates: None,
|
initial_candidates: None,
|
||||||
parent,
|
parent,
|
||||||
filtered_candidates: None,
|
filtered_candidates: None,
|
||||||
}
|
}
|
||||||
@ -53,13 +52,13 @@ impl<'t> Criterion for Words<'t> {
|
|||||||
None => None,
|
None => None,
|
||||||
};
|
};
|
||||||
|
|
||||||
let bucket_candidates = self.bucket_candidates.as_mut().map(take);
|
let initial_candidates = self.initial_candidates.clone();
|
||||||
|
|
||||||
return Ok(Some(CriterionResult {
|
return Ok(Some(CriterionResult {
|
||||||
query_tree: Some(query_tree),
|
query_tree: Some(query_tree),
|
||||||
candidates,
|
candidates,
|
||||||
filtered_candidates: self.filtered_candidates.clone(),
|
filtered_candidates: self.filtered_candidates.clone(),
|
||||||
bucket_candidates,
|
initial_candidates,
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
None => match self.parent.next(params)? {
|
None => match self.parent.next(params)? {
|
||||||
@ -67,14 +66,14 @@ impl<'t> Criterion for Words<'t> {
|
|||||||
query_tree: Some(query_tree),
|
query_tree: Some(query_tree),
|
||||||
candidates,
|
candidates,
|
||||||
filtered_candidates,
|
filtered_candidates,
|
||||||
bucket_candidates,
|
initial_candidates,
|
||||||
}) => {
|
}) => {
|
||||||
self.query_trees = explode_query_tree(query_tree);
|
self.query_trees = explode_query_tree(query_tree);
|
||||||
self.candidates = candidates;
|
self.candidates = candidates;
|
||||||
self.filtered_candidates = filtered_candidates;
|
self.filtered_candidates = filtered_candidates;
|
||||||
|
|
||||||
self.bucket_candidates =
|
self.initial_candidates =
|
||||||
match (self.bucket_candidates.take(), bucket_candidates) {
|
match (self.initial_candidates.take(), initial_candidates) {
|
||||||
(Some(self_bc), Some(parent_bc)) => Some(self_bc | parent_bc),
|
(Some(self_bc), Some(parent_bc)) => Some(self_bc | parent_bc),
|
||||||
(self_bc, parent_bc) => self_bc.or(parent_bc),
|
(self_bc, parent_bc) => self_bc.or(parent_bc),
|
||||||
};
|
};
|
||||||
@ -83,13 +82,13 @@ impl<'t> Criterion for Words<'t> {
|
|||||||
query_tree: None,
|
query_tree: None,
|
||||||
candidates,
|
candidates,
|
||||||
filtered_candidates,
|
filtered_candidates,
|
||||||
bucket_candidates,
|
initial_candidates,
|
||||||
}) => {
|
}) => {
|
||||||
return Ok(Some(CriterionResult {
|
return Ok(Some(CriterionResult {
|
||||||
query_tree: None,
|
query_tree: None,
|
||||||
candidates,
|
candidates,
|
||||||
filtered_candidates,
|
filtered_candidates,
|
||||||
bucket_candidates,
|
initial_candidates,
|
||||||
}));
|
}));
|
||||||
}
|
}
|
||||||
None => return Ok(None),
|
None => return Ok(None),
|
||||||
|
@ -23,6 +23,7 @@ pub use self::matches::{
|
|||||||
use self::query_tree::QueryTreeBuilder;
|
use self::query_tree::QueryTreeBuilder;
|
||||||
use crate::error::UserError;
|
use crate::error::UserError;
|
||||||
use crate::search::criteria::r#final::{Final, FinalResult};
|
use crate::search::criteria::r#final::{Final, FinalResult};
|
||||||
|
use crate::search::criteria::InitialCandidates;
|
||||||
use crate::{AscDesc, Criterion, DocumentId, Index, Member, Result};
|
use crate::{AscDesc, Criterion, DocumentId, Index, Member, Result};
|
||||||
|
|
||||||
// Building these factories is not free.
|
// Building these factories is not free.
|
||||||
@ -235,11 +236,11 @@ impl<'a> Search<'a> {
|
|||||||
mut criteria: Final,
|
mut criteria: Final,
|
||||||
) -> Result<SearchResult> {
|
) -> Result<SearchResult> {
|
||||||
let mut offset = self.offset;
|
let mut offset = self.offset;
|
||||||
let mut initial_candidates = RoaringBitmap::new();
|
let mut initial_candidates = InitialCandidates::Estimated(RoaringBitmap::new());
|
||||||
let mut excluded_candidates = self.index.soft_deleted_documents_ids(self.rtxn)?;
|
let mut excluded_candidates = self.index.soft_deleted_documents_ids(self.rtxn)?;
|
||||||
let mut documents_ids = Vec::new();
|
let mut documents_ids = Vec::new();
|
||||||
|
|
||||||
while let Some(FinalResult { candidates, bucket_candidates, .. }) =
|
while let Some(FinalResult { candidates, initial_candidates: ic, .. }) =
|
||||||
criteria.next(&excluded_candidates)?
|
criteria.next(&excluded_candidates)?
|
||||||
{
|
{
|
||||||
debug!("Number of candidates found {}", candidates.len());
|
debug!("Number of candidates found {}", candidates.len());
|
||||||
@ -247,7 +248,7 @@ impl<'a> Search<'a> {
|
|||||||
let excluded = take(&mut excluded_candidates);
|
let excluded = take(&mut excluded_candidates);
|
||||||
let mut candidates = distinct.distinct(candidates, excluded);
|
let mut candidates = distinct.distinct(candidates, excluded);
|
||||||
|
|
||||||
initial_candidates |= bucket_candidates;
|
initial_candidates |= ic;
|
||||||
|
|
||||||
if offset != 0 {
|
if offset != 0 {
|
||||||
let discarded = candidates.by_ref().take(offset).count();
|
let discarded = candidates.by_ref().take(offset).count();
|
||||||
@ -265,9 +266,11 @@ impl<'a> Search<'a> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
initial_candidates.map_inplace(|c| c - excluded_candidates);
|
||||||
|
|
||||||
Ok(SearchResult {
|
Ok(SearchResult {
|
||||||
matching_words,
|
matching_words,
|
||||||
candidates: initial_candidates - excluded_candidates,
|
candidates: initial_candidates.into_inner(),
|
||||||
documents_ids,
|
documents_ids,
|
||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
@ -8,7 +8,7 @@ use Criterion::*;
|
|||||||
use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
|
use crate::search::{self, EXTERNAL_DOCUMENTS_IDS};
|
||||||
|
|
||||||
macro_rules! test_distinct {
|
macro_rules! test_distinct {
|
||||||
($func:ident, $distinct:ident, $criteria:expr, $n_res:expr) => {
|
($func:ident, $distinct:ident, $exhaustive:ident, $limit:expr, $criteria:expr, $n_res:expr) => {
|
||||||
#[test]
|
#[test]
|
||||||
fn $func() {
|
fn $func() {
|
||||||
let criteria = $criteria;
|
let criteria = $criteria;
|
||||||
@ -26,7 +26,8 @@ macro_rules! test_distinct {
|
|||||||
|
|
||||||
let mut search = Search::new(&rtxn, &index);
|
let mut search = Search::new(&rtxn, &index);
|
||||||
search.query(search::TEST_QUERY);
|
search.query(search::TEST_QUERY);
|
||||||
search.limit(EXTERNAL_DOCUMENTS_IDS.len());
|
search.limit($limit);
|
||||||
|
search.exhaustive_number_hits($exhaustive);
|
||||||
search.authorize_typos(true);
|
search.authorize_typos(true);
|
||||||
search.terms_matching_strategy(TermsMatchingStrategy::default());
|
search.terms_matching_strategy(TermsMatchingStrategy::default());
|
||||||
|
|
||||||
@ -46,6 +47,7 @@ macro_rules! test_distinct {
|
|||||||
Some(d.id)
|
Some(d.id)
|
||||||
}
|
}
|
||||||
})
|
})
|
||||||
|
.take($limit)
|
||||||
.collect();
|
.collect();
|
||||||
|
|
||||||
let documents_ids = search::internal_to_external_ids(&index, &documents_ids);
|
let documents_ids = search::internal_to_external_ids(&index, &documents_ids);
|
||||||
@ -54,25 +56,116 @@ macro_rules! test_distinct {
|
|||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
test_distinct!(
|
||||||
|
exhaustive_distinct_string_default_criteria,
|
||||||
|
tag,
|
||||||
|
true,
|
||||||
|
1,
|
||||||
|
vec![Words, Typo, Proximity, Attribute, Exactness],
|
||||||
|
3
|
||||||
|
);
|
||||||
|
test_distinct!(
|
||||||
|
exhaustive_distinct_number_default_criteria,
|
||||||
|
asc_desc_rank,
|
||||||
|
true,
|
||||||
|
1,
|
||||||
|
vec![Words, Typo, Proximity, Attribute, Exactness],
|
||||||
|
7
|
||||||
|
);
|
||||||
|
|
||||||
test_distinct!(
|
test_distinct!(
|
||||||
distinct_string_default_criteria,
|
distinct_string_default_criteria,
|
||||||
tag,
|
tag,
|
||||||
|
false,
|
||||||
|
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||||
vec![Words, Typo, Proximity, Attribute, Exactness],
|
vec![Words, Typo, Proximity, Attribute, Exactness],
|
||||||
3
|
3
|
||||||
);
|
);
|
||||||
test_distinct!(
|
test_distinct!(
|
||||||
distinct_number_default_criteria,
|
distinct_number_default_criteria,
|
||||||
asc_desc_rank,
|
asc_desc_rank,
|
||||||
|
false,
|
||||||
|
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||||
vec![Words, Typo, Proximity, Attribute, Exactness],
|
vec![Words, Typo, Proximity, Attribute, Exactness],
|
||||||
7
|
7
|
||||||
);
|
);
|
||||||
test_distinct!(distinct_string_criterion_words, tag, vec![Words], 3);
|
test_distinct!(
|
||||||
test_distinct!(distinct_number_criterion_words, asc_desc_rank, vec![Words], 7);
|
distinct_string_criterion_words,
|
||||||
test_distinct!(distinct_string_criterion_words_typo, tag, vec![Words, Typo], 3);
|
tag,
|
||||||
test_distinct!(distinct_number_criterion_words_typo, asc_desc_rank, vec![Words, Typo], 7);
|
false,
|
||||||
test_distinct!(distinct_string_criterion_words_proximity, tag, vec![Words, Proximity], 3);
|
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||||
test_distinct!(distinct_number_criterion_words_proximity, asc_desc_rank, vec![Words, Proximity], 7);
|
vec![Words],
|
||||||
test_distinct!(distinct_string_criterion_words_attribute, tag, vec![Words, Attribute], 3);
|
3
|
||||||
test_distinct!(distinct_number_criterion_words_attribute, asc_desc_rank, vec![Words, Attribute], 7);
|
);
|
||||||
test_distinct!(distinct_string_criterion_words_exactness, tag, vec![Words, Exactness], 3);
|
test_distinct!(
|
||||||
test_distinct!(distinct_number_criterion_words_exactness, asc_desc_rank, vec![Words, Exactness], 7);
|
distinct_number_criterion_words,
|
||||||
|
asc_desc_rank,
|
||||||
|
false,
|
||||||
|
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||||
|
vec![Words],
|
||||||
|
7
|
||||||
|
);
|
||||||
|
test_distinct!(
|
||||||
|
distinct_string_criterion_words_typo,
|
||||||
|
tag,
|
||||||
|
false,
|
||||||
|
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||||
|
vec![Words, Typo],
|
||||||
|
3
|
||||||
|
);
|
||||||
|
test_distinct!(
|
||||||
|
distinct_number_criterion_words_typo,
|
||||||
|
asc_desc_rank,
|
||||||
|
false,
|
||||||
|
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||||
|
vec![Words, Typo],
|
||||||
|
7
|
||||||
|
);
|
||||||
|
test_distinct!(
|
||||||
|
distinct_string_criterion_words_proximity,
|
||||||
|
tag,
|
||||||
|
false,
|
||||||
|
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||||
|
vec![Words, Proximity],
|
||||||
|
3
|
||||||
|
);
|
||||||
|
test_distinct!(
|
||||||
|
distinct_number_criterion_words_proximity,
|
||||||
|
asc_desc_rank,
|
||||||
|
false,
|
||||||
|
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||||
|
vec![Words, Proximity],
|
||||||
|
7
|
||||||
|
);
|
||||||
|
test_distinct!(
|
||||||
|
distinct_string_criterion_words_attribute,
|
||||||
|
tag,
|
||||||
|
false,
|
||||||
|
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||||
|
vec![Words, Attribute],
|
||||||
|
3
|
||||||
|
);
|
||||||
|
test_distinct!(
|
||||||
|
distinct_number_criterion_words_attribute,
|
||||||
|
asc_desc_rank,
|
||||||
|
false,
|
||||||
|
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||||
|
vec![Words, Attribute],
|
||||||
|
7
|
||||||
|
);
|
||||||
|
test_distinct!(
|
||||||
|
distinct_string_criterion_words_exactness,
|
||||||
|
tag,
|
||||||
|
false,
|
||||||
|
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||||
|
vec![Words, Exactness],
|
||||||
|
3
|
||||||
|
);
|
||||||
|
test_distinct!(
|
||||||
|
distinct_number_criterion_words_exactness,
|
||||||
|
asc_desc_rank,
|
||||||
|
false,
|
||||||
|
EXTERNAL_DOCUMENTS_IDS.len(),
|
||||||
|
vec![Words, Exactness],
|
||||||
|
7
|
||||||
|
);
|
||||||
|
Loading…
Reference in New Issue
Block a user