mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-22 21:04:27 +01:00
Compute an exact count when using distinct
This commit is contained in:
parent
a396806343
commit
d71bc1e69f
@ -3,32 +3,35 @@ use roaring::RoaringBitmap;
|
||||
use super::{Criterion, CriterionParameters, CriterionResult};
|
||||
use crate::search::criteria::{resolve_query_tree, Context};
|
||||
use crate::search::query_tree::Operation;
|
||||
use crate::search::Distinct;
|
||||
use crate::Result;
|
||||
|
||||
pub struct Initial<'t> {
|
||||
pub struct Initial<'t, D> {
|
||||
ctx: &'t dyn Context<'t>,
|
||||
answer: Option<CriterionResult>,
|
||||
exhaustive_number_hits: bool,
|
||||
distinct: Option<D>,
|
||||
}
|
||||
|
||||
impl<'t> Initial<'t> {
|
||||
impl<'t, D> Initial<'t, D> {
|
||||
pub fn new(
|
||||
ctx: &'t dyn Context<'t>,
|
||||
query_tree: Option<Operation>,
|
||||
filtered_candidates: Option<RoaringBitmap>,
|
||||
exhaustive_number_hits: bool,
|
||||
) -> Initial {
|
||||
distinct: Option<D>,
|
||||
) -> Initial<D> {
|
||||
let answer = CriterionResult {
|
||||
query_tree,
|
||||
candidates: None,
|
||||
filtered_candidates,
|
||||
bucket_candidates: None,
|
||||
};
|
||||
Initial { ctx, answer: Some(answer), exhaustive_number_hits }
|
||||
Initial { ctx, answer: Some(answer), exhaustive_number_hits, distinct }
|
||||
}
|
||||
}
|
||||
|
||||
impl Criterion for Initial<'_> {
|
||||
impl<D: Distinct> Criterion for Initial<'_, D> {
|
||||
#[logging_timer::time("Initial::{}")]
|
||||
fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
|
||||
self.answer
|
||||
@ -41,8 +44,20 @@ impl Criterion for Initial<'_> {
|
||||
&mut params.wdcache,
|
||||
)?;
|
||||
|
||||
answer.candidates = Some(candidates.clone());
|
||||
answer.bucket_candidates = Some(candidates);
|
||||
let bucket_candidates = match &mut self.distinct {
|
||||
// may be really time consuming
|
||||
Some(distinct) => {
|
||||
let mut bucket_candidates = RoaringBitmap::new();
|
||||
for c in distinct.distinct(candidates.clone(), RoaringBitmap::new()) {
|
||||
bucket_candidates.insert(c?);
|
||||
}
|
||||
bucket_candidates
|
||||
}
|
||||
None => candidates.clone(),
|
||||
};
|
||||
|
||||
answer.candidates = Some(candidates);
|
||||
answer.bucket_candidates = Some(bucket_candidates);
|
||||
}
|
||||
Ok(answer)
|
||||
})
|
||||
|
@ -13,7 +13,7 @@ use self::typo::Typo;
|
||||
use self::words::Words;
|
||||
use super::query_tree::{Operation, PrimitiveQueryPart, Query, QueryKind};
|
||||
use crate::search::criteria::geo::Geo;
|
||||
use crate::search::{word_derivations, WordDerivationsCache};
|
||||
use crate::search::{word_derivations, Distinct, WordDerivationsCache};
|
||||
use crate::{AscDesc as AscDescName, DocumentId, FieldId, Index, Member, Result};
|
||||
|
||||
mod asc_desc;
|
||||
@ -226,21 +226,26 @@ impl<'t> CriteriaBuilder<'t> {
|
||||
Ok(Self { rtxn, index, words_fst, words_prefixes_fst })
|
||||
}
|
||||
|
||||
pub fn build(
|
||||
pub fn build<D: 't + Distinct>(
|
||||
&'t self,
|
||||
query_tree: Option<Operation>,
|
||||
primitive_query: Option<Vec<PrimitiveQueryPart>>,
|
||||
filtered_candidates: Option<RoaringBitmap>,
|
||||
sort_criteria: Option<Vec<AscDescName>>,
|
||||
exhaustive_number_hits: bool,
|
||||
distinct: Option<D>,
|
||||
) -> Result<Final<'t>> {
|
||||
use crate::criterion::Criterion as Name;
|
||||
|
||||
let primitive_query = primitive_query.unwrap_or_default();
|
||||
|
||||
let mut criterion =
|
||||
Box::new(Initial::new(self, query_tree, filtered_candidates, exhaustive_number_hits))
|
||||
as Box<dyn Criterion>;
|
||||
let mut criterion = Box::new(Initial::new(
|
||||
self,
|
||||
query_tree,
|
||||
filtered_candidates,
|
||||
exhaustive_number_hits,
|
||||
distinct,
|
||||
)) as Box<dyn Criterion>;
|
||||
for name in self.index.criteria(&self.rtxn)? {
|
||||
criterion = match name {
|
||||
Name::Words => Box::new(Words::new(self, criterion)),
|
||||
|
@ -348,6 +348,7 @@ mod test {
|
||||
use super::super::initial::Initial;
|
||||
use super::super::test::TestContext;
|
||||
use super::*;
|
||||
use crate::search::NoopDistinct;
|
||||
|
||||
fn display_criteria(mut criteria: Typo, mut parameters: CriterionParameters) -> String {
|
||||
let mut result = String::new();
|
||||
@ -368,7 +369,8 @@ mod test {
|
||||
excluded_candidates: &RoaringBitmap::new(),
|
||||
};
|
||||
|
||||
let parent = Initial::new(&context, query_tree, facet_candidates, false);
|
||||
let parent =
|
||||
Initial::<NoopDistinct>::new(&context, query_tree, facet_candidates, false, None);
|
||||
let criteria = Typo::new(&context, Box::new(parent));
|
||||
|
||||
let result = display_criteria(criteria, criterion_parameters);
|
||||
@ -405,7 +407,8 @@ mod test {
|
||||
wdcache: &mut WordDerivationsCache::new(),
|
||||
excluded_candidates: &RoaringBitmap::new(),
|
||||
};
|
||||
let parent = Initial::new(&context, Some(query_tree), facet_candidates, false);
|
||||
let parent =
|
||||
Initial::<NoopDistinct>::new(&context, Some(query_tree), facet_candidates, false, None);
|
||||
let criteria = Typo::new(&context, Box::new(parent));
|
||||
|
||||
let result = display_criteria(criteria, criterion_parameters);
|
||||
@ -439,7 +442,13 @@ mod test {
|
||||
wdcache: &mut WordDerivationsCache::new(),
|
||||
excluded_candidates: &RoaringBitmap::new(),
|
||||
};
|
||||
let parent = Initial::new(&context, query_tree, Some(facet_candidates.clone()), false);
|
||||
let parent = Initial::<NoopDistinct>::new(
|
||||
&context,
|
||||
query_tree,
|
||||
Some(facet_candidates.clone()),
|
||||
false,
|
||||
None,
|
||||
);
|
||||
let criteria = Typo::new(&context, Box::new(parent));
|
||||
|
||||
let result = display_criteria(criteria, criterion_parameters);
|
||||
@ -476,8 +485,13 @@ mod test {
|
||||
wdcache: &mut WordDerivationsCache::new(),
|
||||
excluded_candidates: &RoaringBitmap::new(),
|
||||
};
|
||||
let parent =
|
||||
Initial::new(&context, Some(query_tree), Some(facet_candidates.clone()), false);
|
||||
let parent = Initial::<NoopDistinct>::new(
|
||||
&context,
|
||||
Some(query_tree),
|
||||
Some(facet_candidates.clone()),
|
||||
false,
|
||||
None,
|
||||
);
|
||||
let criteria = Typo::new(&context, Box::new(parent));
|
||||
|
||||
let result = display_criteria(criteria, criterion_parameters);
|
||||
|
@ -21,6 +21,7 @@ const DOCID_SIZE: usize = size_of::<DocumentId>();
|
||||
/// care to keep the document we are currently on, and remove it from the excluded list. The next
|
||||
/// iterations will never contain any occurence of a document with the same distinct value as a
|
||||
/// document from previous iterations.
|
||||
#[derive(Clone)]
|
||||
pub struct FacetDistinct<'a> {
|
||||
distinct: FieldId,
|
||||
index: &'a Index,
|
||||
|
@ -191,21 +191,33 @@ impl<'a> Search<'a> {
|
||||
}
|
||||
|
||||
let criteria_builder = criteria::CriteriaBuilder::new(self.rtxn, self.index)?;
|
||||
let criteria = criteria_builder.build(
|
||||
query_tree,
|
||||
primitive_query,
|
||||
filtered_candidates,
|
||||
self.sort_criteria.clone(),
|
||||
self.exhaustive_number_hits,
|
||||
)?;
|
||||
|
||||
match self.index.distinct_field(self.rtxn)? {
|
||||
None => self.perform_sort(NoopDistinct, matching_words.unwrap_or_default(), criteria),
|
||||
None => {
|
||||
let criteria = criteria_builder.build::<NoopDistinct>(
|
||||
query_tree,
|
||||
primitive_query,
|
||||
filtered_candidates,
|
||||
self.sort_criteria.clone(),
|
||||
self.exhaustive_number_hits,
|
||||
None,
|
||||
)?;
|
||||
self.perform_sort(NoopDistinct, matching_words.unwrap_or_default(), criteria)
|
||||
}
|
||||
Some(name) => {
|
||||
let field_ids_map = self.index.fields_ids_map(self.rtxn)?;
|
||||
match field_ids_map.id(name) {
|
||||
Some(fid) => {
|
||||
let distinct = FacetDistinct::new(fid, self.index, self.rtxn);
|
||||
|
||||
let criteria = criteria_builder.build(
|
||||
query_tree,
|
||||
primitive_query,
|
||||
filtered_candidates,
|
||||
self.sort_criteria.clone(),
|
||||
self.exhaustive_number_hits,
|
||||
Some(distinct.clone()),
|
||||
)?;
|
||||
self.perform_sort(distinct, matching_words.unwrap_or_default(), criteria)
|
||||
}
|
||||
None => Ok(SearchResult::default()),
|
||||
|
Loading…
Reference in New Issue
Block a user