Compute an exact count when using distinct

This commit is contained in:
ManyTheFish 2022-07-18 16:52:45 +02:00
parent a396806343
commit d71bc1e69f
5 changed files with 72 additions and 25 deletions

View File

@ -3,32 +3,35 @@ use roaring::RoaringBitmap;
use super::{Criterion, CriterionParameters, CriterionResult}; use super::{Criterion, CriterionParameters, CriterionResult};
use crate::search::criteria::{resolve_query_tree, Context}; use crate::search::criteria::{resolve_query_tree, Context};
use crate::search::query_tree::Operation; use crate::search::query_tree::Operation;
use crate::search::Distinct;
use crate::Result; use crate::Result;
pub struct Initial<'t> { pub struct Initial<'t, D> {
ctx: &'t dyn Context<'t>, ctx: &'t dyn Context<'t>,
answer: Option<CriterionResult>, answer: Option<CriterionResult>,
exhaustive_number_hits: bool, exhaustive_number_hits: bool,
distinct: Option<D>,
} }
impl<'t> Initial<'t> { impl<'t, D> Initial<'t, D> {
pub fn new( pub fn new(
ctx: &'t dyn Context<'t>, ctx: &'t dyn Context<'t>,
query_tree: Option<Operation>, query_tree: Option<Operation>,
filtered_candidates: Option<RoaringBitmap>, filtered_candidates: Option<RoaringBitmap>,
exhaustive_number_hits: bool, exhaustive_number_hits: bool,
) -> Initial { distinct: Option<D>,
) -> Initial<D> {
let answer = CriterionResult { let answer = CriterionResult {
query_tree, query_tree,
candidates: None, candidates: None,
filtered_candidates, filtered_candidates,
bucket_candidates: None, bucket_candidates: None,
}; };
Initial { ctx, answer: Some(answer), exhaustive_number_hits } Initial { ctx, answer: Some(answer), exhaustive_number_hits, distinct }
} }
} }
impl Criterion for Initial<'_> { impl<D: Distinct> Criterion for Initial<'_, D> {
#[logging_timer::time("Initial::{}")] #[logging_timer::time("Initial::{}")]
fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> { fn next(&mut self, params: &mut CriterionParameters) -> Result<Option<CriterionResult>> {
self.answer self.answer
@ -41,8 +44,20 @@ impl Criterion for Initial<'_> {
&mut params.wdcache, &mut params.wdcache,
)?; )?;
answer.candidates = Some(candidates.clone()); let bucket_candidates = match &mut self.distinct {
answer.bucket_candidates = Some(candidates); // may be really time consuming
Some(distinct) => {
let mut bucket_candidates = RoaringBitmap::new();
for c in distinct.distinct(candidates.clone(), RoaringBitmap::new()) {
bucket_candidates.insert(c?);
}
bucket_candidates
}
None => candidates.clone(),
};
answer.candidates = Some(candidates);
answer.bucket_candidates = Some(bucket_candidates);
} }
Ok(answer) Ok(answer)
}) })

View File

@ -13,7 +13,7 @@ use self::typo::Typo;
use self::words::Words; use self::words::Words;
use super::query_tree::{Operation, PrimitiveQueryPart, Query, QueryKind}; use super::query_tree::{Operation, PrimitiveQueryPart, Query, QueryKind};
use crate::search::criteria::geo::Geo; use crate::search::criteria::geo::Geo;
use crate::search::{word_derivations, WordDerivationsCache}; use crate::search::{word_derivations, Distinct, WordDerivationsCache};
use crate::{AscDesc as AscDescName, DocumentId, FieldId, Index, Member, Result}; use crate::{AscDesc as AscDescName, DocumentId, FieldId, Index, Member, Result};
mod asc_desc; mod asc_desc;
@ -226,21 +226,26 @@ impl<'t> CriteriaBuilder<'t> {
Ok(Self { rtxn, index, words_fst, words_prefixes_fst }) Ok(Self { rtxn, index, words_fst, words_prefixes_fst })
} }
pub fn build( pub fn build<D: 't + Distinct>(
&'t self, &'t self,
query_tree: Option<Operation>, query_tree: Option<Operation>,
primitive_query: Option<Vec<PrimitiveQueryPart>>, primitive_query: Option<Vec<PrimitiveQueryPart>>,
filtered_candidates: Option<RoaringBitmap>, filtered_candidates: Option<RoaringBitmap>,
sort_criteria: Option<Vec<AscDescName>>, sort_criteria: Option<Vec<AscDescName>>,
exhaustive_number_hits: bool, exhaustive_number_hits: bool,
distinct: Option<D>,
) -> Result<Final<'t>> { ) -> Result<Final<'t>> {
use crate::criterion::Criterion as Name; use crate::criterion::Criterion as Name;
let primitive_query = primitive_query.unwrap_or_default(); let primitive_query = primitive_query.unwrap_or_default();
let mut criterion = let mut criterion = Box::new(Initial::new(
Box::new(Initial::new(self, query_tree, filtered_candidates, exhaustive_number_hits)) self,
as Box<dyn Criterion>; query_tree,
filtered_candidates,
exhaustive_number_hits,
distinct,
)) as Box<dyn Criterion>;
for name in self.index.criteria(&self.rtxn)? { for name in self.index.criteria(&self.rtxn)? {
criterion = match name { criterion = match name {
Name::Words => Box::new(Words::new(self, criterion)), Name::Words => Box::new(Words::new(self, criterion)),

View File

@ -348,6 +348,7 @@ mod test {
use super::super::initial::Initial; use super::super::initial::Initial;
use super::super::test::TestContext; use super::super::test::TestContext;
use super::*; use super::*;
use crate::search::NoopDistinct;
fn display_criteria(mut criteria: Typo, mut parameters: CriterionParameters) -> String { fn display_criteria(mut criteria: Typo, mut parameters: CriterionParameters) -> String {
let mut result = String::new(); let mut result = String::new();
@ -368,7 +369,8 @@ mod test {
excluded_candidates: &RoaringBitmap::new(), excluded_candidates: &RoaringBitmap::new(),
}; };
let parent = Initial::new(&context, query_tree, facet_candidates, false); let parent =
Initial::<NoopDistinct>::new(&context, query_tree, facet_candidates, false, None);
let criteria = Typo::new(&context, Box::new(parent)); let criteria = Typo::new(&context, Box::new(parent));
let result = display_criteria(criteria, criterion_parameters); let result = display_criteria(criteria, criterion_parameters);
@ -405,7 +407,8 @@ mod test {
wdcache: &mut WordDerivationsCache::new(), wdcache: &mut WordDerivationsCache::new(),
excluded_candidates: &RoaringBitmap::new(), excluded_candidates: &RoaringBitmap::new(),
}; };
let parent = Initial::new(&context, Some(query_tree), facet_candidates, false); let parent =
Initial::<NoopDistinct>::new(&context, Some(query_tree), facet_candidates, false, None);
let criteria = Typo::new(&context, Box::new(parent)); let criteria = Typo::new(&context, Box::new(parent));
let result = display_criteria(criteria, criterion_parameters); let result = display_criteria(criteria, criterion_parameters);
@ -439,7 +442,13 @@ mod test {
wdcache: &mut WordDerivationsCache::new(), wdcache: &mut WordDerivationsCache::new(),
excluded_candidates: &RoaringBitmap::new(), excluded_candidates: &RoaringBitmap::new(),
}; };
let parent = Initial::new(&context, query_tree, Some(facet_candidates.clone()), false); let parent = Initial::<NoopDistinct>::new(
&context,
query_tree,
Some(facet_candidates.clone()),
false,
None,
);
let criteria = Typo::new(&context, Box::new(parent)); let criteria = Typo::new(&context, Box::new(parent));
let result = display_criteria(criteria, criterion_parameters); let result = display_criteria(criteria, criterion_parameters);
@ -476,8 +485,13 @@ mod test {
wdcache: &mut WordDerivationsCache::new(), wdcache: &mut WordDerivationsCache::new(),
excluded_candidates: &RoaringBitmap::new(), excluded_candidates: &RoaringBitmap::new(),
}; };
let parent = let parent = Initial::<NoopDistinct>::new(
Initial::new(&context, Some(query_tree), Some(facet_candidates.clone()), false); &context,
Some(query_tree),
Some(facet_candidates.clone()),
false,
None,
);
let criteria = Typo::new(&context, Box::new(parent)); let criteria = Typo::new(&context, Box::new(parent));
let result = display_criteria(criteria, criterion_parameters); let result = display_criteria(criteria, criterion_parameters);

View File

@ -21,6 +21,7 @@ const DOCID_SIZE: usize = size_of::<DocumentId>();
/// care to keep the document we are currently on, and remove it from the excluded list. The next /// care to keep the document we are currently on, and remove it from the excluded list. The next
/// iterations will never contain any occurrence of a document with the same distinct value as a /// iterations will never contain any occurrence of a document with the same distinct value as a
/// document from previous iterations. /// document from previous iterations.
#[derive(Clone)]
pub struct FacetDistinct<'a> { pub struct FacetDistinct<'a> {
distinct: FieldId, distinct: FieldId,
index: &'a Index, index: &'a Index,

View File

@ -191,21 +191,33 @@ impl<'a> Search<'a> {
} }
let criteria_builder = criteria::CriteriaBuilder::new(self.rtxn, self.index)?; let criteria_builder = criteria::CriteriaBuilder::new(self.rtxn, self.index)?;
match self.index.distinct_field(self.rtxn)? {
None => {
let criteria = criteria_builder.build::<NoopDistinct>(
query_tree,
primitive_query,
filtered_candidates,
self.sort_criteria.clone(),
self.exhaustive_number_hits,
None,
)?;
self.perform_sort(NoopDistinct, matching_words.unwrap_or_default(), criteria)
}
Some(name) => {
let field_ids_map = self.index.fields_ids_map(self.rtxn)?;
match field_ids_map.id(name) {
Some(fid) => {
let distinct = FacetDistinct::new(fid, self.index, self.rtxn);
let criteria = criteria_builder.build( let criteria = criteria_builder.build(
query_tree, query_tree,
primitive_query, primitive_query,
filtered_candidates, filtered_candidates,
self.sort_criteria.clone(), self.sort_criteria.clone(),
self.exhaustive_number_hits, self.exhaustive_number_hits,
Some(distinct.clone()),
)?; )?;
match self.index.distinct_field(self.rtxn)? {
None => self.perform_sort(NoopDistinct, matching_words.unwrap_or_default(), criteria),
Some(name) => {
let field_ids_map = self.index.fields_ids_map(self.rtxn)?;
match field_ids_map.id(name) {
Some(fid) => {
let distinct = FacetDistinct::new(fid, self.index, self.rtxn);
self.perform_sort(distinct, matching_words.unwrap_or_default(), criteria) self.perform_sort(distinct, matching_words.unwrap_or_default(), criteria)
} }
None => Ok(SearchResult::default()), None => Ok(SearchResult::default()),