Make the distinct work at search

This commit is contained in:
Clément Renault 2024-06-11 11:39:35 -04:00
parent cb765ad249
commit 0d31be1494
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F
10 changed files with 77 additions and 2 deletions

View File

@ -597,6 +597,9 @@ pub struct SearchAggregator {
// every time a request has a filter, this field must be incremented by one
sort_total_number_of_criteria: usize,
// distinct
distinct: bool,
// filter
filter_with_geo_radius: bool,
filter_with_geo_bounding_box: bool,
@ -670,6 +673,7 @@ impl SearchAggregator {
show_ranking_score_details,
filter,
sort,
distinct,
facets: _,
highlight_pre_tag,
highlight_post_tag,
@ -692,6 +696,8 @@ impl SearchAggregator {
ret.sort_sum_of_criteria_terms = sort.len();
}
ret.distinct = distinct.is_some();
if let Some(ref filter) = filter {
static RE: Lazy<Regex> = Lazy::new(|| Regex::new("AND | OR").unwrap());
ret.filter_total_number_of_criteria = 1;
@ -795,6 +801,7 @@ impl SearchAggregator {
sort_with_geo_point,
sort_sum_of_criteria_terms,
sort_total_number_of_criteria,
distinct,
filter_with_geo_radius,
filter_with_geo_bounding_box,
filter_sum_of_criteria_terms,
@ -851,6 +858,9 @@ impl SearchAggregator {
self.sort_total_number_of_criteria =
self.sort_total_number_of_criteria.saturating_add(sort_total_number_of_criteria);
// distinct
self.distinct |= distinct;
// filter
self.filter_with_geo_radius |= filter_with_geo_radius;
self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box;
@ -921,6 +931,7 @@ impl SearchAggregator {
sort_with_geo_point,
sort_sum_of_criteria_terms,
sort_total_number_of_criteria,
distinct,
filter_with_geo_radius,
filter_with_geo_bounding_box,
filter_sum_of_criteria_terms,
@ -977,6 +988,8 @@ impl SearchAggregator {
"with_geoPoint": sort_with_geo_point,
"avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64),
},
// TODO ask help from María
"distinct": distinct,
"filter": {
"with_geoRadius": filter_with_geo_radius,
"with_geoBoundingBox": filter_with_geo_bounding_box,
@ -1087,6 +1100,7 @@ impl MultiSearchAggregator {
show_matches_position: _,
filter: _,
sort: _,
distinct: _,
facets: _,
highlight_pre_tag: _,
highlight_post_tag: _,

View File

@ -123,6 +123,7 @@ impl From<FacetSearchQuery> for SearchQuery {
show_ranking_score_details: false,
filter,
sort: None,
distinct: None,
facets: None,
highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(),
highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(),

View File

@ -61,6 +61,9 @@ pub struct SearchQueryGet {
filter: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchSort>)]
sort: Option<String>,
// TODO change the InvalidSearchSort to InvalidSearchDistinct error
#[deserr(default, error = DeserrQueryParamError<InvalidSearchSort>)]
distinct: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchShowMatchesPosition>)]
show_matches_position: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchShowRankingScore>)]
@ -158,6 +161,7 @@ impl From<SearchQueryGet> for SearchQuery {
attributes_to_highlight: other.attributes_to_highlight.map(|o| o.into_iter().collect()),
filter,
sort: other.sort.map(|attr| fix_sort_query_parameters(&attr)),
distinct: other.distinct,
show_matches_position: other.show_matches_position.0,
show_ranking_score: other.show_ranking_score.0,
show_ranking_score_details: other.show_ranking_score_details.0,

View File

@ -75,6 +75,9 @@ pub struct SearchQuery {
pub filter: Option<Value>,
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
pub sort: Option<Vec<String>>,
// TODO Change the error to InvalidSearchDistinct
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
pub distinct: Option<String>,
#[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)]
pub facets: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())]
@ -149,6 +152,7 @@ impl fmt::Debug for SearchQuery {
show_ranking_score_details,
filter,
sort,
distinct,
facets,
highlight_pre_tag,
highlight_post_tag,
@ -195,6 +199,9 @@ impl fmt::Debug for SearchQuery {
if let Some(sort) = sort {
debug.field("sort", &sort);
}
if let Some(distinct) = distinct {
debug.field("distinct", &distinct);
}
if let Some(facets) = facets {
debug.field("facets", &facets);
}
@ -386,6 +393,9 @@ pub struct SearchQueryWithIndex {
pub filter: Option<Value>,
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
pub sort: Option<Vec<String>>,
// TODO change error to InvalidSearchDistinct
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
pub distinct: Option<String>,
#[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)]
pub facets: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())]
@ -421,6 +431,7 @@ impl SearchQueryWithIndex {
show_matches_position,
filter,
sort,
distinct,
facets,
highlight_pre_tag,
highlight_post_tag,
@ -448,6 +459,7 @@ impl SearchQueryWithIndex {
show_matches_position,
filter,
sort,
distinct,
facets,
highlight_pre_tag,
highlight_post_tag,
@ -716,6 +728,10 @@ fn prepare_search<'t>(
search.ranking_score_threshold(ranking_score_threshold.0);
}
if let Some(distinct) = &query.distinct {
search.distinct(distinct.clone());
}
match search_kind {
SearchKind::KeywordOnly => {
if let Some(q) = &query.q {
@ -866,6 +882,7 @@ pub fn perform_search(
matching_strategy: _,
attributes_to_search_on: _,
filter: _,
distinct: _,
} = query;
let format = AttributesFormat {

View File

@ -59,6 +59,7 @@ fn main() -> Result<(), Box<dyn Error>> {
false,
universe,
&None,
&None,
GeoSortStrategy::default(),
0,
20,

View File

@ -159,6 +159,7 @@ impl<'a> Search<'a> {
offset: 0,
limit: self.limit + self.offset,
sort_criteria: self.sort_criteria.clone(),
distinct: self.distinct.clone(),
searchable_attributes: self.searchable_attributes,
geo_strategy: self.geo_strategy,
terms_matching_strategy: self.terms_matching_strategy,

View File

@ -40,6 +40,7 @@ pub struct Search<'a> {
offset: usize,
limit: usize,
sort_criteria: Option<Vec<AscDesc>>,
distinct: Option<String>,
searchable_attributes: Option<&'a [String]>,
geo_strategy: new::GeoSortStrategy,
terms_matching_strategy: TermsMatchingStrategy,
@ -61,6 +62,7 @@ impl<'a> Search<'a> {
offset: 0,
limit: 20,
sort_criteria: None,
distinct: None,
searchable_attributes: None,
geo_strategy: new::GeoSortStrategy::default(),
terms_matching_strategy: TermsMatchingStrategy::default(),
@ -105,6 +107,11 @@ impl<'a> Search<'a> {
self
}
pub fn distinct(&mut self, distinct: String) -> &mut Search<'a> {
self.distinct = Some(distinct);
self
}
pub fn searchable_attributes(&mut self, searchable: &'a [String]) -> &mut Search<'a> {
self.searchable_attributes = Some(searchable);
self
@ -169,6 +176,13 @@ impl<'a> Search<'a> {
ctx.attributes_to_search_on(searchable_attributes)?;
}
if let Some(distinct) = &self.distinct {
if !ctx.index.filterable_fields(ctx.txn)?.contains(distinct) {
// TODO return a real error message
panic!("Distinct search field is not a filterable attribute");
}
}
let universe = filtered_universe(ctx.index, ctx.txn, &self.filter)?;
let PartialSearchResult {
located_query_terms,
@ -185,6 +199,7 @@ impl<'a> Search<'a> {
self.scoring_strategy,
universe,
&self.sort_criteria,
&self.distinct,
self.geo_strategy,
self.offset,
self.limit,
@ -202,6 +217,7 @@ impl<'a> Search<'a> {
self.exhaustive_number_hits,
universe,
&self.sort_criteria,
&self.distinct,
self.geo_strategy,
self.offset,
self.limit,
@ -238,6 +254,7 @@ impl fmt::Debug for Search<'_> {
offset,
limit,
sort_criteria,
distinct,
searchable_attributes,
geo_strategy: _,
terms_matching_strategy,
@ -257,6 +274,7 @@ impl fmt::Debug for Search<'_> {
.field("offset", offset)
.field("limit", limit)
.field("sort_criteria", sort_criteria)
.field("distinct", distinct)
.field("searchable_attributes", searchable_attributes)
.field("terms_matching_strategy", terms_matching_strategy)
.field("scoring_strategy", scoring_strategy)

View File

@ -22,6 +22,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
ctx: &mut SearchContext<'ctx>,
mut ranking_rules: Vec<BoxRankingRule<'ctx, Q>>,
query: &Q,
distinct: Option<&str>,
universe: &RoaringBitmap,
from: usize,
length: usize,
@ -34,7 +35,12 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
logger.ranking_rules(&ranking_rules);
logger.initial_universe(universe);
let distinct_fid = if let Some(field) = ctx.index.distinct_field(ctx.txn)? {
let distinct_field = match distinct {
Some(distinct) => Some(distinct),
None => ctx.index.distinct_field(ctx.txn)?,
};
let distinct_fid = if let Some(field) = distinct_field {
ctx.index.fields_ids_map(ctx.txn)?.id(field)
} else {
None

View File

@ -516,6 +516,7 @@ mod tests {
false,
universe,
&None,
&None,
crate::search::new::GeoSortStrategy::default(),
0,
100,

View File

@ -567,6 +567,7 @@ pub fn execute_vector_search(
scoring_strategy: ScoringStrategy,
universe: RoaringBitmap,
sort_criteria: &Option<Vec<AscDesc>>,
distinct: &Option<String>,
geo_strategy: geo_sort::Strategy,
from: usize,
length: usize,
@ -597,6 +598,7 @@ pub fn execute_vector_search(
ctx,
ranking_rules,
&PlaceholderQuery,
distinct.as_deref(),
&universe,
from,
length,
@ -626,6 +628,7 @@ pub fn execute_search(
exhaustive_number_hits: bool,
mut universe: RoaringBitmap,
sort_criteria: &Option<Vec<AscDesc>>,
distinct: &Option<String>,
geo_strategy: geo_sort::Strategy,
from: usize,
length: usize,
@ -716,6 +719,7 @@ pub fn execute_search(
ctx,
ranking_rules,
&graph,
distinct.as_deref(),
&universe,
from,
length,
@ -731,6 +735,7 @@ pub fn execute_search(
ctx,
ranking_rules,
&PlaceholderQuery,
distinct.as_deref(),
&universe,
from,
length,
@ -747,7 +752,14 @@ pub fn execute_search(
// The candidates is the universe unless the exhaustive number of hits
// is requested and a distinct attribute is set.
if exhaustive_number_hits {
if let Some(f) = ctx.index.distinct_field(ctx.txn)? {
// TODO Should the distinct search parameter replace the distinct setting?
// Or should we return an error if the distinct search param is set at the same time as the setting is set?
let distinct_field = match distinct.as_deref() {
Some(distinct) => Some(distinct),
None => ctx.index.distinct_field(ctx.txn)?,
};
if let Some(f) = distinct_field {
if let Some(distinct_fid) = fields_ids_map.id(f) {
all_candidates = apply_distinct_rule(ctx, distinct_fid, &all_candidates)?.remaining;
}