Make the distinct work at search

This commit is contained in:
Clément Renault 2024-06-11 11:39:35 -04:00
parent cb765ad249
commit 0d31be1494
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F
10 changed files with 77 additions and 2 deletions

View File

@ -597,6 +597,9 @@ pub struct SearchAggregator {
// every time a request has a sort, this field must be incremented by one // every time a request has a sort, this field must be incremented by one
sort_total_number_of_criteria: usize, sort_total_number_of_criteria: usize,
// distinct
distinct: bool,
// filter // filter
filter_with_geo_radius: bool, filter_with_geo_radius: bool,
filter_with_geo_bounding_box: bool, filter_with_geo_bounding_box: bool,
@ -670,6 +673,7 @@ impl SearchAggregator {
show_ranking_score_details, show_ranking_score_details,
filter, filter,
sort, sort,
distinct,
facets: _, facets: _,
highlight_pre_tag, highlight_pre_tag,
highlight_post_tag, highlight_post_tag,
@ -692,6 +696,8 @@ impl SearchAggregator {
ret.sort_sum_of_criteria_terms = sort.len(); ret.sort_sum_of_criteria_terms = sort.len();
} }
ret.distinct = distinct.is_some();
if let Some(ref filter) = filter { if let Some(ref filter) = filter {
static RE: Lazy<Regex> = Lazy::new(|| Regex::new("AND | OR").unwrap()); static RE: Lazy<Regex> = Lazy::new(|| Regex::new("AND | OR").unwrap());
ret.filter_total_number_of_criteria = 1; ret.filter_total_number_of_criteria = 1;
@ -795,6 +801,7 @@ impl SearchAggregator {
sort_with_geo_point, sort_with_geo_point,
sort_sum_of_criteria_terms, sort_sum_of_criteria_terms,
sort_total_number_of_criteria, sort_total_number_of_criteria,
distinct,
filter_with_geo_radius, filter_with_geo_radius,
filter_with_geo_bounding_box, filter_with_geo_bounding_box,
filter_sum_of_criteria_terms, filter_sum_of_criteria_terms,
@ -851,6 +858,9 @@ impl SearchAggregator {
self.sort_total_number_of_criteria = self.sort_total_number_of_criteria =
self.sort_total_number_of_criteria.saturating_add(sort_total_number_of_criteria); self.sort_total_number_of_criteria.saturating_add(sort_total_number_of_criteria);
// distinct
self.distinct |= distinct;
// filter // filter
self.filter_with_geo_radius |= filter_with_geo_radius; self.filter_with_geo_radius |= filter_with_geo_radius;
self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box; self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box;
@ -921,6 +931,7 @@ impl SearchAggregator {
sort_with_geo_point, sort_with_geo_point,
sort_sum_of_criteria_terms, sort_sum_of_criteria_terms,
sort_total_number_of_criteria, sort_total_number_of_criteria,
distinct,
filter_with_geo_radius, filter_with_geo_radius,
filter_with_geo_bounding_box, filter_with_geo_bounding_box,
filter_sum_of_criteria_terms, filter_sum_of_criteria_terms,
@ -977,6 +988,8 @@ impl SearchAggregator {
"with_geoPoint": sort_with_geo_point, "with_geoPoint": sort_with_geo_point,
"avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64), "avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64),
}, },
// TODO ask help from María
"distinct": distinct,
"filter": { "filter": {
"with_geoRadius": filter_with_geo_radius, "with_geoRadius": filter_with_geo_radius,
"with_geoBoundingBox": filter_with_geo_bounding_box, "with_geoBoundingBox": filter_with_geo_bounding_box,
@ -1087,6 +1100,7 @@ impl MultiSearchAggregator {
show_matches_position: _, show_matches_position: _,
filter: _, filter: _,
sort: _, sort: _,
distinct: _,
facets: _, facets: _,
highlight_pre_tag: _, highlight_pre_tag: _,
highlight_post_tag: _, highlight_post_tag: _,

View File

@ -123,6 +123,7 @@ impl From<FacetSearchQuery> for SearchQuery {
show_ranking_score_details: false, show_ranking_score_details: false,
filter, filter,
sort: None, sort: None,
distinct: None,
facets: None, facets: None,
highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(), highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(),
highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(), highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(),

View File

@ -61,6 +61,9 @@ pub struct SearchQueryGet {
filter: Option<String>, filter: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchSort>)] #[deserr(default, error = DeserrQueryParamError<InvalidSearchSort>)]
sort: Option<String>, sort: Option<String>,
// TODO change the InvalidSearchSort to InvalidSearchDistinct error
#[deserr(default, error = DeserrQueryParamError<InvalidSearchSort>)]
distinct: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchShowMatchesPosition>)] #[deserr(default, error = DeserrQueryParamError<InvalidSearchShowMatchesPosition>)]
show_matches_position: Param<bool>, show_matches_position: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchShowRankingScore>)] #[deserr(default, error = DeserrQueryParamError<InvalidSearchShowRankingScore>)]
@ -158,6 +161,7 @@ impl From<SearchQueryGet> for SearchQuery {
attributes_to_highlight: other.attributes_to_highlight.map(|o| o.into_iter().collect()), attributes_to_highlight: other.attributes_to_highlight.map(|o| o.into_iter().collect()),
filter, filter,
sort: other.sort.map(|attr| fix_sort_query_parameters(&attr)), sort: other.sort.map(|attr| fix_sort_query_parameters(&attr)),
distinct: other.distinct,
show_matches_position: other.show_matches_position.0, show_matches_position: other.show_matches_position.0,
show_ranking_score: other.show_ranking_score.0, show_ranking_score: other.show_ranking_score.0,
show_ranking_score_details: other.show_ranking_score_details.0, show_ranking_score_details: other.show_ranking_score_details.0,

View File

@ -75,6 +75,9 @@ pub struct SearchQuery {
pub filter: Option<Value>, pub filter: Option<Value>,
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)] #[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
pub sort: Option<Vec<String>>, pub sort: Option<Vec<String>>,
// TODO Change the error to InvalidSearchDistinct
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
pub distinct: Option<String>,
#[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)] #[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)]
pub facets: Option<Vec<String>>, pub facets: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())] #[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())]
@ -149,6 +152,7 @@ impl fmt::Debug for SearchQuery {
show_ranking_score_details, show_ranking_score_details,
filter, filter,
sort, sort,
distinct,
facets, facets,
highlight_pre_tag, highlight_pre_tag,
highlight_post_tag, highlight_post_tag,
@ -195,6 +199,9 @@ impl fmt::Debug for SearchQuery {
if let Some(sort) = sort { if let Some(sort) = sort {
debug.field("sort", &sort); debug.field("sort", &sort);
} }
if let Some(distinct) = distinct {
debug.field("distinct", &distinct);
}
if let Some(facets) = facets { if let Some(facets) = facets {
debug.field("facets", &facets); debug.field("facets", &facets);
} }
@ -386,6 +393,9 @@ pub struct SearchQueryWithIndex {
pub filter: Option<Value>, pub filter: Option<Value>,
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)] #[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
pub sort: Option<Vec<String>>, pub sort: Option<Vec<String>>,
// TODO change error to InvalidSearchDistinct
#[deserr(default, error = DeserrJsonError<InvalidSearchSort>)]
pub distinct: Option<String>,
#[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)] #[deserr(default, error = DeserrJsonError<InvalidSearchFacets>)]
pub facets: Option<Vec<String>>, pub facets: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())] #[deserr(default, error = DeserrJsonError<InvalidSearchHighlightPreTag>, default = DEFAULT_HIGHLIGHT_PRE_TAG())]
@ -421,6 +431,7 @@ impl SearchQueryWithIndex {
show_matches_position, show_matches_position,
filter, filter,
sort, sort,
distinct,
facets, facets,
highlight_pre_tag, highlight_pre_tag,
highlight_post_tag, highlight_post_tag,
@ -448,6 +459,7 @@ impl SearchQueryWithIndex {
show_matches_position, show_matches_position,
filter, filter,
sort, sort,
distinct,
facets, facets,
highlight_pre_tag, highlight_pre_tag,
highlight_post_tag, highlight_post_tag,
@ -716,6 +728,10 @@ fn prepare_search<'t>(
search.ranking_score_threshold(ranking_score_threshold.0); search.ranking_score_threshold(ranking_score_threshold.0);
} }
if let Some(distinct) = &query.distinct {
search.distinct(distinct.clone());
}
match search_kind { match search_kind {
SearchKind::KeywordOnly => { SearchKind::KeywordOnly => {
if let Some(q) = &query.q { if let Some(q) = &query.q {
@ -866,6 +882,7 @@ pub fn perform_search(
matching_strategy: _, matching_strategy: _,
attributes_to_search_on: _, attributes_to_search_on: _,
filter: _, filter: _,
distinct: _,
} = query; } = query;
let format = AttributesFormat { let format = AttributesFormat {

View File

@ -59,6 +59,7 @@ fn main() -> Result<(), Box<dyn Error>> {
false, false,
universe, universe,
&None, &None,
&None,
GeoSortStrategy::default(), GeoSortStrategy::default(),
0, 0,
20, 20,

View File

@ -159,6 +159,7 @@ impl<'a> Search<'a> {
offset: 0, offset: 0,
limit: self.limit + self.offset, limit: self.limit + self.offset,
sort_criteria: self.sort_criteria.clone(), sort_criteria: self.sort_criteria.clone(),
distinct: self.distinct.clone(),
searchable_attributes: self.searchable_attributes, searchable_attributes: self.searchable_attributes,
geo_strategy: self.geo_strategy, geo_strategy: self.geo_strategy,
terms_matching_strategy: self.terms_matching_strategy, terms_matching_strategy: self.terms_matching_strategy,

View File

@ -40,6 +40,7 @@ pub struct Search<'a> {
offset: usize, offset: usize,
limit: usize, limit: usize,
sort_criteria: Option<Vec<AscDesc>>, sort_criteria: Option<Vec<AscDesc>>,
distinct: Option<String>,
searchable_attributes: Option<&'a [String]>, searchable_attributes: Option<&'a [String]>,
geo_strategy: new::GeoSortStrategy, geo_strategy: new::GeoSortStrategy,
terms_matching_strategy: TermsMatchingStrategy, terms_matching_strategy: TermsMatchingStrategy,
@ -61,6 +62,7 @@ impl<'a> Search<'a> {
offset: 0, offset: 0,
limit: 20, limit: 20,
sort_criteria: None, sort_criteria: None,
distinct: None,
searchable_attributes: None, searchable_attributes: None,
geo_strategy: new::GeoSortStrategy::default(), geo_strategy: new::GeoSortStrategy::default(),
terms_matching_strategy: TermsMatchingStrategy::default(), terms_matching_strategy: TermsMatchingStrategy::default(),
@ -105,6 +107,11 @@ impl<'a> Search<'a> {
self self
} }
/// Records `distinct` as the field name to apply the distinct rule on for
/// this search.
///
/// The value is stored in the `distinct` field of this `Search`, and the
/// builder itself is returned so that further setters can be chained.
pub fn distinct(&mut self, distinct: String) -> &mut Search<'a> {
    // `Option<T>: From<T>` wraps the name in `Some`.
    self.distinct = distinct.into();
    self
}
pub fn searchable_attributes(&mut self, searchable: &'a [String]) -> &mut Search<'a> { pub fn searchable_attributes(&mut self, searchable: &'a [String]) -> &mut Search<'a> {
self.searchable_attributes = Some(searchable); self.searchable_attributes = Some(searchable);
self self
@ -169,6 +176,13 @@ impl<'a> Search<'a> {
ctx.attributes_to_search_on(searchable_attributes)?; ctx.attributes_to_search_on(searchable_attributes)?;
} }
if let Some(distinct) = &self.distinct {
if !ctx.index.filterable_fields(ctx.txn)?.contains(distinct) {
// TODO return a real error message
panic!("Distinct search field is not a filterable attribute");
}
}
let universe = filtered_universe(ctx.index, ctx.txn, &self.filter)?; let universe = filtered_universe(ctx.index, ctx.txn, &self.filter)?;
let PartialSearchResult { let PartialSearchResult {
located_query_terms, located_query_terms,
@ -185,6 +199,7 @@ impl<'a> Search<'a> {
self.scoring_strategy, self.scoring_strategy,
universe, universe,
&self.sort_criteria, &self.sort_criteria,
&self.distinct,
self.geo_strategy, self.geo_strategy,
self.offset, self.offset,
self.limit, self.limit,
@ -202,6 +217,7 @@ impl<'a> Search<'a> {
self.exhaustive_number_hits, self.exhaustive_number_hits,
universe, universe,
&self.sort_criteria, &self.sort_criteria,
&self.distinct,
self.geo_strategy, self.geo_strategy,
self.offset, self.offset,
self.limit, self.limit,
@ -238,6 +254,7 @@ impl fmt::Debug for Search<'_> {
offset, offset,
limit, limit,
sort_criteria, sort_criteria,
distinct,
searchable_attributes, searchable_attributes,
geo_strategy: _, geo_strategy: _,
terms_matching_strategy, terms_matching_strategy,
@ -257,6 +274,7 @@ impl fmt::Debug for Search<'_> {
.field("offset", offset) .field("offset", offset)
.field("limit", limit) .field("limit", limit)
.field("sort_criteria", sort_criteria) .field("sort_criteria", sort_criteria)
.field("distinct", distinct)
.field("searchable_attributes", searchable_attributes) .field("searchable_attributes", searchable_attributes)
.field("terms_matching_strategy", terms_matching_strategy) .field("terms_matching_strategy", terms_matching_strategy)
.field("scoring_strategy", scoring_strategy) .field("scoring_strategy", scoring_strategy)

View File

@ -22,6 +22,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
ctx: &mut SearchContext<'ctx>, ctx: &mut SearchContext<'ctx>,
mut ranking_rules: Vec<BoxRankingRule<'ctx, Q>>, mut ranking_rules: Vec<BoxRankingRule<'ctx, Q>>,
query: &Q, query: &Q,
distinct: Option<&str>,
universe: &RoaringBitmap, universe: &RoaringBitmap,
from: usize, from: usize,
length: usize, length: usize,
@ -34,7 +35,12 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
logger.ranking_rules(&ranking_rules); logger.ranking_rules(&ranking_rules);
logger.initial_universe(universe); logger.initial_universe(universe);
let distinct_fid = if let Some(field) = ctx.index.distinct_field(ctx.txn)? { let distinct_field = match distinct {
Some(distinct) => Some(distinct),
None => ctx.index.distinct_field(ctx.txn)?,
};
let distinct_fid = if let Some(field) = distinct_field {
ctx.index.fields_ids_map(ctx.txn)?.id(field) ctx.index.fields_ids_map(ctx.txn)?.id(field)
} else { } else {
None None

View File

@ -516,6 +516,7 @@ mod tests {
false, false,
universe, universe,
&None, &None,
&None,
crate::search::new::GeoSortStrategy::default(), crate::search::new::GeoSortStrategy::default(),
0, 0,
100, 100,

View File

@ -567,6 +567,7 @@ pub fn execute_vector_search(
scoring_strategy: ScoringStrategy, scoring_strategy: ScoringStrategy,
universe: RoaringBitmap, universe: RoaringBitmap,
sort_criteria: &Option<Vec<AscDesc>>, sort_criteria: &Option<Vec<AscDesc>>,
distinct: &Option<String>,
geo_strategy: geo_sort::Strategy, geo_strategy: geo_sort::Strategy,
from: usize, from: usize,
length: usize, length: usize,
@ -597,6 +598,7 @@ pub fn execute_vector_search(
ctx, ctx,
ranking_rules, ranking_rules,
&PlaceholderQuery, &PlaceholderQuery,
distinct.as_deref(),
&universe, &universe,
from, from,
length, length,
@ -626,6 +628,7 @@ pub fn execute_search(
exhaustive_number_hits: bool, exhaustive_number_hits: bool,
mut universe: RoaringBitmap, mut universe: RoaringBitmap,
sort_criteria: &Option<Vec<AscDesc>>, sort_criteria: &Option<Vec<AscDesc>>,
distinct: &Option<String>,
geo_strategy: geo_sort::Strategy, geo_strategy: geo_sort::Strategy,
from: usize, from: usize,
length: usize, length: usize,
@ -716,6 +719,7 @@ pub fn execute_search(
ctx, ctx,
ranking_rules, ranking_rules,
&graph, &graph,
distinct.as_deref(),
&universe, &universe,
from, from,
length, length,
@ -731,6 +735,7 @@ pub fn execute_search(
ctx, ctx,
ranking_rules, ranking_rules,
&PlaceholderQuery, &PlaceholderQuery,
distinct.as_deref(),
&universe, &universe,
from, from,
length, length,
@ -747,7 +752,14 @@ pub fn execute_search(
// The candidates is the universe unless the exhaustive number of hits // The candidates is the universe unless the exhaustive number of hits
// is requested and a distinct attribute is set. // is requested and a distinct attribute is set.
if exhaustive_number_hits { if exhaustive_number_hits {
if let Some(f) = ctx.index.distinct_field(ctx.txn)? { // TODO Should the distinct search parameter replace the distinct setting?
// Or should we return an error if the distinct search param is set at the same time as the setting is set?
let distinct_field = match distinct.as_deref() {
Some(distinct) => Some(distinct),
None => ctx.index.distinct_field(ctx.txn)?,
};
if let Some(f) = distinct_field {
if let Some(distinct_fid) = fields_ids_map.id(f) { if let Some(distinct_fid) = fields_ids_map.id(f) {
all_candidates = apply_distinct_rule(ctx, distinct_fid, &all_candidates)?.remaining; all_candidates = apply_distinct_rule(ctx, distinct_fid, &all_candidates)?.remaining;
} }