From 0d31be149437dfaa6133e8971c4a333cf82bd4d2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 11 Jun 2024 11:39:35 -0400 Subject: [PATCH 1/9] Make the distinct work at search --- meilisearch/src/analytics/segment_analytics.rs | 14 ++++++++++++++ meilisearch/src/routes/indexes/facet_search.rs | 1 + meilisearch/src/routes/indexes/search.rs | 4 ++++ meilisearch/src/search.rs | 17 +++++++++++++++++ milli/examples/search.rs | 1 + milli/src/search/hybrid.rs | 1 + milli/src/search/mod.rs | 18 ++++++++++++++++++ milli/src/search/new/bucket_sort.rs | 8 +++++++- milli/src/search/new/matches/mod.rs | 1 + milli/src/search/new/mod.rs | 14 +++++++++++++- 10 files changed, 77 insertions(+), 2 deletions(-) diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index aed29e612..ebd808b42 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -597,6 +597,9 @@ pub struct SearchAggregator { // every time a request has a filter, this field must be incremented by one sort_total_number_of_criteria: usize, + // distinct + distinct: bool, + // filter filter_with_geo_radius: bool, filter_with_geo_bounding_box: bool, @@ -670,6 +673,7 @@ impl SearchAggregator { show_ranking_score_details, filter, sort, + distinct, facets: _, highlight_pre_tag, highlight_post_tag, @@ -692,6 +696,8 @@ impl SearchAggregator { ret.sort_sum_of_criteria_terms = sort.len(); } + ret.distinct = distinct.is_some(); + if let Some(ref filter) = filter { static RE: Lazy = Lazy::new(|| Regex::new("AND | OR").unwrap()); ret.filter_total_number_of_criteria = 1; @@ -795,6 +801,7 @@ impl SearchAggregator { sort_with_geo_point, sort_sum_of_criteria_terms, sort_total_number_of_criteria, + distinct, filter_with_geo_radius, filter_with_geo_bounding_box, filter_sum_of_criteria_terms, @@ -851,6 +858,9 @@ impl SearchAggregator { self.sort_total_number_of_criteria = self.sort_total_number_of_criteria.saturating_add(sort_total_number_of_criteria); + // distinct + self.distinct |= distinct; + // filter self.filter_with_geo_radius |= filter_with_geo_radius; self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box; @@ -921,6 +931,7 @@ impl SearchAggregator { sort_with_geo_point, sort_sum_of_criteria_terms, sort_total_number_of_criteria, + distinct, filter_with_geo_radius, filter_with_geo_bounding_box, filter_sum_of_criteria_terms, @@ -977,6 +988,8 @@ impl SearchAggregator { "with_geoPoint": sort_with_geo_point, "avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64), }, + // TODO ask help from María + "distinct": distinct, "filter": { "with_geoRadius": filter_with_geo_radius, "with_geoBoundingBox": filter_with_geo_bounding_box, @@ -1087,6 +1100,7 @@ impl MultiSearchAggregator { show_matches_position: _, filter: _, sort: _, + distinct: _, facets: _, highlight_pre_tag: _, highlight_post_tag: _, diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index 10b371f2d..4b3f73115 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -123,6 +123,7 @@ impl From for SearchQuery { show_ranking_score_details: false, filter, sort: None, + distinct: None, facets: None, highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(), highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(), diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index 348d8295c..6ea6802d9 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -61,6 +61,9 @@ pub struct SearchQueryGet { filter: Option, #[deserr(default, error = DeserrQueryParamError)] sort: Option, + // TODO change the InvalidSearchSort to InvalidSearchDistinct error + #[deserr(default, error = DeserrQueryParamError)] + distinct: Option, #[deserr(default, error = DeserrQueryParamError)] show_matches_position: Param, #[deserr(default, error = DeserrQueryParamError)] @@ -158,6 +161,7 @@ impl From for SearchQuery { attributes_to_highlight: other.attributes_to_highlight.map(|o| o.into_iter().collect()), filter, sort: other.sort.map(|attr| fix_sort_query_parameters(&attr)), + distinct: other.distinct, show_matches_position: other.show_matches_position.0, show_ranking_score: other.show_ranking_score.0, show_ranking_score_details: other.show_ranking_score_details.0, diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index 05b3c1aff..edc3feb5d 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -75,6 +75,9 @@ pub struct SearchQuery { pub filter: Option, #[deserr(default, error = DeserrJsonError)] pub sort: Option>, + // TODO Change the error to InvalidSearchDistinct + #[deserr(default, error = DeserrJsonError)] + pub distinct: Option, #[deserr(default, error = DeserrJsonError)] pub facets: Option>, #[deserr(default, error = DeserrJsonError, default = DEFAULT_HIGHLIGHT_PRE_TAG())] @@ -149,6 +152,7 @@ impl fmt::Debug for SearchQuery { show_ranking_score_details, filter, sort, + distinct, facets, highlight_pre_tag, highlight_post_tag, @@ -195,6 +199,9 @@ impl fmt::Debug for SearchQuery { if let Some(sort) = sort { debug.field("sort", &sort); } + if let Some(distinct) = distinct { + debug.field("distinct", &distinct); + } if let Some(facets) = facets { debug.field("facets", &facets); } @@ -386,6 +393,9 @@ pub struct SearchQueryWithIndex { pub filter: Option, #[deserr(default, error = DeserrJsonError)] pub sort: Option>, + // TODO change error to InvalidSearchDistinct + #[deserr(default, error = DeserrJsonError)] + pub distinct: Option, #[deserr(default, error = DeserrJsonError)] pub facets: Option>, #[deserr(default, error = DeserrJsonError, default = DEFAULT_HIGHLIGHT_PRE_TAG())] @@ -421,6 +431,7 @@ impl SearchQueryWithIndex { show_matches_position, filter, sort, + distinct, facets, highlight_pre_tag, highlight_post_tag, @@ -448,6 +459,7 @@ impl SearchQueryWithIndex { show_matches_position, filter, sort, + distinct, facets, highlight_pre_tag, highlight_post_tag, @@ -716,6 +728,10 @@ fn prepare_search<'t>( search.ranking_score_threshold(ranking_score_threshold.0); } + if let Some(distinct) = &query.distinct { + search.distinct(distinct.clone()); + } + match search_kind { SearchKind::KeywordOnly => { if let Some(q) = &query.q { @@ -866,6 +882,7 @@ pub fn perform_search( matching_strategy: _, attributes_to_search_on: _, filter: _, + distinct: _, } = query; let format = AttributesFormat { diff --git a/milli/examples/search.rs b/milli/examples/search.rs index 0195c396f..87020994a 100644 --- a/milli/examples/search.rs +++ b/milli/examples/search.rs @@ -59,6 +59,7 @@ fn main() -> Result<(), Box> { false, universe, &None, + &None, GeoSortStrategy::default(), 0, 20, diff --git a/milli/src/search/hybrid.rs b/milli/src/search/hybrid.rs index 87f922c4c..1c784097d 100644 --- a/milli/src/search/hybrid.rs +++ b/milli/src/search/hybrid.rs @@ -159,6 +159,7 @@ impl<'a> Search<'a> { offset: 0, limit: self.limit + self.offset, sort_criteria: self.sort_criteria.clone(), + distinct: self.distinct.clone(), searchable_attributes: self.searchable_attributes, geo_strategy: self.geo_strategy, terms_matching_strategy: self.terms_matching_strategy, diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 49d73ff31..d937875da 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -40,6 +40,7 @@ pub struct Search<'a> { offset: usize, limit: usize, sort_criteria: Option>, + distinct: Option, searchable_attributes: Option<&'a [String]>, geo_strategy: new::GeoSortStrategy, terms_matching_strategy: TermsMatchingStrategy, @@ -61,6 +62,7 @@ impl<'a> Search<'a> { offset: 0, limit: 20, sort_criteria: None, + distinct: None, searchable_attributes: None, geo_strategy: new::GeoSortStrategy::default(), terms_matching_strategy: TermsMatchingStrategy::default(), @@ -105,6 +107,11 @@ impl<'a> Search<'a> { self } + pub fn distinct(&mut self, distinct: String) -> &mut Search<'a> { + self.distinct = Some(distinct); + self + } + pub fn searchable_attributes(&mut self, searchable: &'a [String]) -> &mut Search<'a> { self.searchable_attributes = Some(searchable); self @@ -169,6 +176,13 @@ impl<'a> Search<'a> { ctx.attributes_to_search_on(searchable_attributes)?; } + if let Some(distinct) = &self.distinct { + if !ctx.index.filterable_fields(ctx.txn)?.contains(distinct) { + // TODO return a real error message + panic!("Distinct search field is not a filterable attribute"); + } + } + let universe = filtered_universe(ctx.index, ctx.txn, &self.filter)?; let PartialSearchResult { located_query_terms, @@ -185,6 +199,7 @@ impl<'a> Search<'a> { self.scoring_strategy, universe, &self.sort_criteria, + &self.distinct, self.geo_strategy, self.offset, self.limit, @@ -202,6 +217,7 @@ impl<'a> Search<'a> { self.exhaustive_number_hits, universe, &self.sort_criteria, + &self.distinct, self.geo_strategy, self.offset, self.limit, @@ -238,6 +254,7 @@ impl fmt::Debug for Search<'_> { offset, limit, sort_criteria, + distinct, searchable_attributes, geo_strategy: _, terms_matching_strategy, @@ -257,6 +274,7 @@ impl fmt::Debug for Search<'_> { .field("offset", offset) .field("limit", limit) .field("sort_criteria", sort_criteria) + .field("distinct", distinct) .field("searchable_attributes", searchable_attributes) .field("terms_matching_strategy", terms_matching_strategy) .field("scoring_strategy", scoring_strategy) diff --git a/milli/src/search/new/bucket_sort.rs b/milli/src/search/new/bucket_sort.rs index d937c78bf..9255e4c09 100644 --- a/milli/src/search/new/bucket_sort.rs +++ b/milli/src/search/new/bucket_sort.rs @@ -22,6 +22,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( ctx: &mut SearchContext<'ctx>, mut ranking_rules: Vec>, query: &Q, + distinct: Option<&str>, universe: &RoaringBitmap, from: usize, length: usize, @@ -34,7 +35,12 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( logger.ranking_rules(&ranking_rules); logger.initial_universe(universe); - let distinct_fid = if let Some(field) = ctx.index.distinct_field(ctx.txn)? { + let distinct_field = match distinct { + Some(distinct) => Some(distinct), + None => ctx.index.distinct_field(ctx.txn)?, + }; + + let distinct_fid = if let Some(field) = distinct_field { ctx.index.fields_ids_map(ctx.txn)?.id(field) } else { None diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index 87ddb2915..77ae5fcd5 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -516,6 +516,7 @@ mod tests { false, universe, &None, + &None, crate::search::new::GeoSortStrategy::default(), 0, 100, diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index 623c72567..257f81539 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -567,6 +567,7 @@ pub fn execute_vector_search( scoring_strategy: ScoringStrategy, universe: RoaringBitmap, sort_criteria: &Option>, + distinct: &Option, geo_strategy: geo_sort::Strategy, from: usize, length: usize, @@ -597,6 +598,7 @@ pub fn execute_vector_search( ctx, ranking_rules, &PlaceholderQuery, + distinct.as_deref(), &universe, from, length, @@ -626,6 +628,7 @@ pub fn execute_search( exhaustive_number_hits: bool, mut universe: RoaringBitmap, sort_criteria: &Option>, + distinct: &Option, geo_strategy: geo_sort::Strategy, from: usize, length: usize, @@ -716,6 +719,7 @@ pub fn execute_search( ctx, ranking_rules, &graph, + distinct.as_deref(), &universe, from, length, @@ -731,6 +735,7 @@ pub fn execute_search( ctx, ranking_rules, &PlaceholderQuery, + distinct.as_deref(), &universe, from, length, @@ -747,7 +752,14 @@ pub fn execute_search( // The candidates is the universe unless the exhaustive number of hits // is requested and a distinct attribute is set. if exhaustive_number_hits { - if let Some(f) = ctx.index.distinct_field(ctx.txn)? { + // TODO Should the distinct search parameter replace the distinct setting? + // Or should we return an error if the distinct search param is set at the same time as the setting is set? + let distinct_field = match distinct.as_deref() { + Some(distinct) => Some(distinct), + None => ctx.index.distinct_field(ctx.txn)?, + }; + + if let Some(f) = distinct_field { if let Some(distinct_fid) = fields_ids_map.id(f) { all_candidates = apply_distinct_rule(ctx, distinct_fid, &all_candidates)?.remaining; } From ee39309aaeb1a595e9489351aac41223b61a1d79 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 11 Jun 2024 16:03:39 -0400 Subject: [PATCH 2/9] Improve errors and introduce a new InvalidSearchDistinct error code --- meilisearch-types/src/error.rs | 4 +++- meilisearch/src/routes/indexes/search.rs | 3 +-- meilisearch/src/search.rs | 6 ++---- milli/src/error.rs | 11 +++++++++++ milli/src/search/mod.rs | 16 +++++++++++----- 5 files changed, 28 insertions(+), 12 deletions(-) diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 150c56b9d..1d91887e7 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -270,13 +270,14 @@ InvalidSimilarShowRankingScore , InvalidRequest , BAD_REQUEST ; InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ; InvalidSimilarShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ; InvalidSearchSort , InvalidRequest , BAD_REQUEST ; +InvalidSearchDistinct , InvalidRequest , BAD_REQUEST ; InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ; InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ; InvalidSettingsProximityPrecision , InvalidRequest , BAD_REQUEST ; InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ; InvalidSettingsFilterableAttributes , InvalidRequest , BAD_REQUEST ; InvalidSettingsPagination , InvalidRequest , BAD_REQUEST ; -InvalidSettingsSearchCutoffMs , InvalidRequest , BAD_REQUEST ; +InvalidSettingsSearchCutoffMs , InvalidRequest , BAD_REQUEST ; InvalidSettingsEmbedders , InvalidRequest , BAD_REQUEST ; InvalidSettingsRankingRules , InvalidRequest , BAD_REQUEST ; InvalidSettingsSearchableAttributes , InvalidRequest , BAD_REQUEST ; @@ -381,6 +382,7 @@ impl ErrorCode for milli::Error { Code::IndexPrimaryKeyMultipleCandidatesFound } UserError::PrimaryKeyCannotBeChanged(_) => Code::IndexPrimaryKeyAlreadyExists, + UserError::InvalidDistinctAttribute { .. } => Code::InvalidSearchDistinct, UserError::SortRankingRuleMissing => Code::InvalidSearchSort, UserError::InvalidFacetsDistribution { .. } => Code::InvalidSearchFacets, UserError::InvalidSortableAttribute { .. } => Code::InvalidSearchSort, diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index 6ea6802d9..cf179a234 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -61,8 +61,7 @@ pub struct SearchQueryGet { filter: Option, #[deserr(default, error = DeserrQueryParamError)] sort: Option, - // TODO change the InvalidSearchSort to InvalidSearchDistinct error - #[deserr(default, error = DeserrQueryParamError)] + #[deserr(default, error = DeserrQueryParamError)] distinct: Option, #[deserr(default, error = DeserrQueryParamError)] show_matches_position: Param, diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index edc3feb5d..522577cde 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -75,8 +75,7 @@ pub struct SearchQuery { pub filter: Option, #[deserr(default, error = DeserrJsonError)] pub sort: Option>, - // TODO Change the error to InvalidSearchDistinct - #[deserr(default, error = DeserrJsonError)] + #[deserr(default, error = DeserrJsonError)] pub distinct: Option, #[deserr(default, error = DeserrJsonError)] pub facets: Option>, @@ -393,8 +392,7 @@ pub struct SearchQueryWithIndex { pub filter: Option, #[deserr(default, error = DeserrJsonError)] pub sort: Option>, - // TODO change error to InvalidSearchDistinct - #[deserr(default, error = DeserrJsonError)] + #[deserr(default, error = DeserrJsonError)] pub distinct: Option, #[deserr(default, error = DeserrJsonError)] pub facets: Option>, diff --git a/milli/src/error.rs b/milli/src/error.rs index 83754afe4..7420ce667 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -134,6 +134,17 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco } )] InvalidSortableAttribute { field: String, valid_fields: BTreeSet, hidden_fields: bool }, + #[error("Attribute `{}` is not filterable and thus, cannot be used as distinct attribute. {}", + .field, + match .valid_fields.is_empty() { + true => "This index does not have configured filterable attributes.".to_string(), + false => format!("Available filterable attributes are: `{}{}`.", + valid_fields.iter().map(AsRef::as_ref).collect::>().join(", "), + .hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""), + ), + } + )] + InvalidDistinctAttribute { field: String, valid_fields: BTreeSet, hidden_fields: bool }, #[error("Attribute `{}` is not facet-searchable. {}", .field, match .valid_fields.is_empty() { diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index d937875da..922b72d04 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -11,8 +11,8 @@ use self::new::{execute_vector_search, PartialSearchResult}; use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::vector::Embedder; use crate::{ - execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, - SearchContext, TimeBudget, + execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, Error, Index, + Result, SearchContext, TimeBudget, UserError, }; // Building these factories is not free. @@ -177,9 +177,15 @@ impl<'a> Search<'a> { } if let Some(distinct) = &self.distinct { - if !ctx.index.filterable_fields(ctx.txn)?.contains(distinct) { - // TODO return a real error message - panic!("Distinct search field is not a filterable attribute"); + let filterable_fields = ctx.index.filterable_fields(ctx.txn)?; + if !filterable_fields.contains(distinct) { + let (valid_fields, hidden_fields) = + ctx.index.remove_hidden_fields(ctx.txn, filterable_fields)?; + return Err(Error::UserError(UserError::InvalidDistinctAttribute { + field: distinct.clone(), + valid_fields, + hidden_fields, + })); } } From 1991bd03daf30d29b612ff613a463ad94b98d6f4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 11 Jun 2024 17:02:39 -0400 Subject: [PATCH 3/9] Distinct at search erases the distinct in the settings --- milli/src/search/new/mod.rs | 2 -- 1 file changed, 2 deletions(-) diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index 257f81539..5921e27eb 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -752,8 +752,6 @@ pub fn execute_search( // The candidates is the universe unless the exhaustive number of hits // is requested and a distinct attribute is set. if exhaustive_number_hits { - // TODO Should the distinct search parameter replace the distinct setting? - // Or should we return an error if the distinct search param is set at the same time as the setting is set? let distinct_field = match distinct.as_deref() { Some(distinct) => Some(distinct), None => ctx.index.distinct_field(ctx.txn)?, From 39f60abd7d02d5e6207fb653835f54ac0e37cdd2 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 11 Jun 2024 17:53:53 -0400 Subject: [PATCH 4/9] Add and modify distinct tests --- milli/src/search/new/tests/distinct.rs | 65 ++++++++++++++++++++++---- 1 file changed, 56 insertions(+), 9 deletions(-) diff --git a/milli/src/search/new/tests/distinct.rs b/milli/src/search/new/tests/distinct.rs index c54600f27..75c00da2a 100644 --- a/milli/src/search/new/tests/distinct.rs +++ b/milli/src/search/new/tests/distinct.rs @@ -205,8 +205,18 @@ fn create_index() -> TempIndex { index } -fn verify_distinct(index: &Index, txn: &RoTxn, docids: &[u32]) -> Vec { - let vs = collect_field_values(index, txn, index.distinct_field(txn).unwrap().unwrap(), docids); +fn verify_distinct( + index: &Index, + txn: &RoTxn, + distinct: Option<&str>, + docids: &[u32], +) -> Vec { + let vs = collect_field_values( + index, + txn, + distinct.or_else(|| index.distinct_field(txn).unwrap()).unwrap(), + docids, + ); let mut unique = HashSet::new(); for v in vs.iter() { @@ -223,12 +233,49 @@ fn verify_distinct(index: &Index, txn: &RoTxn, docids: &[u32]) -> Vec { fn test_distinct_placeholder_no_ranking_rules() { let index = create_index(); + // Set the letter as filterable and unset the distinct attribute. + index + .update_settings(|s| { + s.set_filterable_fields(hashset! { S("letter") }); + s.reset_distinct_field(); + }) + .unwrap(); + + let txn = index.read_txn().unwrap(); + + let mut s = Search::new(&txn, &index); + s.distinct(S("letter")); + let SearchResult { documents_ids, .. } = s.execute().unwrap(); + insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 5, 8, 9, 15, 18, 20, 21, 24, 25, 26]"); + let distinct_values = verify_distinct(&index, &txn, Some("letter"), &documents_ids); + insta::assert_debug_snapshot!(distinct_values, @r###" + [ + "\"A\"", + "\"B\"", + "\"C\"", + "\"D\"", + "\"E\"", + "\"F\"", + "\"G\"", + "\"H\"", + "\"I\"", + "__does_not_exist__", + "__does_not_exist__", + "__does_not_exist__", + ] + "###); +} + +#[test] +fn test_distinct_at_search_placeholder_no_ranking_rules() { + let index = create_index(); + let txn = index.read_txn().unwrap(); let s = Search::new(&txn, &index); let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 5, 8, 9, 15, 18, 20, 21, 24, 25, 26]"); - let distinct_values = verify_distinct(&index, &txn, &documents_ids); + let distinct_values = verify_distinct(&index, &txn, None, &documents_ids); insta::assert_debug_snapshot!(distinct_values, @r###" [ "\"A\"", @@ -263,7 +310,7 @@ fn test_distinct_placeholder_sort() { let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[14, 26, 4, 7, 17, 23, 1, 19, 25, 8, 20, 24]"); - let distinct_values = verify_distinct(&index, &txn, &documents_ids); + let distinct_values = verify_distinct(&index, &txn, None, &documents_ids); insta::assert_debug_snapshot!(distinct_values, @r###" [ "\"E\"", @@ -303,7 +350,7 @@ fn test_distinct_placeholder_sort() { let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[21, 20, 18, 15, 9, 8, 5, 2, 0, 24, 25, 26]"); - let distinct_values = verify_distinct(&index, &txn, &documents_ids); + let distinct_values = verify_distinct(&index, &txn, None, &documents_ids); insta::assert_debug_snapshot!(distinct_values, @r###" [ "\"I\"", @@ -346,7 +393,7 @@ fn test_distinct_placeholder_sort() { let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[23, 20, 19, 17, 14, 8, 7, 4, 1, 26, 25, 24]"); - let distinct_values = verify_distinct(&index, &txn, &documents_ids); + let distinct_values = verify_distinct(&index, &txn, None, &documents_ids); insta::assert_debug_snapshot!(distinct_values, @r###" [ "\"I\"", @@ -399,7 +446,7 @@ fn test_distinct_words() { let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 26, 5, 8, 9, 15, 18, 20, 21, 25, 24]"); - let distinct_values = verify_distinct(&index, &txn, &documents_ids); + let distinct_values = verify_distinct(&index, &txn, None, &documents_ids); insta::assert_debug_snapshot!(distinct_values, @r###" [ "\"A\"", @@ -453,7 +500,7 @@ fn test_distinct_sort_words() { let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[22, 20, 19, 16, 9, 8, 7, 3, 1, 26, 25, 24]"); - let distinct_values = verify_distinct(&index, &txn, &documents_ids); + let distinct_values = verify_distinct(&index, &txn, None, &documents_ids); insta::assert_debug_snapshot!(distinct_values, @r###" [ "\"I\"", @@ -549,7 +596,7 @@ fn test_distinct_typo() { let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3, 26, 0, 7, 8, 9, 15, 22, 18, 20, 25, 24]"); - let distinct_values = verify_distinct(&index, &txn, &documents_ids); + let distinct_values = verify_distinct(&index, &txn, None, &documents_ids); insta::assert_debug_snapshot!(distinct_values, @r###" [ "\"B\"", From 6bf07d969e71b7661970bbcdcef4f2611c3a19dd Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 13 Jun 2024 15:49:42 +0200 Subject: [PATCH 5/9] add failing test --- meilisearch/tests/search/distinct.rs | 61 ++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/meilisearch/tests/search/distinct.rs b/meilisearch/tests/search/distinct.rs index aea98215d..68f7f18e8 100644 --- a/meilisearch/tests/search/distinct.rs +++ b/meilisearch/tests/search/distinct.rs @@ -107,6 +107,39 @@ static DOCUMENTS: Lazy = Lazy::new(|| { ]) }); +static NESTED_DOCUMENTS: Lazy = Lazy::new(|| { + json!([ + { + "id": 1, + "description": "Leather Jacket", + "brand": "Lee Jeans", + "product_id": "123456", + "color": { "main": "Brown", "pattern": "stripped" }, + }, + { + "id": 2, + "description": "Leather Jacket", + "brand": "Lee Jeans", + "product_id": "123456", + "color": { "main": "Black", "pattern": "stripped" }, + }, + { + "id": 3, + "description": "Leather Jacket", + "brand": "Lee Jeans", + "product_id": "123456", + "color": { "main": "Blue", "pattern": "used" }, + }, + { + "id": 4, + "description": "T-Shirt", + "brand": "Nike", + "product_id": "789012", + "color": { "main": "Blue", "pattern": "stripped" }, + } + ]) +}); + static DOCUMENT_PRIMARY_KEY: &str = "id"; static DOCUMENT_DISTINCT_KEY: &str = "product_id"; @@ -239,3 +272,31 @@ async fn distinct_search_with_pagination_no_ranking() { snapshot!(response["totalPages"], @"2"); snapshot!(response["totalHits"], @"6"); } + +#[actix_rt::test] +async fn distinct_at_search_time() { + let server = Server::new().await; + let index = server.index("tamo"); + + let documents = NESTED_DOCUMENTS.clone(); + index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await; + index.update_settings_filterable_attributes(json!(["color"])).await; + index.wait_task(1).await; + + fn get_hits(response: &Value) -> Vec<&str> { + let hits_array = response["hits"] + .as_array() + .unwrap_or_else(|| panic!("{}", &serde_json::to_string_pretty(&response).unwrap())); + hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::>() + } + + let (response, code) = + index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "color.main"})).await; + let hits = get_hits(&response); + snapshot!(code, @"200 OK"); + snapshot!(hits.len(), @"0"); + snapshot!(format!("{:?}", hits), @r#"[]"#); + snapshot!(response["page"], @"0"); + snapshot!(response["totalPages"], @"3"); + snapshot!(response["totalHits"], @"6"); +} From a8a085442130bb7ff755de6b5d42fdbf8f2241e9 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 17 Jun 2024 14:30:50 +0200 Subject: [PATCH 6/9] Update meilisearch/src/analytics/segment_analytics.rs --- meilisearch/src/analytics/segment_analytics.rs | 1 - 1 file changed, 1 deletion(-) diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index ebd808b42..aa87a234c 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -988,7 +988,6 @@ impl SearchAggregator { "with_geoPoint": sort_with_geo_point, "avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64), }, - // TODO ask help from María "distinct": distinct, "filter": { "with_geoRadius": filter_with_geo_radius, From d7844a6e4542c7b02c221661ce49d2a2be70a8ed Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 17 Jun 2024 15:37:32 +0200 Subject: [PATCH 7/9] add a bunch of tests on the errors of the distinct at search time --- meilisearch/tests/search/errors.rs | 63 ++++++++++++++++++++++++++++++ 1 file changed, 63 insertions(+) diff --git a/meilisearch/tests/search/errors.rs b/meilisearch/tests/search/errors.rs index 53d516c44..3f631773e 100644 --- a/meilisearch/tests/search/errors.rs +++ b/meilisearch/tests/search/errors.rs @@ -1072,3 +1072,66 @@ async fn search_on_unknown_field_plus_joker() { ) .await; } + +#[actix_rt::test] +async fn distinct_at_search_time() { + let server = Server::new().await; + let index = server.index("tamo"); + let (task, _) = index.create(None).await; + let task = index.wait_task(task.uid()).await; + snapshot!(task, name: "task-succeed"); + + let (response, code) = + index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. This index does not have configured filterable attributes.", + "code": "invalid_search_distinct", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_distinct" + } + "###); + + let (task, _) = index.update_settings_filterable_attributes(json!(["color", "machin"])).await; + index.wait_task(task.uid()).await; + + let (response, code) = + index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, machin`.", + "code": "invalid_search_distinct", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_distinct" + } + "###); + + let (task, _) = index.update_settings_displayed_attributes(json!(["color"])).await; + index.wait_task(task.uid()).await; + + let (response, code) = + index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, <..hidden-attributes>`.", + "code": "invalid_search_distinct", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_distinct" + } + "###); + + let (response, code) = + index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": true})).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Invalid value type at `.distinct`: expected a string, but found a boolean: `true`", + "code": "invalid_search_distinct", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_distinct" + } + "###); +} From 43875e6758ab434c044cf9564852b7789eb88159 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 17 Jun 2024 15:59:30 +0200 Subject: [PATCH 8/9] fix bug around nested fields --- meilisearch/tests/search/distinct.rs | 24 ++++++++++++++---------- milli/src/search/mod.rs | 2 +- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/meilisearch/tests/search/distinct.rs b/meilisearch/tests/search/distinct.rs index 68f7f18e8..2023c01a8 100644 --- a/meilisearch/tests/search/distinct.rs +++ b/meilisearch/tests/search/distinct.rs @@ -280,23 +280,27 @@ async fn distinct_at_search_time() { let documents = NESTED_DOCUMENTS.clone(); index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await; - index.update_settings_filterable_attributes(json!(["color"])).await; - index.wait_task(1).await; + let (task, _) = index.update_settings_filterable_attributes(json!(["color.main"])).await; + let task = index.wait_task(task.uid()).await; + snapshot!(task, name: "succeed"); - fn get_hits(response: &Value) -> Vec<&str> { + fn get_hits(response: &Value) -> Vec { let hits_array = response["hits"] .as_array() .unwrap_or_else(|| panic!("{}", &serde_json::to_string_pretty(&response).unwrap())); - hits_array.iter().map(|h| h[DOCUMENT_DISTINCT_KEY].as_str().unwrap()).collect::>() + hits_array + .iter() + .map(|h| h[DOCUMENT_PRIMARY_KEY].as_number().unwrap().to_string()) + .collect::>() } let (response, code) = - index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "color.main"})).await; + index.search_post(json!({"page": 1, "hitsPerPage": 3, "distinct": "color.main"})).await; let hits = get_hits(&response); snapshot!(code, @"200 OK"); - snapshot!(hits.len(), @"0"); - snapshot!(format!("{:?}", hits), @r#"[]"#); - snapshot!(response["page"], @"0"); - snapshot!(response["totalPages"], @"3"); - snapshot!(response["totalHits"], @"6"); + snapshot!(hits.len(), @"3"); + snapshot!(format!("{:?}", hits), @r###"["1", "2", "3"]"###); + snapshot!(response["page"], @"1"); + snapshot!(response["totalPages"], @"1"); + snapshot!(response["totalHits"], @"3"); } diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 922b72d04..bf488f9f0 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -178,7 +178,7 @@ impl<'a> Search<'a> { if let Some(distinct) = &self.distinct { let filterable_fields = ctx.index.filterable_fields(ctx.txn)?; - if !filterable_fields.contains(distinct) { + if !crate::is_faceted(distinct, &filterable_fields) { let (valid_fields, hidden_fields) = ctx.index.remove_hidden_fields(ctx.txn, filterable_fields)?; return Err(Error::UserError(UserError::InvalidDistinctAttribute { From 8ba65e333bd40f8e6e35fd41f8dc90732e6de631 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 17 Jun 2024 16:50:26 +0200 Subject: [PATCH 9/9] add snapshot files --- .../distinct_at_search_time/succeed.snap | 20 +++++++++++++++++++ .../distinct_at_search_time/task-succeed.snap | 18 +++++++++++++++++ 2 files changed, 38 insertions(+) create mode 100644 meilisearch/tests/search/snapshots/distinct.rs/distinct_at_search_time/succeed.snap create mode 100644 meilisearch/tests/search/snapshots/errors.rs/distinct_at_search_time/task-succeed.snap diff --git a/meilisearch/tests/search/snapshots/distinct.rs/distinct_at_search_time/succeed.snap b/meilisearch/tests/search/snapshots/distinct.rs/distinct_at_search_time/succeed.snap new file mode 100644 index 000000000..1b8190c42 --- /dev/null +++ b/meilisearch/tests/search/snapshots/distinct.rs/distinct_at_search_time/succeed.snap @@ -0,0 +1,20 @@ +--- +source: meilisearch/tests/search/distinct.rs +--- +{ + "uid": 1, + "indexUid": "tamo", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "filterableAttributes": [ + "color.main" + ] + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" +} diff --git a/meilisearch/tests/search/snapshots/errors.rs/distinct_at_search_time/task-succeed.snap b/meilisearch/tests/search/snapshots/errors.rs/distinct_at_search_time/task-succeed.snap new file mode 100644 index 000000000..903e96ffb --- /dev/null +++ b/meilisearch/tests/search/snapshots/errors.rs/distinct_at_search_time/task-succeed.snap @@ -0,0 +1,18 @@ +--- +source: meilisearch/tests/search/errors.rs +--- +{ + "uid": 0, + "indexUid": "tamo", + "status": "succeeded", + "type": "indexCreation", + "canceledBy": null, + "details": { + "primaryKey": null + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" +}