diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index ae2a753db..086396d7d 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -273,13 +273,14 @@ InvalidSimilarShowRankingScore , InvalidRequest , BAD_REQUEST ; InvalidSearchShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ; InvalidSimilarShowRankingScoreDetails , InvalidRequest , BAD_REQUEST ; InvalidSearchSort , InvalidRequest , BAD_REQUEST ; +InvalidSearchDistinct , InvalidRequest , BAD_REQUEST ; InvalidSettingsDisplayedAttributes , InvalidRequest , BAD_REQUEST ; InvalidSettingsDistinctAttribute , InvalidRequest , BAD_REQUEST ; InvalidSettingsProximityPrecision , InvalidRequest , BAD_REQUEST ; InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ; InvalidSettingsFilterableAttributes , InvalidRequest , BAD_REQUEST ; InvalidSettingsPagination , InvalidRequest , BAD_REQUEST ; -InvalidSettingsSearchCutoffMs , InvalidRequest , BAD_REQUEST ; +InvalidSettingsSearchCutoffMs , InvalidRequest , BAD_REQUEST ; InvalidSettingsEmbedders , InvalidRequest , BAD_REQUEST ; InvalidSettingsRankingRules , InvalidRequest , BAD_REQUEST ; InvalidSettingsSearchableAttributes , InvalidRequest , BAD_REQUEST ; @@ -384,6 +385,7 @@ impl ErrorCode for milli::Error { Code::IndexPrimaryKeyMultipleCandidatesFound } UserError::PrimaryKeyCannotBeChanged(_) => Code::IndexPrimaryKeyAlreadyExists, + UserError::InvalidDistinctAttribute { .. } => Code::InvalidSearchDistinct, UserError::SortRankingRuleMissing => Code::InvalidSearchSort, UserError::InvalidFacetsDistribution { .. } => Code::InvalidSearchFacets, UserError::InvalidSortableAttribute { .. } => Code::InvalidSearchSort, diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 56a781c47..94e4684d5 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -597,6 +597,9 @@ pub struct SearchAggregator { // every time a request has a filter, this field must be incremented by one sort_total_number_of_criteria: usize, + // distinct + distinct: bool, + // filter filter_with_geo_radius: bool, filter_with_geo_bounding_box: bool, @@ -672,6 +675,7 @@ impl SearchAggregator { show_ranking_score_details, filter, sort, + distinct, facets: _, highlight_pre_tag, highlight_post_tag, @@ -694,6 +698,8 @@ impl SearchAggregator { ret.sort_sum_of_criteria_terms = sort.len(); } + ret.distinct = distinct.is_some(); + if let Some(ref filter) = filter { static RE: Lazy = Lazy::new(|| Regex::new("AND | OR").unwrap()); ret.filter_total_number_of_criteria = 1; @@ -798,6 +804,7 @@ impl SearchAggregator { sort_with_geo_point, sort_sum_of_criteria_terms, sort_total_number_of_criteria, + distinct, filter_with_geo_radius, filter_with_geo_bounding_box, filter_sum_of_criteria_terms, @@ -855,6 +862,9 @@ impl SearchAggregator { self.sort_total_number_of_criteria = self.sort_total_number_of_criteria.saturating_add(sort_total_number_of_criteria); + // distinct + self.distinct |= distinct; + // filter self.filter_with_geo_radius |= filter_with_geo_radius; self.filter_with_geo_bounding_box |= filter_with_geo_bounding_box; @@ -926,6 +936,7 @@ impl SearchAggregator { sort_with_geo_point, sort_sum_of_criteria_terms, sort_total_number_of_criteria, + distinct, filter_with_geo_radius, filter_with_geo_bounding_box, filter_sum_of_criteria_terms, @@ -983,6 +994,7 @@ impl SearchAggregator { "with_geoPoint": sort_with_geo_point, "avg_criteria_number": format!("{:.2}", sort_sum_of_criteria_terms as f64 / sort_total_number_of_criteria as f64), }, + "distinct": distinct, "filter": { "with_geoRadius": filter_with_geo_radius, "with_geoBoundingBox": filter_with_geo_bounding_box, @@ -1095,6 +1107,7 @@ impl MultiSearchAggregator { show_matches_position: _, filter: _, sort: _, + distinct: _, facets: _, highlight_pre_tag: _, highlight_post_tag: _, diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index 2e9cf6e1b..89d0418b4 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -124,6 +124,7 @@ impl From for SearchQuery { show_ranking_score_details: false, filter, sort: None, + distinct: None, facets: None, highlight_pre_tag: DEFAULT_HIGHLIGHT_PRE_TAG(), highlight_post_tag: DEFAULT_HIGHLIGHT_POST_TAG(), diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index 421cf2940..985864ba5 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -63,6 +63,8 @@ pub struct SearchQueryGet { filter: Option, #[deserr(default, error = DeserrQueryParamError)] sort: Option, + #[deserr(default, error = DeserrQueryParamError)] + distinct: Option, #[deserr(default, error = DeserrQueryParamError)] show_matches_position: Param, #[deserr(default, error = DeserrQueryParamError)] @@ -161,6 +163,7 @@ impl From for SearchQuery { attributes_to_highlight: other.attributes_to_highlight.map(|o| o.into_iter().collect()), filter, sort: other.sort.map(|attr| fix_sort_query_parameters(&attr)), + distinct: other.distinct, show_matches_position: other.show_matches_position.0, show_ranking_score: other.show_ranking_score.0, show_ranking_score_details: other.show_ranking_score_details.0, diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index 9632e3f5d..61df1827d 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -78,6 +78,8 @@ pub struct SearchQuery { pub filter: Option, #[deserr(default, error = DeserrJsonError)] pub sort: Option>, + #[deserr(default, error = DeserrJsonError)] + pub distinct: Option, #[deserr(default, error = DeserrJsonError)] pub facets: Option>, #[deserr(default, error = DeserrJsonError, default = DEFAULT_HIGHLIGHT_PRE_TAG())] @@ -153,6 +155,7 @@ impl fmt::Debug for SearchQuery { show_ranking_score_details, filter, sort, + distinct, facets, highlight_pre_tag, highlight_post_tag, @@ -202,6 +205,9 @@ impl fmt::Debug for SearchQuery { if let Some(sort) = sort { debug.field("sort", &sort); } + if let Some(distinct) = distinct { + debug.field("distinct", &distinct); + } if let Some(facets) = facets { debug.field("facets", &facets); } @@ -395,6 +401,8 @@ pub struct SearchQueryWithIndex { pub filter: Option, #[deserr(default, error = DeserrJsonError)] pub sort: Option>, + #[deserr(default, error = DeserrJsonError)] + pub distinct: Option, #[deserr(default, error = DeserrJsonError)] pub facets: Option>, #[deserr(default, error = DeserrJsonError, default = DEFAULT_HIGHLIGHT_PRE_TAG())] @@ -431,6 +439,7 @@ impl SearchQueryWithIndex { show_matches_position, filter, sort, + distinct, facets, highlight_pre_tag, highlight_post_tag, @@ -459,6 +468,7 @@ impl SearchQueryWithIndex { show_matches_position, filter, sort, + distinct, facets, highlight_pre_tag, highlight_post_tag, @@ -729,6 +739,10 @@ fn prepare_search<'t>( search.ranking_score_threshold(ranking_score_threshold.0); } + if let Some(distinct) = &query.distinct { + search.distinct(distinct.clone()); + } + match search_kind { SearchKind::KeywordOnly => { if let Some(q) = &query.q { @@ -882,6 +896,7 @@ pub fn perform_search( matching_strategy: _, attributes_to_search_on: _, filter: _, + distinct: _, } = query; let format = AttributesFormat { diff --git a/meilisearch/tests/search/distinct.rs b/meilisearch/tests/search/distinct.rs index aea98215d..2023c01a8 100644 --- a/meilisearch/tests/search/distinct.rs +++ b/meilisearch/tests/search/distinct.rs @@ -107,6 +107,39 @@ static DOCUMENTS: Lazy = Lazy::new(|| { ]) }); +static NESTED_DOCUMENTS: Lazy = Lazy::new(|| { + json!([ + { + "id": 1, + "description": "Leather Jacket", + "brand": "Lee Jeans", + "product_id": "123456", + "color": { "main": "Brown", "pattern": "stripped" }, + }, + { + "id": 2, + "description": "Leather Jacket", + "brand": "Lee Jeans", + "product_id": "123456", + "color": { "main": "Black", "pattern": "stripped" }, + }, + { + "id": 3, + "description": "Leather Jacket", + "brand": "Lee Jeans", + "product_id": "123456", + "color": { "main": "Blue", "pattern": "used" }, + }, + { + "id": 4, + "description": "T-Shirt", + "brand": "Nike", + "product_id": "789012", + "color": { "main": "Blue", "pattern": "stripped" }, + } + ]) +}); + static DOCUMENT_PRIMARY_KEY: &str = "id"; static DOCUMENT_DISTINCT_KEY: &str = "product_id"; @@ -239,3 +272,35 @@ async fn distinct_search_with_pagination_no_ranking() { snapshot!(response["totalPages"], @"2"); snapshot!(response["totalHits"], @"6"); } + +#[actix_rt::test] +async fn distinct_at_search_time() { + let server = Server::new().await; + let index = server.index("tamo"); + + let documents = NESTED_DOCUMENTS.clone(); + index.add_documents(documents, Some(DOCUMENT_PRIMARY_KEY)).await; + let (task, _) = index.update_settings_filterable_attributes(json!(["color.main"])).await; + let task = index.wait_task(task.uid()).await; + snapshot!(task, name: "succeed"); + + fn get_hits(response: &Value) -> Vec { + let hits_array = response["hits"] + .as_array() + .unwrap_or_else(|| panic!("{}", &serde_json::to_string_pretty(&response).unwrap())); + hits_array + .iter() + .map(|h| h[DOCUMENT_PRIMARY_KEY].as_number().unwrap().to_string()) + .collect::>() + } + + let (response, code) = + index.search_post(json!({"page": 1, "hitsPerPage": 3, "distinct": "color.main"})).await; + let hits = get_hits(&response); + snapshot!(code, @"200 OK"); + snapshot!(hits.len(), @"3"); + snapshot!(format!("{:?}", hits), @r###"["1", "2", "3"]"###); + snapshot!(response["page"], @"1"); + snapshot!(response["totalPages"], @"1"); + snapshot!(response["totalHits"], @"3"); +} diff --git a/meilisearch/tests/search/errors.rs b/meilisearch/tests/search/errors.rs index 75977b190..dc7bf70a7 100644 --- a/meilisearch/tests/search/errors.rs +++ b/meilisearch/tests/search/errors.rs @@ -1140,3 +1140,66 @@ async fn search_on_unknown_field_plus_joker() { ) .await; } + +#[actix_rt::test] +async fn distinct_at_search_time() { + let server = Server::new().await; + let index = server.index("tamo"); + let (task, _) = index.create(None).await; + let task = index.wait_task(task.uid()).await; + snapshot!(task, name: "task-succeed"); + + let (response, code) = + index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. This index does not have configured filterable attributes.", + "code": "invalid_search_distinct", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_distinct" + } + "###); + + let (task, _) = index.update_settings_filterable_attributes(json!(["color", "machin"])).await; + index.wait_task(task.uid()).await; + + let (response, code) = + index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, machin`.", + "code": "invalid_search_distinct", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_distinct" + } + "###); + + let (task, _) = index.update_settings_displayed_attributes(json!(["color"])).await; + index.wait_task(task.uid()).await; + + let (response, code) = + index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": "doggo.truc"})).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Attribute `doggo.truc` is not filterable and thus, cannot be used as distinct attribute. Available filterable attributes are: `color, <..hidden-attributes>`.", + "code": "invalid_search_distinct", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_distinct" + } + "###); + + let (response, code) = + index.search_post(json!({"page": 0, "hitsPerPage": 2, "distinct": true})).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(response, @r###" + { + "message": "Invalid value type at `.distinct`: expected a string, but found a boolean: `true`", + "code": "invalid_search_distinct", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_distinct" + } + "###); +} diff --git a/meilisearch/tests/search/snapshots/distinct.rs/distinct_at_search_time/succeed.snap b/meilisearch/tests/search/snapshots/distinct.rs/distinct_at_search_time/succeed.snap new file mode 100644 index 000000000..1b8190c42 --- /dev/null +++ b/meilisearch/tests/search/snapshots/distinct.rs/distinct_at_search_time/succeed.snap @@ -0,0 +1,20 @@ +--- +source: meilisearch/tests/search/distinct.rs +--- +{ + "uid": 1, + "indexUid": "tamo", + "status": "succeeded", + "type": "settingsUpdate", + "canceledBy": null, + "details": { + "filterableAttributes": [ + "color.main" + ] + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" +} diff --git a/meilisearch/tests/search/snapshots/errors.rs/distinct_at_search_time/task-succeed.snap b/meilisearch/tests/search/snapshots/errors.rs/distinct_at_search_time/task-succeed.snap new file mode 100644 index 000000000..903e96ffb --- /dev/null +++ b/meilisearch/tests/search/snapshots/errors.rs/distinct_at_search_time/task-succeed.snap @@ -0,0 +1,18 @@ +--- +source: meilisearch/tests/search/errors.rs +--- +{ + "uid": 0, + "indexUid": "tamo", + "status": "succeeded", + "type": "indexCreation", + "canceledBy": null, + "details": { + "primaryKey": null + }, + "error": null, + "duration": "[duration]", + "enqueuedAt": "[date]", + "startedAt": "[date]", + "finishedAt": "[date]" +} diff --git a/milli/examples/search.rs b/milli/examples/search.rs index 0195c396f..87020994a 100644 --- a/milli/examples/search.rs +++ b/milli/examples/search.rs @@ -59,6 +59,7 @@ fn main() -> Result<(), Box> { false, universe, &None, + &None, GeoSortStrategy::default(), 0, 20, diff --git a/milli/src/error.rs b/milli/src/error.rs index 83754afe4..7420ce667 100644 --- a/milli/src/error.rs +++ b/milli/src/error.rs @@ -134,6 +134,17 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco } )] InvalidSortableAttribute { field: String, valid_fields: BTreeSet, hidden_fields: bool }, + #[error("Attribute `{}` is not filterable and thus, cannot be used as distinct attribute. {}", + .field, + match .valid_fields.is_empty() { + true => "This index does not have configured filterable attributes.".to_string(), + false => format!("Available filterable attributes are: `{}{}`.", + valid_fields.iter().map(AsRef::as_ref).collect::>().join(", "), + .hidden_fields.then_some(", <..hidden-attributes>").unwrap_or(""), + ), + } + )] + InvalidDistinctAttribute { field: String, valid_fields: BTreeSet, hidden_fields: bool }, #[error("Attribute `{}` is not facet-searchable. {}", .field, match .valid_fields.is_empty() { diff --git a/milli/src/search/hybrid.rs b/milli/src/search/hybrid.rs index 87f922c4c..1c784097d 100644 --- a/milli/src/search/hybrid.rs +++ b/milli/src/search/hybrid.rs @@ -159,6 +159,7 @@ impl<'a> Search<'a> { offset: 0, limit: self.limit + self.offset, sort_criteria: self.sort_criteria.clone(), + distinct: self.distinct.clone(), searchable_attributes: self.searchable_attributes, geo_strategy: self.geo_strategy, terms_matching_strategy: self.terms_matching_strategy, diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 49d73ff31..bf488f9f0 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -11,8 +11,8 @@ use self::new::{execute_vector_search, PartialSearchResult}; use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::vector::Embedder; use crate::{ - execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, - SearchContext, TimeBudget, + execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, Error, Index, + Result, SearchContext, TimeBudget, UserError, }; // Building these factories is not free. @@ -40,6 +40,7 @@ pub struct Search<'a> { offset: usize, limit: usize, sort_criteria: Option>, + distinct: Option, searchable_attributes: Option<&'a [String]>, geo_strategy: new::GeoSortStrategy, terms_matching_strategy: TermsMatchingStrategy, @@ -61,6 +62,7 @@ impl<'a> Search<'a> { offset: 0, limit: 20, sort_criteria: None, + distinct: None, searchable_attributes: None, geo_strategy: new::GeoSortStrategy::default(), terms_matching_strategy: TermsMatchingStrategy::default(), @@ -105,6 +107,11 @@ impl<'a> Search<'a> { self } + pub fn distinct(&mut self, distinct: String) -> &mut Search<'a> { + self.distinct = Some(distinct); + self + } + pub fn searchable_attributes(&mut self, searchable: &'a [String]) -> &mut Search<'a> { self.searchable_attributes = Some(searchable); self @@ -169,6 +176,19 @@ impl<'a> Search<'a> { ctx.attributes_to_search_on(searchable_attributes)?; } + if let Some(distinct) = &self.distinct { + let filterable_fields = ctx.index.filterable_fields(ctx.txn)?; + if !crate::is_faceted(distinct, &filterable_fields) { + let (valid_fields, hidden_fields) = + ctx.index.remove_hidden_fields(ctx.txn, filterable_fields)?; + return Err(Error::UserError(UserError::InvalidDistinctAttribute { + field: distinct.clone(), + valid_fields, + hidden_fields, + })); + } + } + let universe = filtered_universe(ctx.index, ctx.txn, &self.filter)?; let PartialSearchResult { located_query_terms, @@ -185,6 +205,7 @@ impl<'a> Search<'a> { self.scoring_strategy, universe, &self.sort_criteria, + &self.distinct, self.geo_strategy, self.offset, self.limit, @@ -202,6 +223,7 @@ impl<'a> Search<'a> { self.exhaustive_number_hits, universe, &self.sort_criteria, + &self.distinct, self.geo_strategy, self.offset, self.limit, @@ -238,6 +260,7 @@ impl fmt::Debug for Search<'_> { offset, limit, sort_criteria, + distinct, searchable_attributes, geo_strategy: _, terms_matching_strategy, @@ -257,6 +280,7 @@ impl fmt::Debug for Search<'_> { .field("offset", offset) .field("limit", limit) .field("sort_criteria", sort_criteria) + .field("distinct", distinct) .field("searchable_attributes", searchable_attributes) .field("terms_matching_strategy", terms_matching_strategy) .field("scoring_strategy", scoring_strategy) diff --git a/milli/src/search/new/bucket_sort.rs b/milli/src/search/new/bucket_sort.rs index d937c78bf..9255e4c09 100644 --- a/milli/src/search/new/bucket_sort.rs +++ b/milli/src/search/new/bucket_sort.rs @@ -22,6 +22,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( ctx: &mut SearchContext<'ctx>, mut ranking_rules: Vec>, query: &Q, + distinct: Option<&str>, universe: &RoaringBitmap, from: usize, length: usize, @@ -34,7 +35,12 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( logger.ranking_rules(&ranking_rules); logger.initial_universe(universe); - let distinct_fid = if let Some(field) = ctx.index.distinct_field(ctx.txn)? { + let distinct_field = match distinct { + Some(distinct) => Some(distinct), + None => ctx.index.distinct_field(ctx.txn)?, + }; + + let distinct_fid = if let Some(field) = distinct_field { ctx.index.fields_ids_map(ctx.txn)?.id(field) } else { None diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index 87ddb2915..77ae5fcd5 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -516,6 +516,7 @@ mod tests { false, universe, &None, + &None, crate::search::new::GeoSortStrategy::default(), 0, 100, diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index 52eb7ffea..25ce482d3 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -568,6 +568,7 @@ pub fn execute_vector_search( scoring_strategy: ScoringStrategy, universe: RoaringBitmap, sort_criteria: &Option>, + distinct: &Option, geo_strategy: geo_sort::Strategy, from: usize, length: usize, @@ -598,6 +599,7 @@ pub fn execute_vector_search( ctx, ranking_rules, &PlaceholderQuery, + distinct.as_deref(), &universe, from, length, @@ -627,6 +629,7 @@ pub fn execute_search( exhaustive_number_hits: bool, mut universe: RoaringBitmap, sort_criteria: &Option>, + distinct: &Option, geo_strategy: geo_sort::Strategy, from: usize, length: usize, @@ -717,6 +720,7 @@ pub fn execute_search( ctx, ranking_rules, &graph, + distinct.as_deref(), &universe, from, length, @@ -732,6 +736,7 @@ pub fn execute_search( ctx, ranking_rules, &PlaceholderQuery, + distinct.as_deref(), &universe, from, length, @@ -748,7 +753,12 @@ pub fn execute_search( // The candidates is the universe unless the exhaustive number of hits // is requested and a distinct attribute is set. if exhaustive_number_hits { - if let Some(f) = ctx.index.distinct_field(ctx.txn)? { + let distinct_field = match distinct.as_deref() { + Some(distinct) => Some(distinct), + None => ctx.index.distinct_field(ctx.txn)?, + }; + + if let Some(f) = distinct_field { if let Some(distinct_fid) = fields_ids_map.id(f) { all_candidates = apply_distinct_rule(ctx, distinct_fid, &all_candidates)?.remaining; } diff --git a/milli/src/search/new/tests/distinct.rs b/milli/src/search/new/tests/distinct.rs index c54600f27..75c00da2a 100644 --- a/milli/src/search/new/tests/distinct.rs +++ b/milli/src/search/new/tests/distinct.rs @@ -205,8 +205,18 @@ fn create_index() -> TempIndex { index } -fn verify_distinct(index: &Index, txn: &RoTxn, docids: &[u32]) -> Vec { - let vs = collect_field_values(index, txn, index.distinct_field(txn).unwrap().unwrap(), docids); +fn verify_distinct( + index: &Index, + txn: &RoTxn, + distinct: Option<&str>, + docids: &[u32], +) -> Vec { + let vs = collect_field_values( + index, + txn, + distinct.or_else(|| index.distinct_field(txn).unwrap()).unwrap(), + docids, + ); let mut unique = HashSet::new(); for v in vs.iter() { @@ -223,12 +233,49 @@ fn verify_distinct(index: &Index, txn: &RoTxn, docids: &[u32]) -> Vec { fn test_distinct_placeholder_no_ranking_rules() { let index = create_index(); + // Set the letter as filterable and unset the distinct attribute. + index + .update_settings(|s| { + s.set_filterable_fields(hashset! { S("letter") }); + s.reset_distinct_field(); + }) + .unwrap(); + + let txn = index.read_txn().unwrap(); + + let mut s = Search::new(&txn, &index); + s.distinct(S("letter")); + let SearchResult { documents_ids, .. } = s.execute().unwrap(); + insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 5, 8, 9, 15, 18, 20, 21, 24, 25, 26]"); + let distinct_values = verify_distinct(&index, &txn, Some("letter"), &documents_ids); + insta::assert_debug_snapshot!(distinct_values, @r###" + [ + "\"A\"", + "\"B\"", + "\"C\"", + "\"D\"", + "\"E\"", + "\"F\"", + "\"G\"", + "\"H\"", + "\"I\"", + "__does_not_exist__", + "__does_not_exist__", + "__does_not_exist__", + ] + "###); +} + +#[test] +fn test_distinct_at_search_placeholder_no_ranking_rules() { + let index = create_index(); + let txn = index.read_txn().unwrap(); let s = Search::new(&txn, &index); let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 5, 8, 9, 15, 18, 20, 21, 24, 25, 26]"); - let distinct_values = verify_distinct(&index, &txn, &documents_ids); + let distinct_values = verify_distinct(&index, &txn, None, &documents_ids); insta::assert_debug_snapshot!(distinct_values, @r###" [ "\"A\"", @@ -263,7 +310,7 @@ fn test_distinct_placeholder_sort() { let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[14, 26, 4, 7, 17, 23, 1, 19, 25, 8, 20, 24]"); - let distinct_values = verify_distinct(&index, &txn, &documents_ids); + let distinct_values = verify_distinct(&index, &txn, None, &documents_ids); insta::assert_debug_snapshot!(distinct_values, @r###" [ "\"E\"", @@ -303,7 +350,7 @@ fn test_distinct_placeholder_sort() { let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[21, 20, 18, 15, 9, 8, 5, 2, 0, 24, 25, 26]"); - let distinct_values = verify_distinct(&index, &txn, &documents_ids); + let distinct_values = verify_distinct(&index, &txn, None, &documents_ids); insta::assert_debug_snapshot!(distinct_values, @r###" [ "\"I\"", @@ -346,7 +393,7 @@ fn test_distinct_placeholder_sort() { let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[23, 20, 19, 17, 14, 8, 7, 4, 1, 26, 25, 24]"); - let distinct_values = verify_distinct(&index, &txn, &documents_ids); + let distinct_values = verify_distinct(&index, &txn, None, &documents_ids); insta::assert_debug_snapshot!(distinct_values, @r###" [ "\"I\"", @@ -399,7 +446,7 @@ fn test_distinct_words() { let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[0, 2, 26, 5, 8, 9, 15, 18, 20, 21, 25, 24]"); - let distinct_values = verify_distinct(&index, &txn, &documents_ids); + let distinct_values = verify_distinct(&index, &txn, None, &documents_ids); insta::assert_debug_snapshot!(distinct_values, @r###" [ "\"A\"", @@ -453,7 +500,7 @@ fn test_distinct_sort_words() { let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[22, 20, 19, 16, 9, 8, 7, 3, 1, 26, 25, 24]"); - let distinct_values = verify_distinct(&index, &txn, &documents_ids); + let distinct_values = verify_distinct(&index, &txn, None, &documents_ids); insta::assert_debug_snapshot!(distinct_values, @r###" [ "\"I\"", @@ -549,7 +596,7 @@ fn test_distinct_typo() { let SearchResult { documents_ids, .. } = s.execute().unwrap(); insta::assert_snapshot!(format!("{documents_ids:?}"), @"[3, 26, 0, 7, 8, 9, 15, 22, 18, 20, 25, 24]"); - let distinct_values = verify_distinct(&index, &txn, &documents_ids); + let distinct_values = verify_distinct(&index, &txn, None, &documents_ids); insta::assert_debug_snapshot!(distinct_values, @r###" [ "\"B\"",