From c26db7878c99fa0ca3e866d0c690181e41fb118c Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Thu, 11 Apr 2024 19:04:43 +0200 Subject: [PATCH] Expose rankingScoreThreshold in API --- meilisearch-types/src/deserr/mod.rs | 1 + meilisearch-types/src/error.rs | 10 ++++ .../src/analytics/segment_analytics.rs | 3 + .../src/routes/indexes/facet_search.rs | 8 ++- meilisearch/src/routes/indexes/search.rs | 23 +++++++- meilisearch/src/search.rs | 55 +++++++++++++++---- milli/examples/search.rs | 1 + 7 files changed, 85 insertions(+), 16 deletions(-) diff --git a/meilisearch-types/src/deserr/mod.rs b/meilisearch-types/src/deserr/mod.rs index c593c50fb..198a4e7b7 100644 --- a/meilisearch-types/src/deserr/mod.rs +++ b/meilisearch-types/src/deserr/mod.rs @@ -189,4 +189,5 @@ merge_with_error_impl_take_error_message!(ParseTaskKindError); merge_with_error_impl_take_error_message!(ParseTaskStatusError); merge_with_error_impl_take_error_message!(IndexUidFormatError); merge_with_error_impl_take_error_message!(InvalidSearchSemanticRatio); +merge_with_error_impl_take_error_message!(InvalidSearchRankingScoreThreshold); merge_with_error_impl_take_error_message!(InvalidSimilarId); diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index d2218807f..bf8eaba1c 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -241,6 +241,7 @@ InvalidSearchAttributesToCrop , InvalidRequest , BAD_REQUEST ; InvalidSearchAttributesToHighlight , InvalidRequest , BAD_REQUEST ; InvalidSimilarAttributesToRetrieve , InvalidRequest , BAD_REQUEST ; InvalidSearchAttributesToRetrieve , InvalidRequest , BAD_REQUEST ; +InvalidSearchRankingScoreThreshold , InvalidRequest , BAD_REQUEST ; InvalidSearchCropLength , InvalidRequest , BAD_REQUEST ; InvalidSearchCropMarker , InvalidRequest , BAD_REQUEST ; InvalidSearchFacets , InvalidRequest , BAD_REQUEST ; @@ -505,6 +506,15 @@ impl fmt::Display for deserr_codes::InvalidSimilarId { } } +impl fmt::Display for deserr_codes::InvalidSearchRankingScoreThreshold { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!( + f, + "the value of `rankingScoreThreshold` is invalid, expected a float between `0.0` and `1.0`." + ) + } +} + #[macro_export] macro_rules! internal_error { ($target:ty : $($other:path), *) => { diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index add430893..10583da1b 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -676,6 +676,7 @@ impl SearchAggregator { matching_strategy, attributes_to_search_on, hybrid, + ranking_score_threshold, } = query; let mut ret = Self::default(); @@ -1087,6 +1088,7 @@ impl MultiSearchAggregator { matching_strategy: _, attributes_to_search_on: _, hybrid: _, + ranking_score_threshold: _, } = query; index_uid.as_str() @@ -1234,6 +1236,7 @@ impl FacetSearchAggregator { matching_strategy, attributes_to_search_on, hybrid, + ranking_score_threshold, } = query; let mut ret = Self::default(); diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs index 3f05fa846..845b476fe 100644 --- a/meilisearch/src/routes/indexes/facet_search.rs +++ b/meilisearch/src/routes/indexes/facet_search.rs @@ -14,9 +14,7 @@ use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::routes::indexes::search::search_kind; use crate::search::{ - add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, SearchQuery, - DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, - DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, + add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, RankingScoreThreshold, SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET }; use crate::search_queue::SearchQueue; @@ -46,6 +44,8 @@ pub struct FacetSearchQuery { pub matching_strategy: MatchingStrategy, #[deserr(default, error = DeserrJsonError, default)] pub attributes_to_search_on: Option>, + #[deserr(default, error = DeserrJsonError, default)] + pub ranking_score_threshold: Option, } pub async fn search( @@ -103,6 +103,7 @@ impl From for SearchQuery { matching_strategy, attributes_to_search_on, hybrid, + ranking_score_threshold, } = value; SearchQuery { @@ -128,6 +129,7 @@ impl From for SearchQuery { vector, attributes_to_search_on, hybrid, + ranking_score_threshold, } } } diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index 8628da6d9..7f5acbd37 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -19,9 +19,10 @@ use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS; use crate::search::{ - add_search_rules, perform_search, HybridQuery, MatchingStrategy, SearchKind, SearchQuery, - SemanticRatio, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, - DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO, + add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold, + SearchKind, SearchQuery, SemanticRatio, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, + DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, + DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO, }; use crate::search_queue::SearchQueue; @@ -82,6 +83,21 @@ pub struct SearchQueryGet { pub hybrid_embedder: Option, #[deserr(default, error = DeserrQueryParamError)] pub hybrid_semantic_ratio: Option, + #[deserr(default, error = DeserrQueryParamError, default)] + pub ranking_score_threshold: Option, +} + +#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)] +#[deserr(try_from(String) = TryFrom::try_from -> InvalidSearchRankingScoreThreshold)] +pub struct RankingScoreThresholdGet(RankingScoreThreshold); + +impl std::convert::TryFrom for RankingScoreThresholdGet { + type Error = InvalidSearchRankingScoreThreshold; + + fn try_from(s: String) -> Result { + let f: f64 = s.parse().map_err(|_| InvalidSearchRankingScoreThreshold)?; + Ok(RankingScoreThresholdGet(RankingScoreThreshold::try_from(f)?)) + } } #[derive(Debug, Clone, Copy, Default, PartialEq, deserr::Deserr)] @@ -152,6 +168,7 @@ impl From for SearchQuery { matching_strategy: other.matching_strategy, attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()), hybrid, + ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0), } } } diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index c6c4e88ca..f4648a9d5 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -87,6 +87,26 @@ pub struct SearchQuery { pub matching_strategy: MatchingStrategy, #[deserr(default, error = DeserrJsonError, default)] pub attributes_to_search_on: Option>, + #[deserr(default, error = DeserrJsonError, default)] + pub ranking_score_threshold: Option, +} + +#[derive(Debug, Clone, Copy, PartialEq, Deserr)] +#[deserr(try_from(f64) = TryFrom::try_from -> InvalidSearchRankingScoreThreshold)] +pub struct RankingScoreThreshold(f64); + +impl std::convert::TryFrom for RankingScoreThreshold { + type Error = InvalidSearchRankingScoreThreshold; + + fn try_from(f: f64) -> Result { + // the suggested "fix" is: `!(0.0..=1.0).contains(&f)`` which is allegedly less readable + #[allow(clippy::manual_range_contains)] + if f > 1.0 || f < 0.0 { + Err(InvalidSearchRankingScoreThreshold) + } else { + Ok(RankingScoreThreshold(f)) + } + } } // Since this structure is logged A LOT we're going to reduce the number of things it logs to the bare minimum. @@ -117,6 +137,7 @@ impl fmt::Debug for SearchQuery { crop_marker, matching_strategy, attributes_to_search_on, + ranking_score_threshold, } = self; let mut debug = f.debug_struct("SearchQuery"); @@ -188,6 +209,9 @@ impl fmt::Debug for SearchQuery { debug.field("highlight_pre_tag", &highlight_pre_tag); debug.field("highlight_post_tag", &highlight_post_tag); debug.field("crop_marker", &crop_marker); + if let Some(ranking_score_threshold) = ranking_score_threshold { + debug.field("ranking_score_threshold", &ranking_score_threshold); + } debug.finish() } @@ -356,6 +380,8 @@ pub struct SearchQueryWithIndex { pub matching_strategy: MatchingStrategy, #[deserr(default, error = DeserrJsonError, default)] pub attributes_to_search_on: Option>, + #[deserr(default, error = DeserrJsonError, default)] + pub ranking_score_threshold: Option, } impl SearchQueryWithIndex { @@ -384,6 +410,7 @@ impl SearchQueryWithIndex { matching_strategy, attributes_to_search_on, hybrid, + ranking_score_threshold, } = self; ( index_uid, @@ -410,6 +437,7 @@ impl SearchQueryWithIndex { matching_strategy, attributes_to_search_on, hybrid, + ranking_score_threshold, // do not use ..Default::default() here, // rather add any missing field from `SearchQuery` to `SearchQueryWithIndex` }, @@ -661,6 +689,7 @@ fn prepare_search<'t>( ) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> { let mut search = index.search(rtxn); search.time_budget(time_budget); + search.ranking_score_threshold(query.ranking_score_threshold.map(|rst| rst.0)); match search_kind { SearchKind::KeywordOnly => { @@ -702,11 +731,16 @@ fn prepare_search<'t>( .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS); search.exhaustive_number_hits(is_finite_pagination); - search.scoring_strategy(if query.show_ranking_score || query.show_ranking_score_details { - ScoringStrategy::Detailed - } else { - ScoringStrategy::Skip - }); + search.scoring_strategy( + if query.show_ranking_score + || query.show_ranking_score_details + || query.ranking_score_threshold.is_some() + { + ScoringStrategy::Detailed + } else { + ScoringStrategy::Skip + }, + ); // compute the offset on the limit depending on the pagination mode. let (offset, limit) = if is_finite_pagination { @@ -784,10 +818,6 @@ pub fn perform_search( let SearchQuery { q, - vector: _, - hybrid: _, - // already computed from prepare_search - offset: _, limit, page, hits_per_page, @@ -798,14 +828,19 @@ pub fn perform_search( show_matches_position, show_ranking_score, show_ranking_score_details, - filter: _, sort, facets, highlight_pre_tag, highlight_post_tag, crop_marker, + // already used in prepare_search + vector: _, + hybrid: _, + offset: _, + ranking_score_threshold: _, matching_strategy: _, attributes_to_search_on: _, + filter: _, } = query; let format = AttributesFormat { diff --git a/milli/examples/search.rs b/milli/examples/search.rs index 2779f5b15..0195c396f 100644 --- a/milli/examples/search.rs +++ b/milli/examples/search.rs @@ -66,6 +66,7 @@ fn main() -> Result<(), Box> { &mut DefaultSearchLogger, logger, TimeBudget::max(), + None, )?; if let Some((logger, dir)) = detailed_logger { logger.finish(&mut ctx, Path::new(dir))?;