4666: Add a score threshold search parameter r=ManyTheFish a=dureuill

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4609

## What does this PR do?
- See [usage](https://meilisearch.notion.site/Filter-by-score-usage-224a183ce7b24ca99b6a9a8da755668a?pvs=25#95b76ded400342ba9ab3d67c734836f0) and [the known limitation](https://meilisearch.notion.site/Filter-by-score-usage-224a183ce7b24ca99b6a9a8da755668a?pvs=25#e4e32195bf0e4195b5daecdbb7a97a17)


Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2024-06-03 08:42:44 +00:00 committed by GitHub
commit fc584f1db3
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
18 changed files with 804 additions and 32 deletions

View file

@ -648,6 +648,7 @@ pub struct SearchAggregator {
// scoring
show_ranking_score: bool,
show_ranking_score_details: bool,
ranking_score_threshold: bool,
}
impl SearchAggregator {
@ -676,6 +677,7 @@ impl SearchAggregator {
matching_strategy,
attributes_to_search_on,
hybrid,
ranking_score_threshold,
} = query;
let mut ret = Self::default();
@ -748,6 +750,7 @@ impl SearchAggregator {
ret.show_ranking_score = *show_ranking_score;
ret.show_ranking_score_details = *show_ranking_score_details;
ret.ranking_score_threshold = ranking_score_threshold.is_some();
if let Some(hybrid) = hybrid {
ret.semantic_ratio = hybrid.semantic_ratio != DEFAULT_SEMANTIC_RATIO();
@ -821,6 +824,7 @@ impl SearchAggregator {
hybrid,
total_degraded,
total_used_negative_operator,
ranking_score_threshold,
} = other;
if self.timestamp.is_none() {
@ -904,6 +908,7 @@ impl SearchAggregator {
// scoring
self.show_ranking_score |= show_ranking_score;
self.show_ranking_score_details |= show_ranking_score_details;
self.ranking_score_threshold |= ranking_score_threshold;
}
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
@ -945,6 +950,7 @@ impl SearchAggregator {
hybrid,
total_degraded,
total_used_negative_operator,
ranking_score_threshold,
} = self;
if total_received == 0 {
@ -1015,6 +1021,7 @@ impl SearchAggregator {
"scoring": {
"show_ranking_score": show_ranking_score,
"show_ranking_score_details": show_ranking_score_details,
"ranking_score_threshold": ranking_score_threshold,
},
});
@ -1087,6 +1094,7 @@ impl MultiSearchAggregator {
matching_strategy: _,
attributes_to_search_on: _,
hybrid: _,
ranking_score_threshold: _,
} = query;
index_uid.as_str()
@ -1234,6 +1242,7 @@ impl FacetSearchAggregator {
matching_strategy,
attributes_to_search_on,
hybrid,
ranking_score_threshold,
} = query;
let mut ret = Self::default();
@ -1248,7 +1257,8 @@ impl FacetSearchAggregator {
|| filter.is_some()
|| *matching_strategy != MatchingStrategy::default()
|| attributes_to_search_on.is_some()
|| hybrid.is_some();
|| hybrid.is_some()
|| ranking_score_threshold.is_some();
ret
}
@ -1624,6 +1634,7 @@ pub struct SimilarAggregator {
// scoring
show_ranking_score: bool,
show_ranking_score_details: bool,
ranking_score_threshold: bool,
}
impl SimilarAggregator {
@ -1638,6 +1649,7 @@ impl SimilarAggregator {
show_ranking_score,
show_ranking_score_details,
filter,
ranking_score_threshold,
} = query;
let mut ret = Self::default();
@ -1675,6 +1687,7 @@ impl SimilarAggregator {
ret.show_ranking_score = *show_ranking_score;
ret.show_ranking_score_details = *show_ranking_score_details;
ret.ranking_score_threshold = ranking_score_threshold.is_some();
ret.embedder = embedder.is_some();
@ -1708,6 +1721,7 @@ impl SimilarAggregator {
show_ranking_score,
show_ranking_score_details,
embedder,
ranking_score_threshold,
} = other;
if self.timestamp.is_none() {
@ -1749,6 +1763,7 @@ impl SimilarAggregator {
// scoring
self.show_ranking_score |= show_ranking_score;
self.show_ranking_score_details |= show_ranking_score_details;
self.ranking_score_threshold |= ranking_score_threshold;
}
pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
@ -1769,6 +1784,7 @@ impl SimilarAggregator {
show_ranking_score,
show_ranking_score_details,
embedder,
ranking_score_threshold,
} = self;
if total_received == 0 {
@ -1808,6 +1824,7 @@ impl SimilarAggregator {
"scoring": {
"show_ranking_score": show_ranking_score,
"show_ranking_score_details": show_ranking_score_details,
"ranking_score_threshold": ranking_score_threshold,
},
});

View file

@ -14,8 +14,8 @@ use crate::extractors::authentication::policies::*;
use crate::extractors::authentication::GuardedData;
use crate::routes::indexes::search::search_kind;
use crate::search::{
add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, SearchQuery,
DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
add_search_rules, perform_facet_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
SearchQuery, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
};
use crate::search_queue::SearchQueue;
@ -46,6 +46,8 @@ pub struct FacetSearchQuery {
pub matching_strategy: MatchingStrategy,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)]
pub attributes_to_search_on: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
pub ranking_score_threshold: Option<RankingScoreThreshold>,
}
pub async fn search(
@ -103,6 +105,7 @@ impl From<FacetSearchQuery> for SearchQuery {
matching_strategy,
attributes_to_search_on,
hybrid,
ranking_score_threshold,
} = value;
SearchQuery {
@ -128,6 +131,7 @@ impl From<FacetSearchQuery> for SearchQuery {
vector,
attributes_to_search_on,
hybrid,
ranking_score_threshold,
}
}
}

View file

@ -19,9 +19,10 @@ use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS;
use crate::search::{
add_search_rules, perform_search, HybridQuery, MatchingStrategy, SearchKind, SearchQuery,
SemanticRatio, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG,
DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
add_search_rules, perform_search, HybridQuery, MatchingStrategy, RankingScoreThreshold,
SearchKind, SearchQuery, SemanticRatio, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER,
DEFAULT_HIGHLIGHT_POST_TAG, DEFAULT_HIGHLIGHT_PRE_TAG, DEFAULT_SEARCH_LIMIT,
DEFAULT_SEARCH_OFFSET, DEFAULT_SEMANTIC_RATIO,
};
use crate::search_queue::SearchQueue;
@ -82,6 +83,21 @@ pub struct SearchQueryGet {
pub hybrid_embedder: Option<String>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchSemanticRatio>)]
pub hybrid_semantic_ratio: Option<SemanticRatioGet>,
#[deserr(default, error = DeserrQueryParamError<InvalidSearchRankingScoreThreshold>)]
pub ranking_score_threshold: Option<RankingScoreThresholdGet>,
}
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
#[deserr(try_from(String) = TryFrom::try_from -> InvalidSearchRankingScoreThreshold)]
pub struct RankingScoreThresholdGet(RankingScoreThreshold);
impl std::convert::TryFrom<String> for RankingScoreThresholdGet {
type Error = InvalidSearchRankingScoreThreshold;
fn try_from(s: String) -> Result<Self, Self::Error> {
let f: f64 = s.parse().map_err(|_| InvalidSearchRankingScoreThreshold)?;
Ok(RankingScoreThresholdGet(RankingScoreThreshold::try_from(f)?))
}
}
#[derive(Debug, Clone, Copy, Default, PartialEq, deserr::Deserr)]
@ -152,6 +168,7 @@ impl From<SearchQueryGet> for SearchQuery {
matching_strategy: other.matching_strategy,
attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()),
hybrid,
ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0),
}
}
}

View file

@ -6,8 +6,8 @@ use meilisearch_types::deserr::query_params::Param;
use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
use meilisearch_types::error::deserr_codes::{
InvalidEmbedder, InvalidSimilarAttributesToRetrieve, InvalidSimilarFilter, InvalidSimilarId,
InvalidSimilarLimit, InvalidSimilarOffset, InvalidSimilarShowRankingScore,
InvalidSimilarShowRankingScoreDetails,
InvalidSimilarLimit, InvalidSimilarOffset, InvalidSimilarRankingScoreThreshold,
InvalidSimilarShowRankingScore, InvalidSimilarShowRankingScoreDetails,
};
use meilisearch_types::error::{ErrorCode as _, ResponseError};
use meilisearch_types::index_uid::IndexUid;
@ -21,8 +21,8 @@ use crate::analytics::{Analytics, SimilarAggregator};
use crate::extractors::authentication::GuardedData;
use crate::extractors::sequential_extractor::SeqHandler;
use crate::search::{
add_search_rules, perform_similar, SearchKind, SimilarQuery, SimilarResult,
DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
add_search_rules, perform_similar, RankingScoreThresholdSimilar, SearchKind, SimilarQuery,
SimilarResult, DEFAULT_SEARCH_LIMIT, DEFAULT_SEARCH_OFFSET,
};
pub fn configure(cfg: &mut web::ServiceConfig) {
@ -42,9 +42,7 @@ pub async fn similar_get(
) -> Result<HttpResponse, ResponseError> {
let index_uid = IndexUid::try_from(index_uid.into_inner())?;
let query = params.0.try_into().map_err(|code: InvalidSimilarId| {
ResponseError::from_msg(code.to_string(), code.error_code())
})?;
let query = params.0.try_into()?;
let mut aggregate = SimilarAggregator::from_query(&query, &req);
@ -130,12 +128,27 @@ pub struct SimilarQueryGet {
show_ranking_score: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarShowRankingScoreDetails>)]
show_ranking_score_details: Param<bool>,
#[deserr(default, error = DeserrQueryParamError<InvalidSimilarRankingScoreThreshold>, default)]
pub ranking_score_threshold: Option<RankingScoreThresholdGet>,
#[deserr(default, error = DeserrQueryParamError<InvalidEmbedder>)]
pub embedder: Option<String>,
}
#[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
#[deserr(try_from(String) = TryFrom::try_from -> InvalidSimilarRankingScoreThreshold)]
pub struct RankingScoreThresholdGet(RankingScoreThresholdSimilar);
impl std::convert::TryFrom<String> for RankingScoreThresholdGet {
type Error = InvalidSimilarRankingScoreThreshold;
fn try_from(s: String) -> Result<Self, Self::Error> {
let f: f64 = s.parse().map_err(|_| InvalidSimilarRankingScoreThreshold)?;
Ok(RankingScoreThresholdGet(RankingScoreThresholdSimilar::try_from(f)?))
}
}
impl TryFrom<SimilarQueryGet> for SimilarQuery {
type Error = InvalidSimilarId;
type Error = ResponseError;
fn try_from(
SimilarQueryGet {
@ -147,6 +160,7 @@ impl TryFrom<SimilarQueryGet> for SimilarQuery {
show_ranking_score,
show_ranking_score_details,
embedder,
ranking_score_threshold,
}: SimilarQueryGet,
) -> Result<Self, Self::Error> {
let filter = match filter {
@ -158,7 +172,9 @@ impl TryFrom<SimilarQueryGet> for SimilarQuery {
};
Ok(SimilarQuery {
id: id.0.try_into()?,
id: id.0.try_into().map_err(|code: InvalidSimilarId| {
ResponseError::from_msg(code.to_string(), code.error_code())
})?,
offset: offset.0,
limit: limit.0,
filter,
@ -166,6 +182,7 @@ impl TryFrom<SimilarQueryGet> for SimilarQuery {
attributes_to_retrieve: attributes_to_retrieve.map(|o| o.into_iter().collect()),
show_ranking_score: show_ranking_score.0,
show_ranking_score_details: show_ranking_score_details.0,
ranking_score_threshold: ranking_score_threshold.map(|x| x.0),
})
}
}

View file

@ -87,6 +87,44 @@ pub struct SearchQuery {
pub matching_strategy: MatchingStrategy,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)]
pub attributes_to_search_on: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
pub ranking_score_threshold: Option<RankingScoreThreshold>,
}
#[derive(Debug, Clone, Copy, PartialEq, Deserr)]
#[deserr(try_from(f64) = TryFrom::try_from -> InvalidSearchRankingScoreThreshold)]
pub struct RankingScoreThreshold(f64);
impl std::convert::TryFrom<f64> for RankingScoreThreshold {
type Error = InvalidSearchRankingScoreThreshold;
fn try_from(f: f64) -> Result<Self, Self::Error> {
// the suggested "fix" is: `!(0.0..=1.0).contains(&f)`` which is allegedly less readable
#[allow(clippy::manual_range_contains)]
if f > 1.0 || f < 0.0 {
Err(InvalidSearchRankingScoreThreshold)
} else {
Ok(RankingScoreThreshold(f))
}
}
}
#[derive(Debug, Clone, Copy, PartialEq, Deserr)]
#[deserr(try_from(f64) = TryFrom::try_from -> InvalidSimilarRankingScoreThreshold)]
pub struct RankingScoreThresholdSimilar(f64);
impl std::convert::TryFrom<f64> for RankingScoreThresholdSimilar {
type Error = InvalidSimilarRankingScoreThreshold;
fn try_from(f: f64) -> Result<Self, Self::Error> {
// the suggested "fix" is: `!(0.0..=1.0).contains(&f)`` which is allegedly less readable
#[allow(clippy::manual_range_contains)]
if f > 1.0 || f < 0.0 {
Err(InvalidSimilarRankingScoreThreshold)
} else {
Ok(Self(f))
}
}
}
// Since this structure is logged A LOT we're going to reduce the number of things it logs to the bare minimum.
@ -117,6 +155,7 @@ impl fmt::Debug for SearchQuery {
crop_marker,
matching_strategy,
attributes_to_search_on,
ranking_score_threshold,
} = self;
let mut debug = f.debug_struct("SearchQuery");
@ -188,6 +227,9 @@ impl fmt::Debug for SearchQuery {
debug.field("highlight_pre_tag", &highlight_pre_tag);
debug.field("highlight_post_tag", &highlight_post_tag);
debug.field("crop_marker", &crop_marker);
if let Some(ranking_score_threshold) = ranking_score_threshold {
debug.field("ranking_score_threshold", &ranking_score_threshold);
}
debug.finish()
}
@ -356,6 +398,8 @@ pub struct SearchQueryWithIndex {
pub matching_strategy: MatchingStrategy,
#[deserr(default, error = DeserrJsonError<InvalidSearchAttributesToSearchOn>, default)]
pub attributes_to_search_on: Option<Vec<String>>,
#[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
pub ranking_score_threshold: Option<RankingScoreThreshold>,
}
impl SearchQueryWithIndex {
@ -384,6 +428,7 @@ impl SearchQueryWithIndex {
matching_strategy,
attributes_to_search_on,
hybrid,
ranking_score_threshold,
} = self;
(
index_uid,
@ -410,6 +455,7 @@ impl SearchQueryWithIndex {
matching_strategy,
attributes_to_search_on,
hybrid,
ranking_score_threshold,
// do not use ..Default::default() here,
// rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
},
@ -436,6 +482,8 @@ pub struct SimilarQuery {
pub show_ranking_score: bool,
#[deserr(default, error = DeserrJsonError<InvalidSimilarShowRankingScoreDetails>, default)]
pub show_ranking_score_details: bool,
#[deserr(default, error = DeserrJsonError<InvalidSimilarRankingScoreThreshold>, default)]
pub ranking_score_threshold: Option<RankingScoreThresholdSimilar>,
}
#[derive(Debug, Clone, PartialEq, Deserr)]
@ -664,6 +712,9 @@ fn prepare_search<'t>(
) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> {
let mut search = index.search(rtxn);
search.time_budget(time_budget);
if let Some(ranking_score_threshold) = query.ranking_score_threshold {
search.ranking_score_threshold(ranking_score_threshold.0);
}
match search_kind {
SearchKind::KeywordOnly => {
@ -705,11 +756,16 @@ fn prepare_search<'t>(
.unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS);
search.exhaustive_number_hits(is_finite_pagination);
search.scoring_strategy(if query.show_ranking_score || query.show_ranking_score_details {
ScoringStrategy::Detailed
} else {
ScoringStrategy::Skip
});
search.scoring_strategy(
if query.show_ranking_score
|| query.show_ranking_score_details
|| query.ranking_score_threshold.is_some()
{
ScoringStrategy::Detailed
} else {
ScoringStrategy::Skip
},
);
// compute the offset on the limit depending on the pagination mode.
let (offset, limit) = if is_finite_pagination {
@ -787,10 +843,6 @@ pub fn perform_search(
let SearchQuery {
q,
vector: _,
hybrid: _,
// already computed from prepare_search
offset: _,
limit,
page,
hits_per_page,
@ -801,14 +853,19 @@ pub fn perform_search(
show_matches_position,
show_ranking_score,
show_ranking_score_details,
filter: _,
sort,
facets,
highlight_pre_tag,
highlight_post_tag,
crop_marker,
// already used in prepare_search
vector: _,
hybrid: _,
offset: _,
ranking_score_threshold: _,
matching_strategy: _,
attributes_to_search_on: _,
filter: _,
} = query;
let format = AttributesFormat {
@ -1070,6 +1127,7 @@ pub fn perform_similar(
attributes_to_retrieve,
show_ranking_score,
show_ranking_score_details,
ranking_score_threshold,
} = query;
// using let-else rather than `?` so that the borrow checker identifies we're always returning here,
@ -1093,6 +1151,10 @@ pub fn perform_similar(
}
}
if let Some(ranking_score_threshold) = ranking_score_threshold {
similar.ranking_score_threshold(ranking_score_threshold.0);
}
let milli::SearchResult {
documents_ids,
matching_words: _,