From 600178c5abb02d493937597ba12fa31419c933ab Mon Sep 17 00:00:00 2001 From: Mubelotix Date: Tue, 1 Jul 2025 18:33:09 +0200 Subject: [PATCH] Still limit to max hits --- crates/meilisearch/src/search/mod.rs | 1 + crates/milli/src/search/hybrid.rs | 1 + crates/milli/src/search/mod.rs | 11 +++++++++++ crates/milli/src/search/new/bucket_sort.rs | 6 +++++- crates/milli/src/search/new/matches/mod.rs | 1 + crates/milli/src/search/new/mod.rs | 5 +++++ 6 files changed, 24 insertions(+), 1 deletion(-) diff --git a/crates/meilisearch/src/search/mod.rs b/crates/meilisearch/src/search/mod.rs index 5e543c53f..e1cfc542b 100644 --- a/crates/meilisearch/src/search/mod.rs +++ b/crates/meilisearch/src/search/mod.rs @@ -1020,6 +1020,7 @@ pub fn prepare_search<'t>( .unwrap_or(DEFAULT_PAGINATION_MAX_TOTAL_HITS); search.exhaustive_number_hits(is_finite_pagination); + search.max_total_hits(Some(max_total_hits)); search.scoring_strategy( if query.show_ranking_score || query.show_ranking_score_details diff --git a/crates/milli/src/search/hybrid.rs b/crates/milli/src/search/hybrid.rs index b63f6288f..5fc228807 100644 --- a/crates/milli/src/search/hybrid.rs +++ b/crates/milli/src/search/hybrid.rs @@ -209,6 +209,7 @@ impl Search<'_> { scoring_strategy: ScoringStrategy::Detailed, words_limit: self.words_limit, exhaustive_number_hits: self.exhaustive_number_hits, + max_total_hits: self.max_total_hits, rtxn: self.rtxn, index: self.index, semantic: self.semantic.clone(), diff --git a/crates/milli/src/search/mod.rs b/crates/milli/src/search/mod.rs index ecb9af852..2192ea9fd 100644 --- a/crates/milli/src/search/mod.rs +++ b/crates/milli/src/search/mod.rs @@ -51,6 +51,7 @@ pub struct Search<'a> { scoring_strategy: ScoringStrategy, words_limit: usize, exhaustive_number_hits: bool, + max_total_hits: Option, rtxn: &'a heed::RoTxn<'a>, index: &'a Index, semantic: Option, @@ -73,6 +74,7 @@ impl<'a> Search<'a> { terms_matching_strategy: TermsMatchingStrategy::default(), scoring_strategy: Default::default(), exhaustive_number_hits: false, + max_total_hits: None, words_limit: 10, rtxn, index, @@ -163,6 +165,11 @@ impl<'a> Search<'a> { self } + pub fn max_total_hits(&mut self, max_total_hits: Option) -> &mut Search<'a> { + self.max_total_hits = max_total_hits; + self + } + pub fn time_budget(&mut self, time_budget: TimeBudget) -> &mut Search<'a> { self.time_budget = time_budget; self @@ -237,6 +244,7 @@ impl<'a> Search<'a> { vector, self.scoring_strategy, self.exhaustive_number_hits, + self.max_total_hits, universe, &self.sort_criteria, &self.distinct, @@ -256,6 +264,7 @@ impl<'a> Search<'a> { self.terms_matching_strategy, self.scoring_strategy, self.exhaustive_number_hits, + self.max_total_hits, universe, &self.sort_criteria, &self.distinct, @@ -309,6 +318,7 @@ impl fmt::Debug for Search<'_> { scoring_strategy, words_limit, exhaustive_number_hits, + max_total_hits, rtxn: _, index: _, semantic, @@ -328,6 +338,7 @@ impl fmt::Debug for Search<'_> { .field("terms_matching_strategy", terms_matching_strategy) .field("scoring_strategy", scoring_strategy) .field("exhaustive_number_hits", exhaustive_number_hits) + .field("max_total_hits", max_total_hits) .field("words_limit", words_limit) .field( "semantic.embedder_name", diff --git a/crates/milli/src/search/new/bucket_sort.rs b/crates/milli/src/search/new/bucket_sort.rs index f4fd62825..298983091 100644 --- a/crates/milli/src/search/new/bucket_sort.rs +++ b/crates/milli/src/search/new/bucket_sort.rs @@ -33,6 +33,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( time_budget: TimeBudget, ranking_score_threshold: Option, exhaustive_number_hits: bool, + max_total_hits: Option, ) -> Result { logger.initial_query(query); logger.ranking_rules(&ranking_rules); @@ -160,8 +161,11 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( }; } + let max_total_hits = max_total_hits.unwrap_or(usize::MAX); while valid_docids.len() < length - || (exhaustive_number_hits && ranking_score_threshold.is_some()) + || (exhaustive_number_hits + && ranking_score_threshold.is_some() + && valid_docids.len() < max_total_hits) { if time_budget.exceeded() { loop { diff --git a/crates/milli/src/search/new/matches/mod.rs b/crates/milli/src/search/new/matches/mod.rs index 2d6f2cf17..66f65f5e5 100644 --- a/crates/milli/src/search/new/matches/mod.rs +++ b/crates/milli/src/search/new/matches/mod.rs @@ -510,6 +510,7 @@ mod tests { crate::TermsMatchingStrategy::default(), crate::score_details::ScoringStrategy::Skip, false, + None, universe, &None, &None, diff --git a/crates/milli/src/search/new/mod.rs b/crates/milli/src/search/new/mod.rs index 2c6fe5c3c..047d08202 100644 --- a/crates/milli/src/search/new/mod.rs +++ b/crates/milli/src/search/new/mod.rs @@ -627,6 +627,7 @@ pub fn execute_vector_search( vector: &[f32], scoring_strategy: ScoringStrategy, exhaustive_number_hits: bool, + max_total_hits: Option, universe: RoaringBitmap, sort_criteria: &Option>, distinct: &Option, @@ -671,6 +672,7 @@ pub fn execute_vector_search( time_budget, ranking_score_threshold, exhaustive_number_hits, + max_total_hits, )?; Ok(PartialSearchResult { @@ -691,6 +693,7 @@ pub fn execute_search( terms_matching_strategy: TermsMatchingStrategy, scoring_strategy: ScoringStrategy, exhaustive_number_hits: bool, + max_total_hits: Option, mut universe: RoaringBitmap, sort_criteria: &Option>, distinct: &Option, @@ -828,6 +831,7 @@ pub fn execute_search( time_budget, ranking_score_threshold, exhaustive_number_hits, + max_total_hits, )? } else { let ranking_rules = @@ -845,6 +849,7 @@ pub fn execute_search( time_budget, ranking_score_threshold, exhaustive_number_hits, + max_total_hits, )? };