From 4a467739cdfd6c6eafb73ae57649199729bf8d7b Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 5 Mar 2024 11:21:46 +0100 Subject: [PATCH 01/16] implements a first version of the cutoff without settings --- .../src/analytics/segment_analytics.rs | 9 ++ meilisearch/src/search.rs | 39 ++++++--- milli/examples/search.rs | 3 +- milli/src/index.rs | 1 + milli/src/lib.rs | 35 ++++++++ milli/src/search/hybrid.rs | 2 + milli/src/search/mod.rs | 82 +++++++++++-------- milli/src/search/new/bucket_sort.rs | 27 +++++- milli/src/search/new/matches/mod.rs | 3 +- milli/src/search/new/mod.rs | 16 +++- milli/tests/search/mod.rs | 45 +++++++++- 11 files changed, 210 insertions(+), 52 deletions(-) diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 7dfc52900..99298bd43 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -579,6 +579,7 @@ pub struct SearchAggregator { // requests total_received: usize, total_succeeded: usize, + total_degraded: usize, time_spent: BinaryHeap, // sort @@ -758,9 +759,13 @@ impl SearchAggregator { hits_info: _, facet_distribution: _, facet_stats: _, + degraded, } = result; self.total_succeeded = self.total_succeeded.saturating_add(1); + if *degraded { + self.total_degraded = self.total_degraded.saturating_add(1); + } self.time_spent.push(*processing_time_ms as usize); } @@ -802,6 +807,7 @@ impl SearchAggregator { semantic_ratio, embedder, hybrid, + total_degraded, } = other; if self.timestamp.is_none() { @@ -816,6 +822,7 @@ impl SearchAggregator { // request self.total_received = self.total_received.saturating_add(total_received); self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded); + self.total_degraded = self.total_degraded.saturating_add(total_degraded); self.time_spent.append(time_spent); // sort @@ -921,6 +928,7 @@ impl SearchAggregator { semantic_ratio, embedder, hybrid, + total_degraded, } = self; if total_received == 0 { @@ -940,6 +948,7 @@ impl SearchAggregator { "total_succeeded": total_succeeded, "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics "total_received": total_received, + "total_degraded": total_degraded, }, "sort": { "with_geoPoint": sort_with_geo_point, diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index e65192d16..9bc7b69fc 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -1,7 +1,7 @@ use std::cmp::min; use std::collections::{BTreeMap, BTreeSet, HashSet}; use std::str::FromStr; -use std::time::Instant; +use std::time::{Duration, Instant}; use deserr::Deserr; use either::Either; @@ -14,7 +14,7 @@ use meilisearch_types::heed::RoTxn; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::milli::score_details::{self, ScoreDetails, ScoringStrategy}; use meilisearch_types::milli::vector::DistributionShift; -use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues}; +use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues, TimeBudget}; use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS; use meilisearch_types::{milli, Document}; use milli::tokenizer::TokenizerBuilder; @@ -323,6 +323,9 @@ pub struct SearchResult { pub facet_distribution: Option>>, #[serde(skip_serializing_if = "Option::is_none")] pub facet_stats: Option>, + + #[serde(skip_serializing_if = "std::ops::Not::not")] + pub degraded: bool, } #[derive(Serialize, Debug, Clone, PartialEq)] @@ -382,8 +385,10 @@ fn prepare_search<'t>( query: &'t SearchQuery, features: RoFeatures, distribution: Option, + time_budget: TimeBudget, ) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> { let mut search = index.search(rtxn); + search.time_budget(time_budget); if query.vector.is_some() { features.check_vector("Passing `vector` as a query parameter")?; @@ -491,19 +496,26 @@ pub fn perform_search( distribution: Option, ) -> Result { let before_search = Instant::now(); + let time_budget = TimeBudget::new(Duration::from_millis(150)); let rtxn = index.read_txn()?; let (search, is_finite_pagination, max_total_hits, offset) = - prepare_search(index, &rtxn, &query, features, distribution)?; + prepare_search(index, &rtxn, &query, features, distribution, time_budget)?; - let milli::SearchResult { documents_ids, matching_words, candidates, document_scores, .. } = - match &query.hybrid { - Some(hybrid) => match *hybrid.semantic_ratio { - ratio if ratio == 0.0 || ratio == 1.0 => search.execute()?, - ratio => search.execute_hybrid(ratio)?, - }, - None => search.execute()?, - }; + let milli::SearchResult { + documents_ids, + matching_words, + candidates, + document_scores, + degraded, + .. + } = match &query.hybrid { + Some(hybrid) => match *hybrid.semantic_ratio { + ratio if ratio == 0.0 || ratio == 1.0 => search.execute()?, + ratio => search.execute_hybrid(ratio)?, + }, + None => search.execute()?, + }; let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); @@ -700,6 +712,7 @@ pub fn perform_search( processing_time_ms: before_search.elapsed().as_millis(), facet_distribution, facet_stats, + degraded, }; Ok(result) } @@ -712,9 +725,11 @@ pub fn perform_facet_search( features: RoFeatures, ) -> Result { let before_search = Instant::now(); + let time_budget = TimeBudget::new(Duration::from_millis(150)); let rtxn = index.read_txn()?; - let (search, _, _, _) = prepare_search(index, &rtxn, &search_query, features, None)?; + let (search, _, _, _) = + prepare_search(index, &rtxn, &search_query, features, None, time_budget)?; let mut facet_search = SearchForFacetValues::new(facet_name, search, search_query.hybrid.is_some()); if let Some(facet_query) = &facet_query { diff --git a/milli/examples/search.rs b/milli/examples/search.rs index a94677771..8640acf42 100644 --- a/milli/examples/search.rs +++ b/milli/examples/search.rs @@ -6,7 +6,7 @@ use std::time::Instant; use heed::EnvOpenOptions; use milli::{ execute_search, filtered_universe, DefaultSearchLogger, GeoSortStrategy, Index, SearchContext, - SearchLogger, TermsMatchingStrategy, + SearchLogger, TermsMatchingStrategy, TimeBudget, }; #[global_allocator] @@ -65,6 +65,7 @@ fn main() -> Result<(), Box> { None, &mut DefaultSearchLogger, logger, + TimeBudget::max(), )?; if let Some((logger, dir)) = detailed_logger { logger.finish(&mut ctx, Path::new(dir))?; diff --git a/milli/src/index.rs b/milli/src/index.rs index 2c3977403..e79c137e7 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -2421,6 +2421,7 @@ pub(crate) mod tests { candidates: _, document_scores: _, mut documents_ids, + degraded: _, } = search.execute().unwrap(); let primary_key_id = index.fields_ids_map(&rtxn).unwrap().id("primary_key").unwrap(); documents_ids.sort_unstable(); diff --git a/milli/src/lib.rs b/milli/src/lib.rs index 5effcea3d..eedd25f7e 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -30,6 +30,7 @@ pub mod snapshot_tests; use std::collections::{BTreeMap, HashMap}; use std::convert::{TryFrom, TryInto}; +use std::fmt; use std::hash::BuildHasherDefault; use charabia::normalizer::{CharNormalizer, CompatibilityDecompositionNormalizer}; @@ -104,6 +105,40 @@ pub const MAX_WORD_LENGTH: usize = MAX_LMDB_KEY_LENGTH / 2; pub const MAX_POSITION_PER_ATTRIBUTE: u32 = u16::MAX as u32 + 1; +#[derive(Clone, Copy)] +pub struct TimeBudget { + started_at: std::time::Instant, + budget: std::time::Duration, +} + +impl fmt::Debug for TimeBudget { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("TimeBudget") + .field("started_at", &self.started_at) + .field("budget", &self.budget) + .field("left", &(self.budget - self.started_at.elapsed())) + .finish() + } +} + +impl TimeBudget { + pub fn new(budget: std::time::Duration) -> Self { + Self { started_at: std::time::Instant::now(), budget } + } + + pub fn max() -> Self { + Self::new(std::time::Duration::from_secs(u64::MAX)) + } + + pub fn exceeded(&self) -> bool { + self.must_stop() + } + + pub fn must_stop(&self) -> bool { + self.started_at.elapsed() > self.budget + } +} + // Convert an absolute word position into a relative position. // Return the field id of the attribute related to the absolute position // and the relative position in the attribute. diff --git a/milli/src/search/hybrid.rs b/milli/src/search/hybrid.rs index b4c79f7f5..9d8b3860d 100644 --- a/milli/src/search/hybrid.rs +++ b/milli/src/search/hybrid.rs @@ -106,6 +106,7 @@ impl ScoreWithRatioResult { candidates: left.candidates | right.candidates, documents_ids, document_scores, + degraded: false, } } } @@ -131,6 +132,7 @@ impl<'a> Search<'a> { index: self.index, distribution_shift: self.distribution_shift, embedder_name: self.embedder_name.clone(), + time_budget: self.time_budget, }; let vector_query = search.vector.take(); diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index dc8354486..b14d88d03 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -11,7 +11,7 @@ use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::vector::DistributionShift; use crate::{ execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, - SearchContext, + SearchContext, TimeBudget, }; // Building these factories is not free. @@ -43,6 +43,8 @@ pub struct Search<'a> { index: &'a Index, distribution_shift: Option, embedder_name: Option, + + time_budget: TimeBudget, } impl<'a> Search<'a> { @@ -64,6 +66,7 @@ impl<'a> Search<'a> { index, distribution_shift: None, embedder_name: None, + time_budget: TimeBudget::max(), } } @@ -143,6 +146,11 @@ impl<'a> Search<'a> { self } + pub fn time_budget(&mut self, time_budget: TimeBudget) -> &mut Search<'a> { + self.time_budget = time_budget; + self + } + pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result { if has_vector_search { let ctx = SearchContext::new(self.index, self.rtxn); @@ -169,36 +177,43 @@ impl<'a> Search<'a> { } let universe = filtered_universe(&ctx, &self.filter)?; - let PartialSearchResult { located_query_terms, candidates, documents_ids, document_scores } = - match self.vector.as_ref() { - Some(vector) => execute_vector_search( - &mut ctx, - vector, - self.scoring_strategy, - universe, - &self.sort_criteria, - self.geo_strategy, - self.offset, - self.limit, - self.distribution_shift, - embedder_name, - )?, - None => execute_search( - &mut ctx, - self.query.as_deref(), - self.terms_matching_strategy, - self.scoring_strategy, - self.exhaustive_number_hits, - universe, - &self.sort_criteria, - self.geo_strategy, - self.offset, - self.limit, - Some(self.words_limit), - &mut DefaultSearchLogger, - &mut DefaultSearchLogger, - )?, - }; + let PartialSearchResult { + located_query_terms, + candidates, + documents_ids, + document_scores, + degraded, + } = match self.vector.as_ref() { + Some(vector) => execute_vector_search( + &mut ctx, + vector, + self.scoring_strategy, + universe, + &self.sort_criteria, + self.geo_strategy, + self.offset, + self.limit, + self.distribution_shift, + embedder_name, + self.time_budget, + )?, + None => execute_search( + &mut ctx, + self.query.as_deref(), + self.terms_matching_strategy, + self.scoring_strategy, + self.exhaustive_number_hits, + universe, + &self.sort_criteria, + self.geo_strategy, + self.offset, + self.limit, + Some(self.words_limit), + &mut DefaultSearchLogger, + &mut DefaultSearchLogger, + self.time_budget, + )?, + }; // consume context and located_query_terms to build MatchingWords. let matching_words = match located_query_terms { @@ -206,7 +221,7 @@ impl<'a> Search<'a> { None => MatchingWords::default(), }; - Ok(SearchResult { matching_words, candidates, document_scores, documents_ids }) + Ok(SearchResult { matching_words, candidates, document_scores, documents_ids, degraded }) } } @@ -229,6 +244,7 @@ impl fmt::Debug for Search<'_> { index: _, distribution_shift, embedder_name, + time_budget, } = self; f.debug_struct("Search") .field("query", query) @@ -244,6 +260,7 @@ impl fmt::Debug for Search<'_> { .field("words_limit", words_limit) .field("distribution_shift", distribution_shift) .field("embedder_name", embedder_name) + .field("time_bduget", time_budget) .finish() } } @@ -254,6 +271,7 @@ pub struct SearchResult { pub candidates: RoaringBitmap, pub documents_ids: Vec, pub document_scores: Vec>, + pub degraded: bool, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] diff --git a/milli/src/search/new/bucket_sort.rs b/milli/src/search/new/bucket_sort.rs index 02528e378..7fc830c1f 100644 --- a/milli/src/search/new/bucket_sort.rs +++ b/milli/src/search/new/bucket_sort.rs @@ -5,12 +5,14 @@ use super::ranking_rules::{BoxRankingRule, RankingRuleQueryTrait}; use super::SearchContext; use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::search::new::distinct::{apply_distinct_rule, distinct_single_docid, DistinctOutput}; -use crate::Result; +use crate::{Result, TimeBudget}; pub struct BucketSortOutput { pub docids: Vec, pub scores: Vec>, pub all_candidates: RoaringBitmap, + + pub degraded: bool, } // TODO: would probably be good to regroup some of these inside of a struct? @@ -25,6 +27,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( length: usize, scoring_strategy: ScoringStrategy, logger: &mut dyn SearchLogger, + time_budget: TimeBudget, ) -> Result { logger.initial_query(query); logger.ranking_rules(&ranking_rules); @@ -41,6 +44,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( docids: vec![], scores: vec![], all_candidates: universe.clone(), + degraded: false, }); } if ranking_rules.is_empty() { @@ -74,6 +78,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( scores: vec![Default::default(); results.len()], docids: results, all_candidates, + degraded: false, }); } else { let docids: Vec = universe.iter().skip(from).take(length).collect(); @@ -81,6 +86,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( scores: vec![Default::default(); docids.len()], docids, all_candidates: universe.clone(), + degraded: false, }); }; } @@ -154,6 +160,18 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( } while valid_docids.len() < length { + if time_budget.exceeded() { + let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]); + maybe_add_to_results!(bucket); + + return Ok(BucketSortOutput { + scores: vec![Default::default(); valid_docids.len()], + docids: valid_docids, + all_candidates, + degraded: true, + }); + } + // The universe for this bucket is zero, so we don't need to sort // anything, just go back to the parent ranking rule. if ranking_rule_universes[cur_ranking_rule_index].is_empty() @@ -219,7 +237,12 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( )?; } - Ok(BucketSortOutput { docids: valid_docids, scores: valid_scores, all_candidates }) + Ok(BucketSortOutput { + docids: valid_docids, + scores: valid_scores, + all_candidates, + degraded: false, + }) } /// Add the candidates to the results. Take `distinct`, `from`, `length`, and `cur_offset` diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index 8de1d9262..2913f206d 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -502,7 +502,7 @@ mod tests { use super::*; use crate::index::tests::TempIndex; - use crate::{execute_search, filtered_universe, SearchContext}; + use crate::{execute_search, filtered_universe, SearchContext, TimeBudget}; impl<'a> MatcherBuilder<'a> { fn new_test(rtxn: &'a heed::RoTxn, index: &'a TempIndex, query: &str) -> Self { @@ -522,6 +522,7 @@ mod tests { Some(10), &mut crate::DefaultSearchLogger, &mut crate::DefaultSearchLogger, + TimeBudget::max(), ) .unwrap(); diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index ae661e3f6..ad996f363 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -52,7 +52,8 @@ use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::search::new::distinct::apply_distinct_rule; use crate::vector::DistributionShift; use crate::{ - AscDesc, DocumentId, FieldId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError, + AscDesc, DocumentId, FieldId, Filter, Index, Member, Result, TermsMatchingStrategy, TimeBudget, + UserError, }; /// A structure used throughout the execution of a search query. @@ -518,6 +519,7 @@ pub fn execute_vector_search( length: usize, distribution_shift: Option, embedder_name: &str, + time_budget: TimeBudget, ) -> Result { check_sort_criteria(ctx, sort_criteria.as_ref())?; @@ -537,7 +539,7 @@ pub fn execute_vector_search( let placeholder_search_logger: &mut dyn SearchLogger = &mut placeholder_search_logger; - let BucketSortOutput { docids, scores, all_candidates } = bucket_sort( + let BucketSortOutput { docids, scores, all_candidates, degraded } = bucket_sort( ctx, ranking_rules, &PlaceholderQuery, @@ -546,6 +548,7 @@ pub fn execute_vector_search( length, scoring_strategy, placeholder_search_logger, + time_budget, )?; Ok(PartialSearchResult { @@ -553,6 +556,7 @@ pub fn execute_vector_search( document_scores: scores, documents_ids: docids, located_query_terms: None, + degraded, }) } @@ -572,6 +576,7 @@ pub fn execute_search( words_limit: Option, placeholder_search_logger: &mut dyn SearchLogger, query_graph_logger: &mut dyn SearchLogger, + time_budget: TimeBudget, ) -> Result { check_sort_criteria(ctx, sort_criteria.as_ref())?; @@ -648,6 +653,7 @@ pub fn execute_search( length, scoring_strategy, query_graph_logger, + time_budget, )? } else { let ranking_rules = @@ -661,10 +667,11 @@ pub fn execute_search( length, scoring_strategy, placeholder_search_logger, + time_budget, )? }; - let BucketSortOutput { docids, scores, mut all_candidates } = bucket_sort_output; + let BucketSortOutput { docids, scores, mut all_candidates, degraded } = bucket_sort_output; let fields_ids_map = ctx.index.fields_ids_map(ctx.txn)?; // The candidates is the universe unless the exhaustive number of hits @@ -682,6 +689,7 @@ pub fn execute_search( document_scores: scores, documents_ids: docids, located_query_terms, + degraded, }) } @@ -742,4 +750,6 @@ pub struct PartialSearchResult { pub candidates: RoaringBitmap, pub documents_ids: Vec, pub document_scores: Vec>, + + pub degraded: bool, } diff --git a/milli/tests/search/mod.rs b/milli/tests/search/mod.rs index 9193ab762..ab6befa60 100644 --- a/milli/tests/search/mod.rs +++ b/milli/tests/search/mod.rs @@ -1,14 +1,19 @@ use std::cmp::Reverse; use std::collections::HashSet; use std::io::Cursor; +use std::time::Duration; use big_s::S; use either::{Either, Left, Right}; use heed::EnvOpenOptions; use maplit::{btreemap, hashset}; +use meili_snap::snapshot; use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings}; -use milli::{AscDesc, Criterion, DocumentId, Index, Member, Object, TermsMatchingStrategy}; +use milli::{ + AscDesc, Criterion, DocumentId, Filter, Index, Member, Object, Search, TermsMatchingStrategy, + TimeBudget, +}; use serde::{Deserialize, Deserializer}; use slice_group_by::GroupBy; @@ -349,3 +354,41 @@ where let result = serde_json::Value::deserialize(deserializer)?; Ok(Some(result)) } + +#[test] +fn basic_degraded_search() { + use Criterion::*; + let criteria = vec![Words, Typo, Proximity, Attribute, Exactness]; + let index = setup_search_index_with_criteria(&criteria); + let rtxn = index.read_txn().unwrap(); + + let mut search = Search::new(&rtxn, &index); + search.query(TEST_QUERY); + search.limit(EXTERNAL_DOCUMENTS_IDS.len()); + search.time_budget(TimeBudget::new(Duration::from_millis(0))); + + let result = search.execute().unwrap(); + assert!(result.degraded); +} + +#[test] +fn degraded_search_cannot_skip_filter() { + use Criterion::*; + let criteria = vec![Words, Typo, Proximity, Attribute, Exactness]; + let index = setup_search_index_with_criteria(&criteria); + let rtxn = index.read_txn().unwrap(); + + let mut search = Search::new(&rtxn, &index); + search.query(TEST_QUERY); + search.limit(EXTERNAL_DOCUMENTS_IDS.len()); + search.time_budget(TimeBudget::new(Duration::from_millis(0))); + let filter_condition = Filter::from_str("tag = etiopia").unwrap().unwrap(); + search.filter(filter_condition); + + let result = search.execute().unwrap(); + assert!(result.degraded); + snapshot!(format!("{:?}\n{:?}", result.candidates, result.documents_ids), @r###" + RoaringBitmap<[0, 2, 5, 8, 11, 14]> + [0, 2, 5, 8, 11, 14] + "###); +} From d1db4951195bae1aaa13dd3481bc4ff0003122d7 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 11 Mar 2024 18:24:21 +0100 Subject: [PATCH 02/16] add a settings for the search cutoff --- dump/src/lib.rs | 1 + dump/src/reader/compat/v5_to_v6.rs | 1 + meilisearch-types/src/error.rs | 1 + meilisearch-types/src/settings.rs | 76 +++++++++++++++++----- meilisearch/src/routes/indexes/settings.rs | 22 ++++++- meilisearch/src/search.rs | 5 +- meilisearch/tests/dumps/mod.rs | 39 +++++++---- meilisearch/tests/settings/get_settings.rs | 7 +- milli/src/index.rs | 13 ++++ milli/src/lib.rs | 6 ++ milli/src/update/settings.rs | 33 ++++++++++ 11 files changed, 169 insertions(+), 35 deletions(-) diff --git a/dump/src/lib.rs b/dump/src/lib.rs index be0053a7c..e7cadacbe 100644 --- a/dump/src/lib.rs +++ b/dump/src/lib.rs @@ -277,6 +277,7 @@ pub(crate) mod test { }), pagination: Setting::NotSet, embedders: Setting::NotSet, + search_cutoff: Setting::NotSet, _kind: std::marker::PhantomData, }; settings.check() diff --git a/dump/src/reader/compat/v5_to_v6.rs b/dump/src/reader/compat/v5_to_v6.rs index e00d3a599..2b8997847 100644 --- a/dump/src/reader/compat/v5_to_v6.rs +++ b/dump/src/reader/compat/v5_to_v6.rs @@ -379,6 +379,7 @@ impl From> for v6::Settings { v5::Setting::NotSet => v6::Setting::NotSet, }, embedders: v6::Setting::NotSet, + search_cutoff: v6::Setting::NotSet, _kind: std::marker::PhantomData, } } diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 965d2e672..bf9492ff6 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -259,6 +259,7 @@ InvalidSettingsProximityPrecision , InvalidRequest , BAD_REQUEST ; InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ; InvalidSettingsFilterableAttributes , InvalidRequest , BAD_REQUEST ; InvalidSettingsPagination , InvalidRequest , BAD_REQUEST ; +InvalidSettingsSearchCutoff , InvalidRequest , BAD_REQUEST ; InvalidSettingsEmbedders , InvalidRequest , BAD_REQUEST ; InvalidSettingsRankingRules , InvalidRequest , BAD_REQUEST ; InvalidSettingsSearchableAttributes , InvalidRequest , BAD_REQUEST ; diff --git a/meilisearch-types/src/settings.rs b/meilisearch-types/src/settings.rs index ca46abb0c..d05201943 100644 --- a/meilisearch-types/src/settings.rs +++ b/meilisearch-types/src/settings.rs @@ -202,6 +202,9 @@ pub struct Settings { #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] pub embedders: Setting>>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default, error = DeserrJsonError)] + pub search_cutoff: Setting, #[serde(skip)] #[deserr(skip)] @@ -227,6 +230,7 @@ impl Settings { faceting: Setting::Reset, pagination: Setting::Reset, embedders: Setting::Reset, + search_cutoff: Setting::Reset, _kind: PhantomData, } } @@ -249,6 +253,7 @@ impl Settings { faceting, pagination, embedders, + search_cutoff, .. } = self; @@ -269,6 +274,7 @@ impl Settings { faceting, pagination, embedders, + search_cutoff, _kind: PhantomData, } } @@ -315,6 +321,7 @@ impl Settings { faceting: self.faceting, pagination: self.pagination, embedders: self.embedders, + search_cutoff: self.search_cutoff, _kind: PhantomData, } } @@ -347,19 +354,40 @@ pub fn apply_settings_to_builder( settings: &Settings, builder: &mut milli::update::Settings, ) { - match settings.searchable_attributes { + let Settings { + displayed_attributes, + searchable_attributes, + filterable_attributes, + sortable_attributes, + ranking_rules, + stop_words, + non_separator_tokens, + separator_tokens, + dictionary, + synonyms, + distinct_attribute, + proximity_precision, + typo_tolerance, + faceting, + pagination, + embedders, + search_cutoff, + _kind, + } = settings; + + match searchable_attributes { Setting::Set(ref names) => builder.set_searchable_fields(names.clone()), Setting::Reset => builder.reset_searchable_fields(), Setting::NotSet => (), } - match settings.displayed_attributes { + match displayed_attributes { Setting::Set(ref names) => builder.set_displayed_fields(names.clone()), Setting::Reset => builder.reset_displayed_fields(), Setting::NotSet => (), } - match settings.filterable_attributes { + match filterable_attributes { Setting::Set(ref facets) => { builder.set_filterable_fields(facets.clone().into_iter().collect()) } @@ -367,13 +395,13 @@ pub fn apply_settings_to_builder( Setting::NotSet => (), } - match settings.sortable_attributes { + match sortable_attributes { Setting::Set(ref fields) => builder.set_sortable_fields(fields.iter().cloned().collect()), Setting::Reset => builder.reset_sortable_fields(), Setting::NotSet => (), } - match settings.ranking_rules { + match ranking_rules { Setting::Set(ref criteria) => { builder.set_criteria(criteria.iter().map(|c| c.clone().into()).collect()) } @@ -381,13 +409,13 @@ pub fn apply_settings_to_builder( Setting::NotSet => (), } - match settings.stop_words { + match stop_words { Setting::Set(ref stop_words) => builder.set_stop_words(stop_words.clone()), Setting::Reset => builder.reset_stop_words(), Setting::NotSet => (), } - match settings.non_separator_tokens { + match non_separator_tokens { Setting::Set(ref non_separator_tokens) => { builder.set_non_separator_tokens(non_separator_tokens.clone()) } @@ -395,7 +423,7 @@ pub fn apply_settings_to_builder( Setting::NotSet => (), } - match settings.separator_tokens { + match separator_tokens { Setting::Set(ref separator_tokens) => { builder.set_separator_tokens(separator_tokens.clone()) } @@ -403,31 +431,31 @@ pub fn apply_settings_to_builder( Setting::NotSet => (), } - match settings.dictionary { + match dictionary { Setting::Set(ref dictionary) => builder.set_dictionary(dictionary.clone()), Setting::Reset => builder.reset_dictionary(), Setting::NotSet => (), } - match settings.synonyms { + match synonyms { Setting::Set(ref synonyms) => builder.set_synonyms(synonyms.clone().into_iter().collect()), Setting::Reset => builder.reset_synonyms(), Setting::NotSet => (), } - match settings.distinct_attribute { + match distinct_attribute { Setting::Set(ref attr) => builder.set_distinct_field(attr.clone()), Setting::Reset => builder.reset_distinct_field(), Setting::NotSet => (), } - match settings.proximity_precision { + match proximity_precision { Setting::Set(ref precision) => builder.set_proximity_precision((*precision).into()), Setting::Reset => builder.reset_proximity_precision(), Setting::NotSet => (), } - match settings.typo_tolerance { + match typo_tolerance { Setting::Set(ref value) => { match value.enabled { Setting::Set(val) => builder.set_autorize_typos(val), @@ -482,7 +510,7 @@ pub fn apply_settings_to_builder( Setting::NotSet => (), } - match &settings.faceting { + match faceting { Setting::Set(FacetingSettings { max_values_per_facet, sort_facet_values_by }) => { match max_values_per_facet { Setting::Set(val) => builder.set_max_values_per_facet(*val), @@ -504,7 +532,7 @@ pub fn apply_settings_to_builder( Setting::NotSet => (), } - match settings.pagination { + match pagination { Setting::Set(ref value) => match value.max_total_hits { Setting::Set(val) => builder.set_pagination_max_total_hits(val), Setting::Reset => builder.reset_pagination_max_total_hits(), @@ -514,11 +542,17 @@ pub fn apply_settings_to_builder( Setting::NotSet => (), } - match settings.embedders.clone() { - Setting::Set(value) => builder.set_embedder_settings(value), + match embedders { + Setting::Set(value) => builder.set_embedder_settings(value.clone()), Setting::Reset => builder.reset_embedder_settings(), Setting::NotSet => (), } + + match search_cutoff { + Setting::Set(cutoff) => builder.set_search_cutoff(*cutoff), + Setting::Reset => builder.reset_search_cutoff(), + Setting::NotSet => (), + } } pub fn settings( @@ -607,6 +641,8 @@ pub fn settings( .collect(); let embedders = if embedders.is_empty() { Setting::NotSet } else { Setting::Set(embedders) }; + let search_cutoff = index.search_cutoff(rtxn)?; + Ok(Settings { displayed_attributes: match displayed_attributes { Some(attrs) => Setting::Set(attrs), @@ -633,6 +669,10 @@ pub fn settings( faceting: Setting::Set(faceting), pagination: Setting::Set(pagination), embedders, + search_cutoff: match search_cutoff { + Some(cutoff) => Setting::Set(cutoff), + None => Setting::Reset, + }, _kind: PhantomData, }) } @@ -783,6 +823,7 @@ pub(crate) mod test { faceting: Setting::NotSet, pagination: Setting::NotSet, embedders: Setting::NotSet, + search_cutoff: Setting::NotSet, _kind: PhantomData::, }; @@ -809,6 +850,7 @@ pub(crate) mod test { faceting: Setting::NotSet, pagination: Setting::NotSet, embedders: Setting::NotSet, + search_cutoff: Setting::NotSet, _kind: PhantomData::, }; diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index c782e78cb..1d03c9a91 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -624,6 +624,25 @@ fn embedder_analytics( ) } +make_setting_route!( + "/search_cutoff", + patch, + u64, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsSearchCutoff, + >, + search_cutoff, + "search_cutoff", + analytics, + |setting: &Option, req: &HttpRequest| { + analytics.publish( + "Search Cutoff Updated".to_string(), + serde_json::json!({"search_cutoff": setting }), + Some(req), + ); + } +); + macro_rules! generate_configure { ($($mod:ident),*) => { pub fn configure(cfg: &mut web::ServiceConfig) { @@ -765,7 +784,8 @@ pub async fn update_all( "synonyms": { "total": new_settings.synonyms.as_ref().set().map(|synonyms| synonyms.len()), }, - "embedders": crate::routes::indexes::settings::embedder_analytics(new_settings.embedders.as_ref().set()) + "embedders": crate::routes::indexes::settings::embedder_analytics(new_settings.embedders.as_ref().set()), + "search_cutoff": new_settings.search_cutoff.as_ref().set(), }), Some(&req), ); diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index 9bc7b69fc..f83e14187 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -496,8 +496,11 @@ pub fn perform_search( distribution: Option, ) -> Result { let before_search = Instant::now(); - let time_budget = TimeBudget::new(Duration::from_millis(150)); let rtxn = index.read_txn()?; + let time_budget = match index.search_cutoff(&rtxn)? { + Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)), + None => TimeBudget::default(), + }; let (search, is_finite_pagination, max_total_hits, offset) = prepare_search(index, &rtxn, &query, features, distribution, time_budget)?; diff --git a/meilisearch/tests/dumps/mod.rs b/meilisearch/tests/dumps/mod.rs index e8061ae4a..7bf97f8b2 100644 --- a/meilisearch/tests/dumps/mod.rs +++ b/meilisearch/tests/dumps/mod.rs @@ -77,7 +77,8 @@ async fn import_dump_v1_movie_raw() { }, "pagination": { "maxTotalHits": 1000 - } + }, + "searchCutoff": null } "### ); @@ -238,7 +239,8 @@ async fn import_dump_v1_movie_with_settings() { }, "pagination": { "maxTotalHits": 1000 - } + }, + "searchCutoff": null } "### ); @@ -385,7 +387,8 @@ async fn import_dump_v1_rubygems_with_settings() { }, "pagination": { "maxTotalHits": 1000 - } + }, + "searchCutoff": null } "### ); @@ -518,7 +521,8 @@ async fn import_dump_v2_movie_raw() { }, "pagination": { "maxTotalHits": 1000 - } + }, + "searchCutoff": null } "### ); @@ -663,7 +667,8 @@ async fn import_dump_v2_movie_with_settings() { }, "pagination": { "maxTotalHits": 1000 - } + }, + "searchCutoff": null } "### ); @@ -807,7 +812,8 @@ async fn import_dump_v2_rubygems_with_settings() { }, "pagination": { "maxTotalHits": 1000 - } + }, + "searchCutoff": null } "### ); @@ -940,7 +946,8 @@ async fn import_dump_v3_movie_raw() { }, "pagination": { "maxTotalHits": 1000 - } + }, + "searchCutoff": null } "### ); @@ -1085,7 +1092,8 @@ async fn import_dump_v3_movie_with_settings() { }, "pagination": { "maxTotalHits": 1000 - } + }, + "searchCutoff": null } "### ); @@ -1229,7 +1237,8 @@ async fn import_dump_v3_rubygems_with_settings() { }, "pagination": { "maxTotalHits": 1000 - } + }, + "searchCutoff": null } "### ); @@ -1362,7 +1371,8 @@ async fn import_dump_v4_movie_raw() { }, "pagination": { "maxTotalHits": 1000 - } + }, + "searchCutoff": null } "### ); @@ -1507,7 +1517,8 @@ async fn import_dump_v4_movie_with_settings() { }, "pagination": { "maxTotalHits": 1000 - } + }, + "searchCutoff": null } "### ); @@ -1651,7 +1662,8 @@ async fn import_dump_v4_rubygems_with_settings() { }, "pagination": { "maxTotalHits": 1000 - } + }, + "searchCutoff": null } "### ); @@ -1895,7 +1907,8 @@ async fn import_dump_v6_containing_experimental_features() { }, "pagination": { "maxTotalHits": 1000 - } + }, + "searchCutoff": null } "###); diff --git a/meilisearch/tests/settings/get_settings.rs b/meilisearch/tests/settings/get_settings.rs index 5642e854f..000443f36 100644 --- a/meilisearch/tests/settings/get_settings.rs +++ b/meilisearch/tests/settings/get_settings.rs @@ -49,12 +49,12 @@ async fn get_settings_unexisting_index() { async fn get_settings() { let server = Server::new().await; let index = server.index("test"); - index.create(None).await; - index.wait_task(0).await; + let (response, _code) = index.create(None).await; + index.wait_task(response.uid()).await; let (response, code) = index.settings().await; assert_eq!(code, 200); let settings = response.as_object().unwrap(); - assert_eq!(settings.keys().len(), 15); + assert_eq!(settings.keys().len(), 16); assert_eq!(settings["displayedAttributes"], json!(["*"])); assert_eq!(settings["searchableAttributes"], json!(["*"])); assert_eq!(settings["filterableAttributes"], json!([])); @@ -84,6 +84,7 @@ async fn get_settings() { }) ); assert_eq!(settings["proximityPrecision"], json!("byWord")); + assert_eq!(settings["searchCutoff"], json!(null)); } #[actix_rt::test] diff --git a/milli/src/index.rs b/milli/src/index.rs index e79c137e7..d921de9e4 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -67,6 +67,7 @@ pub mod main_key { pub const PAGINATION_MAX_TOTAL_HITS: &str = "pagination-max-total-hits"; pub const PROXIMITY_PRECISION: &str = "proximity-precision"; pub const EMBEDDING_CONFIGS: &str = "embedding_configs"; + pub const SEARCH_CUTOFF: &str = "search_cutoff"; } pub mod db_name { @@ -1505,6 +1506,18 @@ impl Index { _ => "default".to_owned(), }) } + + pub(crate) fn put_search_cutoff(&self, wtxn: &mut RwTxn<'_>, cutoff: u64) -> heed::Result<()> { + self.main.remap_types::().put(wtxn, main_key::SEARCH_CUTOFF, &cutoff) + } + + pub fn search_cutoff(&self, rtxn: &RoTxn<'_>) -> Result> { + Ok(self.main.remap_types::().get(rtxn, main_key::SEARCH_CUTOFF)?) + } + + pub(crate) fn delete_search_cutoff(&self, wtxn: &mut RwTxn<'_>) -> heed::Result { + self.main.remap_key_type::().delete(wtxn, main_key::SEARCH_CUTOFF) + } } #[cfg(test)] diff --git a/milli/src/lib.rs b/milli/src/lib.rs index eedd25f7e..896aadb50 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -121,6 +121,12 @@ impl fmt::Debug for TimeBudget { } } +impl Default for TimeBudget { + fn default() -> Self { + Self::new(std::time::Duration::from_millis(150)) + } +} + impl TimeBudget { pub fn new(budget: std::time::Duration) -> Self { Self { started_at: std::time::Instant::now(), budget } diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 63b45e3aa..1e720ba56 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -150,6 +150,7 @@ pub struct Settings<'a, 't, 'i> { pagination_max_total_hits: Setting, proximity_precision: Setting, embedder_settings: Setting>>, + search_cutoff: Setting, } impl<'a, 't, 'i> Settings<'a, 't, 'i> { @@ -183,6 +184,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { pagination_max_total_hits: Setting::NotSet, proximity_precision: Setting::NotSet, embedder_settings: Setting::NotSet, + search_cutoff: Setting::NotSet, indexer_config, } } @@ -373,6 +375,14 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { self.embedder_settings = Setting::Reset; } + pub fn set_search_cutoff(&mut self, value: u64) { + self.search_cutoff = Setting::Set(value); + } + + pub fn reset_search_cutoff(&mut self) { + self.search_cutoff = Setting::Reset; + } + #[tracing::instrument( level = "trace" skip(self, progress_callback, should_abort, old_fields_ids_map), @@ -1026,6 +1036,24 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { Ok(update) } + fn update_search_cutoff(&mut self) -> Result { + let changed = match self.search_cutoff { + Setting::Set(new) => { + let old = self.index.search_cutoff(self.wtxn)?; + if old == Some(new) { + false + } else { + self.index.put_search_cutoff(self.wtxn, new)?; + true + } + } + Setting::Reset => self.index.delete_search_cutoff(self.wtxn)?, + Setting::NotSet => false, + }; + + Ok(changed) + } + pub fn execute(mut self, progress_callback: FP, should_abort: FA) -> Result<()> where FP: Fn(UpdateIndexingStep) + Sync, @@ -1071,6 +1099,9 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { // 3. Keep the old vectors but reattempt indexing on a prompt change: only actually changed prompt will need embedding + storage let embedding_configs_updated = self.update_embedding_configs()?; + // never trigger re-indexing + self.update_search_cutoff()?; + if stop_words_updated || non_separator_tokens_updated || separator_tokens_updated @@ -2027,6 +2058,7 @@ mod tests { pagination_max_total_hits, proximity_precision, embedder_settings, + search_cutoff, } = settings; assert!(matches!(searchable_fields, Setting::NotSet)); assert!(matches!(displayed_fields, Setting::NotSet)); @@ -2050,6 +2082,7 @@ mod tests { assert!(matches!(pagination_max_total_hits, Setting::NotSet)); assert!(matches!(proximity_precision, Setting::NotSet)); assert!(matches!(embedder_settings, Setting::NotSet)); + assert!(matches!(search_cutoff, Setting::NotSet)); }) .unwrap(); } From b72495eb5892cf56d3b30b6d575491b1e80f6889 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 12 Mar 2024 18:19:02 +0100 Subject: [PATCH 03/16] fix the settings tests --- meilisearch/src/routes/indexes/settings.rs | 10 ++++++---- meilisearch/tests/settings/get_settings.rs | 4 +++- 2 files changed, 9 insertions(+), 5 deletions(-) diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index 1d03c9a91..41fc58a87 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -138,6 +138,7 @@ macro_rules! make_setting_route { debug!(returns = ?settings, "Update settings"); let mut json = serde_json::json!(&settings); + dbg!(&json); let val = json[$camelcase_attr].take(); Ok(HttpResponse::Ok().json(val)) @@ -625,14 +626,14 @@ fn embedder_analytics( } make_setting_route!( - "/search_cutoff", - patch, + "/search-cutoff", + put, u64, meilisearch_types::deserr::DeserrJsonError< meilisearch_types::error::deserr_codes::InvalidSettingsSearchCutoff, >, search_cutoff, - "search_cutoff", + "searchCutoff", analytics, |setting: &Option, req: &HttpRequest| { analytics.publish( @@ -673,7 +674,8 @@ generate_configure!( typo_tolerance, pagination, faceting, - embedders + embedders, + search_cutoff ); pub async fn update_all( diff --git a/meilisearch/tests/settings/get_settings.rs b/meilisearch/tests/settings/get_settings.rs index 000443f36..d573f38e0 100644 --- a/meilisearch/tests/settings/get_settings.rs +++ b/meilisearch/tests/settings/get_settings.rs @@ -35,6 +35,7 @@ static DEFAULT_SETTINGS_VALUES: Lazy> = Lazy::new(| "maxTotalHits": json!(1000), }), ); + map.insert("search_cutoff", json!(null)); map }); @@ -286,7 +287,8 @@ test_setting_routes!( ranking_rules put, synonyms put, pagination patch, - faceting patch + faceting patch, + search_cutoff put ); #[actix_rt::test] From b8cda6c300f8ca351a739319b2fcfadcc80e327b Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 14 Mar 2024 17:34:46 +0100 Subject: [PATCH 04/16] fix the search cutoff and add a test --- meilisearch/tests/search/mod.rs | 109 +++++++ milli/src/lib.rs | 37 ++- milli/src/score_details.rs | 12 + milli/src/search/hybrid.rs | 2 +- milli/src/search/mod.rs | 4 +- milli/src/search/new/bucket_sort.rs | 16 +- milli/src/search/new/tests/cutoff.rs | 419 +++++++++++++++++++++++++++ milli/src/search/new/tests/mod.rs | 1 + milli/tests/search/mod.rs | 45 +-- 9 files changed, 590 insertions(+), 55 deletions(-) create mode 100644 milli/src/search/new/tests/cutoff.rs diff --git a/meilisearch/tests/search/mod.rs b/meilisearch/tests/search/mod.rs index 90098c5b6..62dd73c63 100644 --- a/meilisearch/tests/search/mod.rs +++ b/meilisearch/tests/search/mod.rs @@ -834,6 +834,115 @@ async fn test_score_details() { .await; } +#[actix_rt::test] +async fn test_degraded_score_details() { + let server = Server::new().await; + let index = server.index("test"); + + let documents = NESTED_DOCUMENTS.clone(); + + index.add_documents(json!(documents), None).await; + // We can't really use anything else than 0ms here; otherwise, the test will get flaky. + let (res, _code) = index.update_settings(json!({ "searchCutoff": 0 })).await; + index.wait_task(res.uid()).await; + + index + .search( + json!({ + "q": "b", + "showRankingScoreDetails": true, + }), + |response, code| { + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###" + [ + { + "id": 852, + "father": "jean", + "mother": "michelle", + "doggos": [ + { + "name": "bobby", + "age": 2 + }, + { + "name": "buddy", + "age": 4 + } + ], + "cattos": "pésti", + "_vectors": { + "manual": [ + 1, + 2, + 3 + ] + }, + "_rankingScoreDetails": { + "skipped": 0.0 + } + }, + { + "id": 654, + "father": "pierre", + "mother": "sabine", + "doggos": [ + { + "name": "gros bill", + "age": 8 + } + ], + "cattos": [ + "simba", + "pestiféré" + ], + "_vectors": { + "manual": [ + 1, + 2, + 54 + ] + }, + "_rankingScoreDetails": { + "skipped": 0.0 + } + }, + { + "id": 951, + "father": "jean-baptiste", + "mother": "sophie", + "doggos": [ + { + "name": "turbo", + "age": 5 + }, + { + "name": "fast", + "age": 6 + } + ], + "cattos": [ + "moumoute", + "gomez" + ], + "_vectors": { + "manual": [ + 10, + 23, + 32 + ] + }, + "_rankingScoreDetails": { + "skipped": 0.0 + } + } + ] + "###); + }, + ) + .await; +} + #[actix_rt::test] async fn experimental_feature_vector_store() { let server = Server::new().await; diff --git a/milli/src/lib.rs b/milli/src/lib.rs index 896aadb50..df44ca127 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -105,10 +105,15 @@ pub const MAX_WORD_LENGTH: usize = MAX_LMDB_KEY_LENGTH / 2; pub const MAX_POSITION_PER_ATTRIBUTE: u32 = u16::MAX as u32 + 1; -#[derive(Clone, Copy)] +#[derive(Clone)] pub struct TimeBudget { started_at: std::time::Instant, budget: std::time::Duration, + + /// When testing the time budget, ensuring we did more than iteration of the bucket sort can be useful. + /// But to avoid being flaky, the only option is to add the ability to stop after a specific number of calls instead of a `Duration`. + #[cfg(test)] + stop_after: Option<(std::sync::Arc, usize)>, } impl fmt::Debug for TimeBudget { @@ -129,18 +134,40 @@ impl Default for TimeBudget { impl TimeBudget { pub fn new(budget: std::time::Duration) -> Self { - Self { started_at: std::time::Instant::now(), budget } + Self { + started_at: std::time::Instant::now(), + budget, + + #[cfg(test)] + stop_after: None, + } } pub fn max() -> Self { Self::new(std::time::Duration::from_secs(u64::MAX)) } - pub fn exceeded(&self) -> bool { - self.must_stop() + #[cfg(test)] + pub fn with_stop_after(mut self, stop_after: usize) -> Self { + use std::sync::atomic::AtomicUsize; + use std::sync::Arc; + + self.stop_after = Some((Arc::new(AtomicUsize::new(0)), stop_after)); + self } - pub fn must_stop(&self) -> bool { + pub fn exceeded(&self) -> bool { + #[cfg(test)] + if let Some((current, stop_after)) = &self.stop_after { + let current = current.fetch_add(1, std::sync::atomic::Ordering::Relaxed); + if current >= *stop_after { + return true; + } else { + // if a number has been specified then we ignore entirely the time budget + return false; + } + } + self.started_at.elapsed() > self.budget } } diff --git a/milli/src/score_details.rs b/milli/src/score_details.rs index f6b9db58c..f2c6fb58a 100644 --- a/milli/src/score_details.rs +++ b/milli/src/score_details.rs @@ -17,6 +17,9 @@ pub enum ScoreDetails { Sort(Sort), Vector(Vector), GeoSort(GeoSort), + + /// Returned when we don't have the time to finish applying all the subsequent ranking-rules + Skipped, } #[derive(Clone, Copy)] @@ -50,6 +53,7 @@ impl ScoreDetails { ScoreDetails::Sort(_) => None, ScoreDetails::GeoSort(_) => None, ScoreDetails::Vector(_) => None, + ScoreDetails::Skipped => Some(Rank { rank: 0, max_rank: 1 }), } } @@ -97,6 +101,7 @@ impl ScoreDetails { ScoreDetails::Vector(vector) => RankOrValue::Score( vector.value_similarity.as_ref().map(|(_, s)| *s as f64).unwrap_or(0.0f64), ), + ScoreDetails::Skipped => RankOrValue::Score(0.), } } @@ -256,6 +261,13 @@ impl ScoreDetails { details_map.insert(vector, details); order += 1; } + ScoreDetails::Skipped => { + details_map.insert( + "skipped".to_string(), + serde_json::Number::from_f64(0.).unwrap().into(), + ); + order += 1; + } } } details_map diff --git a/milli/src/search/hybrid.rs b/milli/src/search/hybrid.rs index 9d8b3860d..1e14f4430 100644 --- a/milli/src/search/hybrid.rs +++ b/milli/src/search/hybrid.rs @@ -132,7 +132,7 @@ impl<'a> Search<'a> { index: self.index, distribution_shift: self.distribution_shift, embedder_name: self.embedder_name.clone(), - time_budget: self.time_budget, + time_budget: self.time_budget.clone(), }; let vector_query = search.vector.take(); diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index b14d88d03..f6ab8a7de 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -195,7 +195,7 @@ impl<'a> Search<'a> { self.limit, self.distribution_shift, embedder_name, - self.time_budget, + self.time_budget.clone(), )?, None => execute_search( &mut ctx, @@ -211,7 +211,7 @@ impl<'a> Search<'a> { Some(self.words_limit), &mut DefaultSearchLogger, &mut DefaultSearchLogger, - self.time_budget, + self.time_budget.clone(), )?, }; diff --git a/milli/src/search/new/bucket_sort.rs b/milli/src/search/new/bucket_sort.rs index 7fc830c1f..521fcb983 100644 --- a/milli/src/search/new/bucket_sort.rs +++ b/milli/src/search/new/bucket_sort.rs @@ -161,11 +161,21 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( while valid_docids.len() < length { if time_budget.exceeded() { - let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]); - maybe_add_to_results!(bucket); + loop { + let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]); + ranking_rule_scores.push(ScoreDetails::Skipped); + maybe_add_to_results!(bucket); + ranking_rule_scores.pop(); + + if cur_ranking_rule_index == 0 { + break; + } + + back!(); + } return Ok(BucketSortOutput { - scores: vec![Default::default(); valid_docids.len()], + scores: valid_scores, docids: valid_docids, all_candidates, degraded: true, diff --git a/milli/src/search/new/tests/cutoff.rs b/milli/src/search/new/tests/cutoff.rs new file mode 100644 index 000000000..4256abc2b --- /dev/null +++ b/milli/src/search/new/tests/cutoff.rs @@ -0,0 +1,419 @@ +//! This module test the search cutoff and ensure a few things: +//! 1. A basic test works and mark the search as degraded +//! 2. A test that ensure the filters are affectively applied even with a cutoff of 0 +//! 3. A test that ensure the cutoff works well with the ranking scores + +use std::time::Duration; + +use big_s::S; +use maplit::hashset; +use meili_snap::snapshot; + +use crate::index::tests::TempIndex; +use crate::{Criterion, Filter, Search, TimeBudget}; + +fn create_index() -> TempIndex { + let index = TempIndex::new(); + + index + .update_settings(|s| { + s.set_primary_key("id".to_owned()); + s.set_searchable_fields(vec!["text".to_owned()]); + s.set_filterable_fields(hashset! { S("id") }); + s.set_criteria(vec![Criterion::Words, Criterion::Typo]); + }) + .unwrap(); + + // reverse the ID / insertion order so we see better what was sorted from what got the insertion order ordering + index + .add_documents(documents!([ + { + "id": 4, + "text": "hella puppo kefir", + }, + { + "id": 3, + "text": "hella puppy kefir", + }, + { + "id": 2, + "text": "hello", + }, + { + "id": 1, + "text": "hello puppy", + }, + { + "id": 0, + "text": "hello puppy kefir", + }, + ])) + .unwrap(); + index +} + +#[test] +fn basic_degraded_search() { + let index = create_index(); + let rtxn = index.read_txn().unwrap(); + + let mut search = Search::new(&rtxn, &index); + search.query("hello puppy kefir"); + search.limit(3); + search.time_budget(TimeBudget::new(Duration::from_millis(0))); + + let result = search.execute().unwrap(); + assert!(result.degraded); +} + +#[test] +fn degraded_search_cannot_skip_filter() { + let index = create_index(); + let rtxn = index.read_txn().unwrap(); + + let mut search = Search::new(&rtxn, &index); + search.query("hello puppy kefir"); + search.limit(100); + search.time_budget(TimeBudget::new(Duration::from_millis(0))); + let filter_condition = Filter::from_str("id > 2").unwrap().unwrap(); + search.filter(filter_condition); + + let result = search.execute().unwrap(); + assert!(result.degraded); + snapshot!(format!("{:?}\n{:?}", result.candidates, result.documents_ids), @r###" + RoaringBitmap<[0, 1]> + [0, 1] + "###); +} + +#[test] +fn degraded_search_and_score_details() { + let index = create_index(); + let rtxn = index.read_txn().unwrap(); + + let mut search = Search::new(&rtxn, &index); + search.query("hello puppy kefir"); + search.limit(4); + search.time_budget(TimeBudget::max()); + + let result = search.execute().unwrap(); + snapshot!(format!("{:#?}\n{:#?}", result.documents_ids, result.document_scores), @r###" + [ + 4, + 1, + 0, + 3, + ] + [ + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 3, + }, + ), + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Typo( + Typo { + typo_count: 1, + max_typo_count: 3, + }, + ), + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + ], + [ + Words( + Words { + matching_words: 2, + max_matching_words: 3, + }, + ), + ], + ] + "###); + + // Do ONE loop iteration. Not much can be deduced, almost everyone matched the words first bucket. + search.time_budget(TimeBudget::max().with_stop_after(1)); + + let result = search.execute().unwrap(); + snapshot!(format!("{:#?}\n{:#?}", result.documents_ids, result.document_scores), @r###" + [ + 0, + 1, + 4, + 2, + ] + [ + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Skipped, + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Skipped, + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Skipped, + ], + [ + Skipped, + ], + ] + "###); + + // Do TWO loop iterations. The first document should be entirely sorted + search.time_budget(TimeBudget::max().with_stop_after(2)); + + let result = search.execute().unwrap(); + snapshot!(format!("{:#?}\n{:#?}", result.documents_ids, result.document_scores), @r###" + [ + 4, + 0, + 1, + 2, + ] + [ + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 3, + }, + ), + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Skipped, + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Skipped, + ], + [ + Skipped, + ], + ] + "###); + + // Do THREE loop iterations. The second document should be entirely sorted as well + search.time_budget(TimeBudget::max().with_stop_after(3)); + + let result = search.execute().unwrap(); + snapshot!(format!("{:#?}\n{:#?}", result.documents_ids, result.document_scores), @r###" + [ + 4, + 1, + 0, + 2, + ] + [ + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 3, + }, + ), + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Typo( + Typo { + typo_count: 1, + max_typo_count: 3, + }, + ), + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Skipped, + ], + [ + Skipped, + ], + ] + "###); + + // Do FOUR loop iterations. The third document should be entirely sorted as well + // The words bucket have still not progressed thus the last document doesn't have any info yet. + search.time_budget(TimeBudget::max().with_stop_after(4)); + + let result = search.execute().unwrap(); + snapshot!(format!("{:#?}\n{:#?}", result.documents_ids, result.document_scores), @r###" + [ + 4, + 1, + 0, + 2, + ] + [ + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 3, + }, + ), + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Typo( + Typo { + typo_count: 1, + max_typo_count: 3, + }, + ), + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + ], + [ + Skipped, + ], + ] + "###); + + // After FIVE loop iteration. The words ranking rule gave us a new bucket. + // Since we reached the limit we were able to early exit without checking the typo ranking rule. + search.time_budget(TimeBudget::max().with_stop_after(5)); + + let result = search.execute().unwrap(); + snapshot!(format!("{:#?}\n{:#?}", result.documents_ids, result.document_scores), @r###" + [ + 4, + 1, + 0, + 3, + ] + [ + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 3, + }, + ), + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Typo( + Typo { + typo_count: 1, + max_typo_count: 3, + }, + ), + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + ], + [ + Words( + Words { + matching_words: 2, + max_matching_words: 3, + }, + ), + ], + ] + "###); +} diff --git a/milli/src/search/new/tests/mod.rs b/milli/src/search/new/tests/mod.rs index e500d16fb..26199b79b 100644 --- a/milli/src/search/new/tests/mod.rs +++ b/milli/src/search/new/tests/mod.rs @@ -1,5 +1,6 @@ pub mod attribute_fid; pub mod attribute_position; +pub mod cutoff; pub mod distinct; pub mod exactness; pub mod geo_sort; diff --git a/milli/tests/search/mod.rs b/milli/tests/search/mod.rs index ab6befa60..9193ab762 100644 --- a/milli/tests/search/mod.rs +++ b/milli/tests/search/mod.rs @@ -1,19 +1,14 @@ use std::cmp::Reverse; use std::collections::HashSet; use std::io::Cursor; -use std::time::Duration; use big_s::S; use either::{Either, Left, Right}; use heed::EnvOpenOptions; use maplit::{btreemap, hashset}; -use meili_snap::snapshot; use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings}; -use milli::{ - AscDesc, Criterion, DocumentId, Filter, Index, Member, Object, Search, TermsMatchingStrategy, - TimeBudget, -}; +use milli::{AscDesc, Criterion, DocumentId, Index, Member, Object, TermsMatchingStrategy}; use serde::{Deserialize, Deserializer}; use slice_group_by::GroupBy; @@ -354,41 +349,3 @@ where let result = serde_json::Value::deserialize(deserializer)?; Ok(Some(result)) } - -#[test] -fn basic_degraded_search() { - use Criterion::*; - let criteria = vec![Words, Typo, Proximity, Attribute, Exactness]; - let index = setup_search_index_with_criteria(&criteria); - let rtxn = index.read_txn().unwrap(); - - let mut search = Search::new(&rtxn, &index); - search.query(TEST_QUERY); - search.limit(EXTERNAL_DOCUMENTS_IDS.len()); - search.time_budget(TimeBudget::new(Duration::from_millis(0))); - - let result = search.execute().unwrap(); - assert!(result.degraded); -} - -#[test] -fn degraded_search_cannot_skip_filter() { - use Criterion::*; - let criteria = vec![Words, Typo, Proximity, Attribute, Exactness]; - let index = setup_search_index_with_criteria(&criteria); - let rtxn = index.read_txn().unwrap(); - - let mut search = Search::new(&rtxn, &index); - search.query(TEST_QUERY); - search.limit(EXTERNAL_DOCUMENTS_IDS.len()); - search.time_budget(TimeBudget::new(Duration::from_millis(0))); - let filter_condition = Filter::from_str("tag = etiopia").unwrap().unwrap(); - search.filter(filter_condition); - - let result = search.execute().unwrap(); - assert!(result.degraded); - snapshot!(format!("{:?}\n{:?}", result.candidates, result.documents_ids), @r###" - RoaringBitmap<[0, 2, 5, 8, 11, 14]> - [0, 2, 5, 8, 11, 14] - "###); -} From ad9192fbbf38a29413b20cdf7678a522673b8ad7 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 14 Mar 2024 17:42:33 +0100 Subject: [PATCH 05/16] reduce the size of an integration test --- meilisearch/tests/search/mod.rs | 46 +++++---------------------------- 1 file changed, 6 insertions(+), 40 deletions(-) diff --git a/meilisearch/tests/search/mod.rs b/meilisearch/tests/search/mod.rs index 62dd73c63..8c947a329 100644 --- a/meilisearch/tests/search/mod.rs +++ b/meilisearch/tests/search/mod.rs @@ -850,6 +850,7 @@ async fn test_degraded_score_details() { .search( json!({ "q": "b", + "attributesToRetrieve": ["doggos.name", "cattos"], "showRankingScoreDetails": true, }), |response, code| { @@ -857,81 +858,46 @@ async fn test_degraded_score_details() { meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###" [ { - "id": 852, - "father": "jean", - "mother": "michelle", "doggos": [ { - "name": "bobby", - "age": 2 + "name": "bobby" }, { - "name": "buddy", - "age": 4 + "name": "buddy" } ], "cattos": "pésti", - "_vectors": { - "manual": [ - 1, - 2, - 3 - ] - }, "_rankingScoreDetails": { "skipped": 0.0 } }, { - "id": 654, - "father": "pierre", - "mother": "sabine", "doggos": [ { - "name": "gros bill", - "age": 8 + "name": "gros bill" } ], "cattos": [ "simba", "pestiféré" ], - "_vectors": { - "manual": [ - 1, - 2, - 54 - ] - }, "_rankingScoreDetails": { "skipped": 0.0 } }, { - "id": 951, - "father": "jean-baptiste", - "mother": "sophie", "doggos": [ { - "name": "turbo", - "age": 5 + "name": "turbo" }, { - "name": "fast", - "age": 6 + "name": "fast" } ], "cattos": [ "moumoute", "gomez" ], - "_vectors": { - "manual": [ - 10, - 23, - 32 - ] - }, "_rankingScoreDetails": { "skipped": 0.0 } From 038c26c118ef041e5843f29b5f4862c879e39979 Mon Sep 17 00:00:00 2001 From: Tamo Date: Thu, 14 Mar 2024 17:52:08 +0100 Subject: [PATCH 06/16] stop returning the degraded boolean when a search was cutoff --- meilisearch/src/search.rs | 3 +- meilisearch/tests/search/mod.rs | 95 ++++++++++++++++++--------------- 2 files changed, 53 insertions(+), 45 deletions(-) diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index f83e14187..0333eb0d5 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -324,7 +324,8 @@ pub struct SearchResult { #[serde(skip_serializing_if = "Option::is_none")] pub facet_stats: Option>, - #[serde(skip_serializing_if = "std::ops::Not::not")] + // This information is only used for analytics purposes + #[serde(skip)] pub degraded: bool, } diff --git a/meilisearch/tests/search/mod.rs b/meilisearch/tests/search/mod.rs index 8c947a329..3e5c4278a 100644 --- a/meilisearch/tests/search/mod.rs +++ b/meilisearch/tests/search/mod.rs @@ -855,54 +855,61 @@ async fn test_degraded_score_details() { }), |response, code| { meili_snap::snapshot!(code, @"200 OK"); - meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###" - [ - { - "doggos": [ - { - "name": "bobby" - }, - { - "name": "buddy" + meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + { + "hits": [ + { + "doggos": [ + { + "name": "bobby" + }, + { + "name": "buddy" + } + ], + "cattos": "pésti", + "_rankingScoreDetails": { + "skipped": 0.0 } - ], - "cattos": "pésti", - "_rankingScoreDetails": { - "skipped": 0.0 - } - }, - { - "doggos": [ - { - "name": "gros bill" + }, + { + "doggos": [ + { + "name": "gros bill" + } + ], + "cattos": [ + "simba", + "pestiféré" + ], + "_rankingScoreDetails": { + "skipped": 0.0 } - ], - "cattos": [ - "simba", - "pestiféré" - ], - "_rankingScoreDetails": { - "skipped": 0.0 - } - }, - { - "doggos": [ - { - "name": "turbo" - }, - { - "name": "fast" + }, + { + "doggos": [ + { + "name": "turbo" + }, + { + "name": "fast" + } + ], + "cattos": [ + "moumoute", + "gomez" + ], + "_rankingScoreDetails": { + "skipped": 0.0 } - ], - "cattos": [ - "moumoute", - "gomez" - ], - "_rankingScoreDetails": { - "skipped": 0.0 } - } - ] + ], + "query": "b", + "processingTimeMs": 0, + "limit": 20, + "offset": 0, + "estimatedTotalHits": 3 + } "###); }, ) From 6a0c399c2f827a46600deda0b0d3695d1ed6af19 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 18 Mar 2024 12:06:00 +0100 Subject: [PATCH 07/16] rename the search_cutoff parameter to search_cutoff_ms --- dump/src/lib.rs | 2 +- dump/src/reader/compat/v5_to_v6.rs | 2 +- meilisearch-types/src/settings.rs | 22 +++++++++--------- meilisearch/src/routes/indexes/settings.rs | 12 +++++----- meilisearch/tests/common/mod.rs | 1 + meilisearch/tests/dumps/mod.rs | 26 +++++++++++----------- meilisearch/tests/search/mod.rs | 6 ++--- meilisearch/tests/settings/get_settings.rs | 6 ++--- 8 files changed, 39 insertions(+), 38 deletions(-) diff --git a/dump/src/lib.rs b/dump/src/lib.rs index e7cadacbe..a7af2d5d0 100644 --- a/dump/src/lib.rs +++ b/dump/src/lib.rs @@ -277,7 +277,7 @@ pub(crate) mod test { }), pagination: Setting::NotSet, embedders: Setting::NotSet, - search_cutoff: Setting::NotSet, + search_cutoff_ms: Setting::NotSet, _kind: std::marker::PhantomData, }; settings.check() diff --git a/dump/src/reader/compat/v5_to_v6.rs b/dump/src/reader/compat/v5_to_v6.rs index 2b8997847..a883f0ba0 100644 --- a/dump/src/reader/compat/v5_to_v6.rs +++ b/dump/src/reader/compat/v5_to_v6.rs @@ -379,7 +379,7 @@ impl From> for v6::Settings { v5::Setting::NotSet => v6::Setting::NotSet, }, embedders: v6::Setting::NotSet, - search_cutoff: v6::Setting::NotSet, + search_cutoff_ms: v6::Setting::NotSet, _kind: std::marker::PhantomData, } } diff --git a/meilisearch-types/src/settings.rs b/meilisearch-types/src/settings.rs index d05201943..23fe98347 100644 --- a/meilisearch-types/src/settings.rs +++ b/meilisearch-types/src/settings.rs @@ -204,7 +204,7 @@ pub struct Settings { pub embedders: Setting>>, #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] - pub search_cutoff: Setting, + pub search_cutoff_ms: Setting, #[serde(skip)] #[deserr(skip)] @@ -230,7 +230,7 @@ impl Settings { faceting: Setting::Reset, pagination: Setting::Reset, embedders: Setting::Reset, - search_cutoff: Setting::Reset, + search_cutoff_ms: Setting::Reset, _kind: PhantomData, } } @@ -253,7 +253,7 @@ impl Settings { faceting, pagination, embedders, - search_cutoff, + search_cutoff_ms, .. } = self; @@ -274,7 +274,7 @@ impl Settings { faceting, pagination, embedders, - search_cutoff, + search_cutoff_ms, _kind: PhantomData, } } @@ -321,7 +321,7 @@ impl Settings { faceting: self.faceting, pagination: self.pagination, embedders: self.embedders, - search_cutoff: self.search_cutoff, + search_cutoff_ms: self.search_cutoff_ms, _kind: PhantomData, } } @@ -371,7 +371,7 @@ pub fn apply_settings_to_builder( faceting, pagination, embedders, - search_cutoff, + search_cutoff_ms, _kind, } = settings; @@ -548,7 +548,7 @@ pub fn apply_settings_to_builder( Setting::NotSet => (), } - match search_cutoff { + match search_cutoff_ms { Setting::Set(cutoff) => builder.set_search_cutoff(*cutoff), Setting::Reset => builder.reset_search_cutoff(), Setting::NotSet => (), @@ -641,7 +641,7 @@ pub fn settings( .collect(); let embedders = if embedders.is_empty() { Setting::NotSet } else { Setting::Set(embedders) }; - let search_cutoff = index.search_cutoff(rtxn)?; + let search_cutoff_ms = index.search_cutoff(rtxn)?; Ok(Settings { displayed_attributes: match displayed_attributes { @@ -669,7 +669,7 @@ pub fn settings( faceting: Setting::Set(faceting), pagination: Setting::Set(pagination), embedders, - search_cutoff: match search_cutoff { + search_cutoff_ms: match search_cutoff_ms { Some(cutoff) => Setting::Set(cutoff), None => Setting::Reset, }, @@ -823,7 +823,7 @@ pub(crate) mod test { faceting: Setting::NotSet, pagination: Setting::NotSet, embedders: Setting::NotSet, - search_cutoff: Setting::NotSet, + search_cutoff_ms: Setting::NotSet, _kind: PhantomData::, }; @@ -850,7 +850,7 @@ pub(crate) mod test { faceting: Setting::NotSet, pagination: Setting::NotSet, embedders: Setting::NotSet, - search_cutoff: Setting::NotSet, + search_cutoff_ms: Setting::NotSet, _kind: PhantomData::, }; diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index 41fc58a87..4c03eb1a1 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -626,19 +626,19 @@ fn embedder_analytics( } make_setting_route!( - "/search-cutoff", + "/search-cutoff-ms", put, u64, meilisearch_types::deserr::DeserrJsonError< meilisearch_types::error::deserr_codes::InvalidSettingsSearchCutoff, >, - search_cutoff, - "searchCutoff", + search_cutoff_ms, + "searchCutoffMs", analytics, |setting: &Option, req: &HttpRequest| { analytics.publish( "Search Cutoff Updated".to_string(), - serde_json::json!({"search_cutoff": setting }), + serde_json::json!({"search_cutoff_ms": setting }), Some(req), ); } @@ -675,7 +675,7 @@ generate_configure!( pagination, faceting, embedders, - search_cutoff + search_cutoff_ms ); pub async fn update_all( @@ -787,7 +787,7 @@ pub async fn update_all( "total": new_settings.synonyms.as_ref().set().map(|synonyms| synonyms.len()), }, "embedders": crate::routes::indexes::settings::embedder_analytics(new_settings.embedders.as_ref().set()), - "search_cutoff": new_settings.search_cutoff.as_ref().set(), + "search_cutoff_ms": new_settings.search_cutoff_ms.as_ref().set(), }), Some(&req), ); diff --git a/meilisearch/tests/common/mod.rs b/meilisearch/tests/common/mod.rs index 2b9e5e1d7..3117dd185 100644 --- a/meilisearch/tests/common/mod.rs +++ b/meilisearch/tests/common/mod.rs @@ -16,6 +16,7 @@ pub use server::{default_settings, Server}; pub struct Value(pub serde_json::Value); impl Value { + #[track_caller] pub fn uid(&self) -> u64 { if let Some(uid) = self["uid"].as_u64() { uid diff --git a/meilisearch/tests/dumps/mod.rs b/meilisearch/tests/dumps/mod.rs index 7bf97f8b2..1a31437f8 100644 --- a/meilisearch/tests/dumps/mod.rs +++ b/meilisearch/tests/dumps/mod.rs @@ -78,7 +78,7 @@ async fn import_dump_v1_movie_raw() { "pagination": { "maxTotalHits": 1000 }, - "searchCutoff": null + "searchCutoffMs": null } "### ); @@ -240,7 +240,7 @@ async fn import_dump_v1_movie_with_settings() { "pagination": { "maxTotalHits": 1000 }, - "searchCutoff": null + "searchCutoffMs": null } "### ); @@ -388,7 +388,7 @@ async fn import_dump_v1_rubygems_with_settings() { "pagination": { "maxTotalHits": 1000 }, - "searchCutoff": null + "searchCutoffMs": null } "### ); @@ -522,7 +522,7 @@ async fn import_dump_v2_movie_raw() { "pagination": { "maxTotalHits": 1000 }, - "searchCutoff": null + "searchCutoffMs": null } "### ); @@ -668,7 +668,7 @@ async fn import_dump_v2_movie_with_settings() { "pagination": { "maxTotalHits": 1000 }, - "searchCutoff": null + "searchCutoffMs": null } "### ); @@ -813,7 +813,7 @@ async fn import_dump_v2_rubygems_with_settings() { "pagination": { "maxTotalHits": 1000 }, - "searchCutoff": null + "searchCutoffMs": null } "### ); @@ -947,7 +947,7 @@ async fn import_dump_v3_movie_raw() { "pagination": { "maxTotalHits": 1000 }, - "searchCutoff": null + "searchCutoffMs": null } "### ); @@ -1093,7 +1093,7 @@ async fn import_dump_v3_movie_with_settings() { "pagination": { "maxTotalHits": 1000 }, - "searchCutoff": null + "searchCutoffMs": null } "### ); @@ -1238,7 +1238,7 @@ async fn import_dump_v3_rubygems_with_settings() { "pagination": { "maxTotalHits": 1000 }, - "searchCutoff": null + "searchCutoffMs": null } "### ); @@ -1372,7 +1372,7 @@ async fn import_dump_v4_movie_raw() { "pagination": { "maxTotalHits": 1000 }, - "searchCutoff": null + "searchCutoffMs": null } "### ); @@ -1518,7 +1518,7 @@ async fn import_dump_v4_movie_with_settings() { "pagination": { "maxTotalHits": 1000 }, - "searchCutoff": null + "searchCutoffMs": null } "### ); @@ -1663,7 +1663,7 @@ async fn import_dump_v4_rubygems_with_settings() { "pagination": { "maxTotalHits": 1000 }, - "searchCutoff": null + "searchCutoffMs": null } "### ); @@ -1908,7 +1908,7 @@ async fn import_dump_v6_containing_experimental_features() { "pagination": { "maxTotalHits": 1000 }, - "searchCutoff": null + "searchCutoffMs": null } "###); diff --git a/meilisearch/tests/search/mod.rs b/meilisearch/tests/search/mod.rs index 3e5c4278a..971539a31 100644 --- a/meilisearch/tests/search/mod.rs +++ b/meilisearch/tests/search/mod.rs @@ -843,7 +843,7 @@ async fn test_degraded_score_details() { index.add_documents(json!(documents), None).await; // We can't really use anything else than 0ms here; otherwise, the test will get flaky. - let (res, _code) = index.update_settings(json!({ "searchCutoff": 0 })).await; + let (res, _code) = index.update_settings(json!({ "searchCutoffMs": 0 })).await; index.wait_task(res.uid()).await; index @@ -855,7 +855,7 @@ async fn test_degraded_score_details() { }), |response, code| { meili_snap::snapshot!(code, @"200 OK"); - meili_snap::snapshot!(meili_snap::json_string!(response), @r###" + meili_snap::snapshot!(meili_snap::json_string!(response, { ".processingTimeMs" => "[duration]" }), @r###" { "hits": [ { @@ -905,7 +905,7 @@ async fn test_degraded_score_details() { } ], "query": "b", - "processingTimeMs": 0, + "processingTimeMs": "[duration]", "limit": 20, "offset": 0, "estimatedTotalHits": 3 diff --git a/meilisearch/tests/settings/get_settings.rs b/meilisearch/tests/settings/get_settings.rs index d573f38e0..09e38e55a 100644 --- a/meilisearch/tests/settings/get_settings.rs +++ b/meilisearch/tests/settings/get_settings.rs @@ -35,7 +35,7 @@ static DEFAULT_SETTINGS_VALUES: Lazy> = Lazy::new(| "maxTotalHits": json!(1000), }), ); - map.insert("search_cutoff", json!(null)); + map.insert("search_cutoff_ms", json!(null)); map }); @@ -85,7 +85,7 @@ async fn get_settings() { }) ); assert_eq!(settings["proximityPrecision"], json!("byWord")); - assert_eq!(settings["searchCutoff"], json!(null)); + assert_eq!(settings["searchCutoffMs"], json!(null)); } #[actix_rt::test] @@ -288,7 +288,7 @@ test_setting_routes!( synonyms put, pagination patch, faceting patch, - search_cutoff put + search_cutoff_ms put ); #[actix_rt::test] From 7bd881b9bcdeb2e84b9ec4870584d11efa580897 Mon Sep 17 00:00:00 2001 From: Tamo Date: Mon, 18 Mar 2024 18:39:05 +0100 Subject: [PATCH 08/16] adds the degraded searches to the prometheus dashboard --- assets/grafana-dashboard.json | 64 ++++++++++++++++++++++++ meilisearch/src/metrics.rs | 5 ++ meilisearch/src/routes/indexes/search.rs | 4 ++ 3 files changed, 73 insertions(+) diff --git a/assets/grafana-dashboard.json b/assets/grafana-dashboard.json index 37f7b1ca2..74a456b97 100644 --- a/assets/grafana-dashboard.json +++ b/assets/grafana-dashboard.json @@ -238,6 +238,70 @@ "title": "Total Searches (1h)", "type": "gauge" }, + { + "datasource": { + "type": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 8, + "y": 1 + }, + "id": 26, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "editorMode": "builder", + "exemplar": true, + "expr": "round(increase(meilisearch_degraded_search_requests{job=\"$job\"}[1h]))", + "interval": "", + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Total Degraded Searches (1h)", + "type": "gauge" + }, { "datasource": { "type": "prometheus" diff --git a/meilisearch/src/metrics.rs b/meilisearch/src/metrics.rs index bfe704979..652e6c227 100644 --- a/meilisearch/src/metrics.rs +++ b/meilisearch/src/metrics.rs @@ -22,6 +22,11 @@ lazy_static! { &["method", "path"] ) .expect("Can't create a metric"); + pub static ref MEILISEARCH_DEGRADED_SEARCH_REQUESTS: IntGauge = register_int_gauge!(opts!( + "meilisearch_degraded_search_requests", + "Meilisearch number of degraded search requests" + )) + .expect("Can't create a metric"); pub static ref MEILISEARCH_DB_SIZE_BYTES: IntGauge = register_int_gauge!(opts!("meilisearch_db_size_bytes", "Meilisearch DB Size In Bytes")) .expect("Can't create a metric"); diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index 3adfce970..6a430b6a3 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -17,6 +17,7 @@ use crate::analytics::{Analytics, SearchAggregator}; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; +use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS; use crate::search::{ add_search_rules, perform_search, HybridQuery, MatchingStrategy, SearchQuery, SemanticRatio, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, @@ -247,6 +248,9 @@ pub async fn search_with_post( .await?; if let Ok(ref search_result) = search_result { aggregate.succeed(search_result); + if search_result.degraded { + MEILISEARCH_DEGRADED_SEARCH_REQUESTS.inc(); + } } analytics.post_search(aggregate); From 4369e9e97c47401066f4a2c076ebd717f0fccf5b Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 19 Mar 2024 11:14:28 +0100 Subject: [PATCH 09/16] add an error code test on the setting --- meilisearch-types/src/error.rs | 2 +- meilisearch-types/src/settings.rs | 2 +- meilisearch/src/routes/indexes/settings.rs | 3 +-- meilisearch/tests/common/index.rs | 5 ++++ meilisearch/tests/settings/errors.rs | 28 ++++++++++++++++++++++ 5 files changed, 36 insertions(+), 4 deletions(-) diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index bf9492ff6..aed77411a 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -259,7 +259,7 @@ InvalidSettingsProximityPrecision , InvalidRequest , BAD_REQUEST ; InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ; InvalidSettingsFilterableAttributes , InvalidRequest , BAD_REQUEST ; InvalidSettingsPagination , InvalidRequest , BAD_REQUEST ; -InvalidSettingsSearchCutoff , InvalidRequest , BAD_REQUEST ; +InvalidSettingsSearchCutoffMs , InvalidRequest , BAD_REQUEST ; InvalidSettingsEmbedders , InvalidRequest , BAD_REQUEST ; InvalidSettingsRankingRules , InvalidRequest , BAD_REQUEST ; InvalidSettingsSearchableAttributes , InvalidRequest , BAD_REQUEST ; diff --git a/meilisearch-types/src/settings.rs b/meilisearch-types/src/settings.rs index 23fe98347..5480e72c6 100644 --- a/meilisearch-types/src/settings.rs +++ b/meilisearch-types/src/settings.rs @@ -203,7 +203,7 @@ pub struct Settings { #[deserr(default, error = DeserrJsonError)] pub embedders: Setting>>, #[serde(default, skip_serializing_if = "Setting::is_not_set")] - #[deserr(default, error = DeserrJsonError)] + #[deserr(default, error = DeserrJsonError)] pub search_cutoff_ms: Setting, #[serde(skip)] diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index 4c03eb1a1..5dabd7b0d 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -138,7 +138,6 @@ macro_rules! make_setting_route { debug!(returns = ?settings, "Update settings"); let mut json = serde_json::json!(&settings); - dbg!(&json); let val = json[$camelcase_attr].take(); Ok(HttpResponse::Ok().json(val)) @@ -630,7 +629,7 @@ make_setting_route!( put, u64, meilisearch_types::deserr::DeserrJsonError< - meilisearch_types::error::deserr_codes::InvalidSettingsSearchCutoff, + meilisearch_types::error::deserr_codes::InvalidSettingsSearchCutoffMs, >, search_cutoff_ms, "searchCutoffMs", diff --git a/meilisearch/tests/common/index.rs b/meilisearch/tests/common/index.rs index 16fc10e98..9ed6a6077 100644 --- a/meilisearch/tests/common/index.rs +++ b/meilisearch/tests/common/index.rs @@ -328,6 +328,11 @@ impl Index<'_> { self.service.patch_encoded(url, settings, self.encoder).await } + pub async fn update_settings_search_cutoff_ms(&self, settings: Value) -> (Value, StatusCode) { + let url = format!("/indexes/{}/settings/search-cutoff-ms", urlencode(self.uid.as_ref())); + self.service.put_encoded(url, settings, self.encoder).await + } + pub async fn delete_settings(&self) -> (Value, StatusCode) { let url = format!("/indexes/{}/settings", urlencode(self.uid.as_ref())); self.service.delete(url).await diff --git a/meilisearch/tests/settings/errors.rs b/meilisearch/tests/settings/errors.rs index 687cef1f8..2bd17d649 100644 --- a/meilisearch/tests/settings/errors.rs +++ b/meilisearch/tests/settings/errors.rs @@ -337,3 +337,31 @@ async fn settings_bad_pagination() { } "###); } + +#[actix_rt::test] +async fn settings_bad_search_cutoff_ms() { + let server = Server::new().await; + let index = server.index("test"); + + let (response, code) = index.update_settings(json!({ "searchCutoffMs": "doggo" })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Invalid value type at `.searchCutoffMs`: expected a positive integer, but found a string: `\"doggo\"`", + "code": "invalid_settings_search_cutoff_ms", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_search_cutoff_ms" + } + "###); + + let (response, code) = index.update_settings_search_cutoff_ms(json!("doggo")).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Invalid value type: expected a positive integer, but found a string: `\"doggo\"`", + "code": "invalid_settings_search_cutoff_ms", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_search_cutoff_ms" + } + "###); +} From bfec9468d47414e7f260261504ed46a83c291e65 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 19 Mar 2024 14:49:15 +0100 Subject: [PATCH 10/16] Update milli/src/search/mod.rs Co-authored-by: Louis Dureuil --- milli/src/search/mod.rs | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index f6ab8a7de..b3dd0c091 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -260,7 +260,7 @@ impl fmt::Debug for Search<'_> { .field("words_limit", words_limit) .field("distribution_shift", distribution_shift) .field("embedder_name", embedder_name) - .field("time_bduget", time_budget) + .field("time_budget", time_budget) .finish() } } From 0ae39644f7d97e83c8edfb19f344cbb2eb24fc40 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 19 Mar 2024 15:07:06 +0100 Subject: [PATCH 11/16] fix the facet search --- meilisearch/src/search.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index 0333eb0d5..3c00ca802 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -729,8 +729,11 @@ pub fn perform_facet_search( features: RoFeatures, ) -> Result { let before_search = Instant::now(); - let time_budget = TimeBudget::new(Duration::from_millis(150)); let rtxn = index.read_txn()?; + let time_budget = match index.search_cutoff(&rtxn)? { + Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)), + None => TimeBudget::default(), + }; let (search, _, _, _) = prepare_search(index, &rtxn, &search_query, features, None, time_budget)?; From 7b9e0d29442df352a99aee7eab839464cd5764c1 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 19 Mar 2024 15:11:21 +0100 Subject: [PATCH 12/16] forward the degraded parameter to the hybrid search --- milli/src/search/hybrid.rs | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/milli/src/search/hybrid.rs b/milli/src/search/hybrid.rs index 1e14f4430..47ac5f46b 100644 --- a/milli/src/search/hybrid.rs +++ b/milli/src/search/hybrid.rs @@ -10,6 +10,7 @@ struct ScoreWithRatioResult { matching_words: MatchingWords, candidates: RoaringBitmap, document_scores: Vec<(u32, ScoreWithRatio)>, + degraded: bool, } type ScoreWithRatio = (Vec, f32); @@ -72,6 +73,7 @@ impl ScoreWithRatioResult { matching_words: results.matching_words, candidates: results.candidates, document_scores, + degraded: results.degraded, } } @@ -106,7 +108,7 @@ impl ScoreWithRatioResult { candidates: left.candidates | right.candidates, documents_ids, document_scores, - degraded: false, + degraded: left.degraded | right.degraded, } } } From d8fe4fe49d12b36bd9b82963c7e2fcc278d2f894 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 19 Mar 2024 15:45:04 +0100 Subject: [PATCH 13/16] return the order in the score details --- meilisearch/tests/search/mod.rs | 12 +++++++++--- milli/src/score_details.rs | 8 +++----- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/meilisearch/tests/search/mod.rs b/meilisearch/tests/search/mod.rs index 971539a31..88470187a 100644 --- a/meilisearch/tests/search/mod.rs +++ b/meilisearch/tests/search/mod.rs @@ -869,7 +869,9 @@ async fn test_degraded_score_details() { ], "cattos": "pésti", "_rankingScoreDetails": { - "skipped": 0.0 + "skipped": { + "order": 0 + } } }, { @@ -883,7 +885,9 @@ async fn test_degraded_score_details() { "pestiféré" ], "_rankingScoreDetails": { - "skipped": 0.0 + "skipped": { + "order": 0 + } } }, { @@ -900,7 +904,9 @@ async fn test_degraded_score_details() { "gomez" ], "_rankingScoreDetails": { - "skipped": 0.0 + "skipped": { + "order": 0 + } } } ], diff --git a/milli/src/score_details.rs b/milli/src/score_details.rs index f2c6fb58a..08dfcdbb6 100644 --- a/milli/src/score_details.rs +++ b/milli/src/score_details.rs @@ -101,7 +101,7 @@ impl ScoreDetails { ScoreDetails::Vector(vector) => RankOrValue::Score( vector.value_similarity.as_ref().map(|(_, s)| *s as f64).unwrap_or(0.0f64), ), - ScoreDetails::Skipped => RankOrValue::Score(0.), + ScoreDetails::Skipped => RankOrValue::Rank(Rank { rank: 0, max_rank: 1 }), } } @@ -262,10 +262,8 @@ impl ScoreDetails { order += 1; } ScoreDetails::Skipped => { - details_map.insert( - "skipped".to_string(), - serde_json::Number::from_f64(0.).unwrap().into(), - ); + details_map + .insert("skipped".to_string(), serde_json::json!({ "order": order })); order += 1; } } From 098ab594eb156f5ba34ee4db81893f4e2c146b1f Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 19 Mar 2024 17:32:32 +0100 Subject: [PATCH 14/16] A score of 0.0 is now lesser than a sort result MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit handles the niche case 🐩 in the hybrid search where: 1. a sort ranking rule is the first rule. 2. the keyword search is skipped at the first rule. 3. the semantic search is not skipped at the first rule. Previously, we would have the skipped search winning, whereas we want the non skipped one winning. --- milli/src/search/hybrid.rs | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/milli/src/search/hybrid.rs b/milli/src/search/hybrid.rs index 47ac5f46b..a8b7f0fcf 100644 --- a/milli/src/search/hybrid.rs +++ b/milli/src/search/hybrid.rs @@ -50,8 +50,12 @@ fn compare_scores( order => return order, } } - (Some(ScoreValue::Score(_)), Some(_)) => return Ordering::Greater, - (Some(_), Some(ScoreValue::Score(_))) => return Ordering::Less, + (Some(ScoreValue::Score(x)), Some(_)) => { + return if x == 0. { Ordering::Less } else { Ordering::Greater } + } + (Some(_), Some(ScoreValue::Score(x))) => { + return if x == 0. { Ordering::Greater } else { Ordering::Less } + } // if we have this, we're bad (Some(ScoreValue::GeoSort(_)), Some(ScoreValue::Sort(_))) | (Some(ScoreValue::Sort(_)), Some(ScoreValue::GeoSort(_))) => { From 2c3af8e51379b698276a23864c229cafc3984d77 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 19 Mar 2024 18:07:11 +0100 Subject: [PATCH 15/16] query the detailed score detail in the test --- milli/src/search/new/tests/cutoff.rs | 31 ++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/milli/src/search/new/tests/cutoff.rs b/milli/src/search/new/tests/cutoff.rs index 4256abc2b..664b139f3 100644 --- a/milli/src/search/new/tests/cutoff.rs +++ b/milli/src/search/new/tests/cutoff.rs @@ -10,6 +10,7 @@ use maplit::hashset; use meili_snap::snapshot; use crate::index::tests::TempIndex; +use crate::score_details::ScoringStrategy; use crate::{Criterion, Filter, Search, TimeBudget}; fn create_index() -> TempIndex { @@ -94,6 +95,7 @@ fn degraded_search_and_score_details() { let mut search = Search::new(&rtxn, &index); search.query("hello puppy kefir"); search.limit(4); + search.scoring_strategy(ScoringStrategy::Detailed); search.time_budget(TimeBudget::max()); let result = search.execute().unwrap(); @@ -140,6 +142,12 @@ fn degraded_search_and_score_details() { max_matching_words: 3, }, ), + Typo( + Typo { + typo_count: 2, + max_typo_count: 3, + }, + ), ], [ Words( @@ -148,6 +156,12 @@ fn degraded_search_and_score_details() { max_matching_words: 3, }, ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 2, + }, + ), ], ] "###); @@ -350,6 +364,12 @@ fn degraded_search_and_score_details() { max_matching_words: 3, }, ), + Typo( + Typo { + typo_count: 2, + max_typo_count: 3, + }, + ), ], [ Skipped, @@ -357,9 +377,9 @@ fn degraded_search_and_score_details() { ] "###); - // After FIVE loop iteration. The words ranking rule gave us a new bucket. + // After SIX loop iteration. The words ranking rule gave us a new bucket. // Since we reached the limit we were able to early exit without checking the typo ranking rule. - search.time_budget(TimeBudget::max().with_stop_after(5)); + search.time_budget(TimeBudget::max().with_stop_after(6)); let result = search.execute().unwrap(); snapshot!(format!("{:#?}\n{:#?}", result.documents_ids, result.document_scores), @r###" @@ -405,6 +425,12 @@ fn degraded_search_and_score_details() { max_matching_words: 3, }, ), + Typo( + Typo { + typo_count: 2, + max_typo_count: 3, + }, + ), ], [ Words( @@ -413,6 +439,7 @@ fn degraded_search_and_score_details() { max_matching_words: 3, }, ), + Skipped, ], ] "###); From 6079141ea6d77ac08a4b2c44ecc5f7fb07feb57f Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 19 Mar 2024 18:30:14 +0100 Subject: [PATCH 16/16] snapshot the scores side by side with the score details --- milli/src/search/new/tests/cutoff.rs | 69 +++++++++++----------------- 1 file changed, 26 insertions(+), 43 deletions(-) diff --git a/milli/src/search/new/tests/cutoff.rs b/milli/src/search/new/tests/cutoff.rs index 664b139f3..63b67f2e7 100644 --- a/milli/src/search/new/tests/cutoff.rs +++ b/milli/src/search/new/tests/cutoff.rs @@ -10,7 +10,7 @@ use maplit::hashset; use meili_snap::snapshot; use crate::index::tests::TempIndex; -use crate::score_details::ScoringStrategy; +use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::{Criterion, Filter, Search, TimeBudget}; fn create_index() -> TempIndex { @@ -88,6 +88,7 @@ fn degraded_search_cannot_skip_filter() { } #[test] +#[allow(clippy::format_collect)] // the test is already quite big fn degraded_search_and_score_details() { let index = create_index(); let rtxn = index.read_txn().unwrap(); @@ -99,13 +100,10 @@ fn degraded_search_and_score_details() { search.time_budget(TimeBudget::max()); let result = search.execute().unwrap(); - snapshot!(format!("{:#?}\n{:#?}", result.documents_ids, result.document_scores), @r###" - [ - 4, - 1, - 0, - 3, - ] + snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::(), result.document_scores), @r###" + IDs: [4, 1, 0, 3] + Scores: 1.0000 0.9167 0.8333 0.6667 + Score Details: [ [ Words( @@ -170,13 +168,10 @@ fn degraded_search_and_score_details() { search.time_budget(TimeBudget::max().with_stop_after(1)); let result = search.execute().unwrap(); - snapshot!(format!("{:#?}\n{:#?}", result.documents_ids, result.document_scores), @r###" - [ - 0, - 1, - 4, - 2, - ] + snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::(), result.document_scores), @r###" + IDs: [0, 1, 4, 2] + Scores: 0.6667 0.6667 0.6667 0.0000 + Score Details: [ [ Words( @@ -215,13 +210,10 @@ fn degraded_search_and_score_details() { search.time_budget(TimeBudget::max().with_stop_after(2)); let result = search.execute().unwrap(); - snapshot!(format!("{:#?}\n{:#?}", result.documents_ids, result.document_scores), @r###" - [ - 4, - 0, - 1, - 2, - ] + snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::(), result.document_scores), @r###" + IDs: [4, 0, 1, 2] + Scores: 1.0000 0.6667 0.6667 0.0000 + Score Details: [ [ Words( @@ -265,13 +257,10 @@ fn degraded_search_and_score_details() { search.time_budget(TimeBudget::max().with_stop_after(3)); let result = search.execute().unwrap(); - snapshot!(format!("{:#?}\n{:#?}", result.documents_ids, result.document_scores), @r###" - [ - 4, - 1, - 0, - 2, - ] + snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::(), result.document_scores), @r###" + IDs: [4, 1, 0, 2] + Scores: 1.0000 0.9167 0.6667 0.0000 + Score Details: [ [ Words( @@ -321,13 +310,10 @@ fn degraded_search_and_score_details() { search.time_budget(TimeBudget::max().with_stop_after(4)); let result = search.execute().unwrap(); - snapshot!(format!("{:#?}\n{:#?}", result.documents_ids, result.document_scores), @r###" - [ - 4, - 1, - 0, - 2, - ] + snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::(), result.document_scores), @r###" + IDs: [4, 1, 0, 2] + Scores: 1.0000 0.9167 0.8333 0.0000 + Score Details: [ [ Words( @@ -382,13 +368,10 @@ fn degraded_search_and_score_details() { search.time_budget(TimeBudget::max().with_stop_after(6)); let result = search.execute().unwrap(); - snapshot!(format!("{:#?}\n{:#?}", result.documents_ids, result.document_scores), @r###" - [ - 4, - 1, - 0, - 3, - ] + snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::(), result.document_scores), @r###" + IDs: [4, 1, 0, 3] + Scores: 1.0000 0.9167 0.8333 0.3333 + Score Details: [ [ Words(