diff --git a/assets/grafana-dashboard.json b/assets/grafana-dashboard.json index 37f7b1ca2..74a456b97 100644 --- a/assets/grafana-dashboard.json +++ b/assets/grafana-dashboard.json @@ -238,6 +238,70 @@ "title": "Total Searches (1h)", "type": "gauge" }, + { + "datasource": { + "type": "prometheus" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 80 + } + ] + } + }, + "overrides": [] + }, + "gridPos": { + "h": 6, + "w": 4, + "x": 8, + "y": 1 + }, + "id": 26, + "options": { + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ], + "fields": "", + "values": false + }, + "showThresholdLabels": false, + "showThresholdMarkers": true, + "text": {} + }, + "pluginVersion": "9.5.2", + "targets": [ + { + "datasource": { + "type": "prometheus" + }, + "editorMode": "builder", + "exemplar": true, + "expr": "round(increase(meilisearch_degraded_search_requests{job=\"$job\"}[1h]))", + "interval": "", + "legendFormat": "", + "range": true, + "refId": "A" + } + ], + "title": "Total Degraded Searches (1h)", + "type": "gauge" + }, { "datasource": { "type": "prometheus" diff --git a/dump/src/lib.rs b/dump/src/lib.rs index be0053a7c..a7af2d5d0 100644 --- a/dump/src/lib.rs +++ b/dump/src/lib.rs @@ -277,6 +277,7 @@ pub(crate) mod test { }), pagination: Setting::NotSet, embedders: Setting::NotSet, + search_cutoff_ms: Setting::NotSet, _kind: std::marker::PhantomData, }; settings.check() diff --git a/dump/src/reader/compat/v5_to_v6.rs b/dump/src/reader/compat/v5_to_v6.rs index e00d3a599..a883f0ba0 100644 --- a/dump/src/reader/compat/v5_to_v6.rs +++ b/dump/src/reader/compat/v5_to_v6.rs @@ -379,6 +379,7 @@ impl From> for v6::Settings { v5::Setting::NotSet => v6::Setting::NotSet, }, embedders: v6::Setting::NotSet, + search_cutoff_ms: v6::Setting::NotSet, _kind: std::marker::PhantomData, } } diff 
--git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs index 965d2e672..aed77411a 100644 --- a/meilisearch-types/src/error.rs +++ b/meilisearch-types/src/error.rs @@ -259,6 +259,7 @@ InvalidSettingsProximityPrecision , InvalidRequest , BAD_REQUEST ; InvalidSettingsFaceting , InvalidRequest , BAD_REQUEST ; InvalidSettingsFilterableAttributes , InvalidRequest , BAD_REQUEST ; InvalidSettingsPagination , InvalidRequest , BAD_REQUEST ; +InvalidSettingsSearchCutoffMs , InvalidRequest , BAD_REQUEST ; InvalidSettingsEmbedders , InvalidRequest , BAD_REQUEST ; InvalidSettingsRankingRules , InvalidRequest , BAD_REQUEST ; InvalidSettingsSearchableAttributes , InvalidRequest , BAD_REQUEST ; diff --git a/meilisearch-types/src/settings.rs b/meilisearch-types/src/settings.rs index ca46abb0c..5480e72c6 100644 --- a/meilisearch-types/src/settings.rs +++ b/meilisearch-types/src/settings.rs @@ -202,6 +202,9 @@ pub struct Settings { #[serde(default, skip_serializing_if = "Setting::is_not_set")] #[deserr(default, error = DeserrJsonError)] pub embedders: Setting>>, + #[serde(default, skip_serializing_if = "Setting::is_not_set")] + #[deserr(default, error = DeserrJsonError)] + pub search_cutoff_ms: Setting, #[serde(skip)] #[deserr(skip)] @@ -227,6 +230,7 @@ impl Settings { faceting: Setting::Reset, pagination: Setting::Reset, embedders: Setting::Reset, + search_cutoff_ms: Setting::Reset, _kind: PhantomData, } } @@ -249,6 +253,7 @@ impl Settings { faceting, pagination, embedders, + search_cutoff_ms, .. 
} = self; @@ -269,6 +274,7 @@ impl Settings { faceting, pagination, embedders, + search_cutoff_ms, _kind: PhantomData, } } @@ -315,6 +321,7 @@ impl Settings { faceting: self.faceting, pagination: self.pagination, embedders: self.embedders, + search_cutoff_ms: self.search_cutoff_ms, _kind: PhantomData, } } @@ -347,19 +354,40 @@ pub fn apply_settings_to_builder( settings: &Settings, builder: &mut milli::update::Settings, ) { - match settings.searchable_attributes { + let Settings { + displayed_attributes, + searchable_attributes, + filterable_attributes, + sortable_attributes, + ranking_rules, + stop_words, + non_separator_tokens, + separator_tokens, + dictionary, + synonyms, + distinct_attribute, + proximity_precision, + typo_tolerance, + faceting, + pagination, + embedders, + search_cutoff_ms, + _kind, + } = settings; + + match searchable_attributes { Setting::Set(ref names) => builder.set_searchable_fields(names.clone()), Setting::Reset => builder.reset_searchable_fields(), Setting::NotSet => (), } - match settings.displayed_attributes { + match displayed_attributes { Setting::Set(ref names) => builder.set_displayed_fields(names.clone()), Setting::Reset => builder.reset_displayed_fields(), Setting::NotSet => (), } - match settings.filterable_attributes { + match filterable_attributes { Setting::Set(ref facets) => { builder.set_filterable_fields(facets.clone().into_iter().collect()) } @@ -367,13 +395,13 @@ pub fn apply_settings_to_builder( Setting::NotSet => (), } - match settings.sortable_attributes { + match sortable_attributes { Setting::Set(ref fields) => builder.set_sortable_fields(fields.iter().cloned().collect()), Setting::Reset => builder.reset_sortable_fields(), Setting::NotSet => (), } - match settings.ranking_rules { + match ranking_rules { Setting::Set(ref criteria) => { builder.set_criteria(criteria.iter().map(|c| c.clone().into()).collect()) } @@ -381,13 +409,13 @@ pub fn apply_settings_to_builder( Setting::NotSet => (), } - match settings.stop_words 
{ + match stop_words { Setting::Set(ref stop_words) => builder.set_stop_words(stop_words.clone()), Setting::Reset => builder.reset_stop_words(), Setting::NotSet => (), } - match settings.non_separator_tokens { + match non_separator_tokens { Setting::Set(ref non_separator_tokens) => { builder.set_non_separator_tokens(non_separator_tokens.clone()) } @@ -395,7 +423,7 @@ pub fn apply_settings_to_builder( Setting::NotSet => (), } - match settings.separator_tokens { + match separator_tokens { Setting::Set(ref separator_tokens) => { builder.set_separator_tokens(separator_tokens.clone()) } @@ -403,31 +431,31 @@ pub fn apply_settings_to_builder( Setting::NotSet => (), } - match settings.dictionary { + match dictionary { Setting::Set(ref dictionary) => builder.set_dictionary(dictionary.clone()), Setting::Reset => builder.reset_dictionary(), Setting::NotSet => (), } - match settings.synonyms { + match synonyms { Setting::Set(ref synonyms) => builder.set_synonyms(synonyms.clone().into_iter().collect()), Setting::Reset => builder.reset_synonyms(), Setting::NotSet => (), } - match settings.distinct_attribute { + match distinct_attribute { Setting::Set(ref attr) => builder.set_distinct_field(attr.clone()), Setting::Reset => builder.reset_distinct_field(), Setting::NotSet => (), } - match settings.proximity_precision { + match proximity_precision { Setting::Set(ref precision) => builder.set_proximity_precision((*precision).into()), Setting::Reset => builder.reset_proximity_precision(), Setting::NotSet => (), } - match settings.typo_tolerance { + match typo_tolerance { Setting::Set(ref value) => { match value.enabled { Setting::Set(val) => builder.set_autorize_typos(val), @@ -482,7 +510,7 @@ pub fn apply_settings_to_builder( Setting::NotSet => (), } - match &settings.faceting { + match faceting { Setting::Set(FacetingSettings { max_values_per_facet, sort_facet_values_by }) => { match max_values_per_facet { Setting::Set(val) => builder.set_max_values_per_facet(*val), @@ -504,7 
+532,7 @@ pub fn apply_settings_to_builder( Setting::NotSet => (), } - match settings.pagination { + match pagination { Setting::Set(ref value) => match value.max_total_hits { Setting::Set(val) => builder.set_pagination_max_total_hits(val), Setting::Reset => builder.reset_pagination_max_total_hits(), @@ -514,11 +542,17 @@ pub fn apply_settings_to_builder( Setting::NotSet => (), } - match settings.embedders.clone() { - Setting::Set(value) => builder.set_embedder_settings(value), + match embedders { + Setting::Set(value) => builder.set_embedder_settings(value.clone()), Setting::Reset => builder.reset_embedder_settings(), Setting::NotSet => (), } + + match search_cutoff_ms { + Setting::Set(cutoff) => builder.set_search_cutoff(*cutoff), + Setting::Reset => builder.reset_search_cutoff(), + Setting::NotSet => (), + } } pub fn settings( @@ -607,6 +641,8 @@ pub fn settings( .collect(); let embedders = if embedders.is_empty() { Setting::NotSet } else { Setting::Set(embedders) }; + let search_cutoff_ms = index.search_cutoff(rtxn)?; + Ok(Settings { displayed_attributes: match displayed_attributes { Some(attrs) => Setting::Set(attrs), @@ -633,6 +669,10 @@ pub fn settings( faceting: Setting::Set(faceting), pagination: Setting::Set(pagination), embedders, + search_cutoff_ms: match search_cutoff_ms { + Some(cutoff) => Setting::Set(cutoff), + None => Setting::Reset, + }, _kind: PhantomData, }) } @@ -783,6 +823,7 @@ pub(crate) mod test { faceting: Setting::NotSet, pagination: Setting::NotSet, embedders: Setting::NotSet, + search_cutoff_ms: Setting::NotSet, _kind: PhantomData::, }; @@ -809,6 +850,7 @@ pub(crate) mod test { faceting: Setting::NotSet, pagination: Setting::NotSet, embedders: Setting::NotSet, + search_cutoff_ms: Setting::NotSet, _kind: PhantomData::, }; diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 7dfc52900..99298bd43 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ 
b/meilisearch/src/analytics/segment_analytics.rs @@ -579,6 +579,7 @@ pub struct SearchAggregator { // requests total_received: usize, total_succeeded: usize, + total_degraded: usize, time_spent: BinaryHeap, // sort @@ -758,9 +759,13 @@ impl SearchAggregator { hits_info: _, facet_distribution: _, facet_stats: _, + degraded, } = result; self.total_succeeded = self.total_succeeded.saturating_add(1); + if *degraded { + self.total_degraded = self.total_degraded.saturating_add(1); + } self.time_spent.push(*processing_time_ms as usize); } @@ -802,6 +807,7 @@ impl SearchAggregator { semantic_ratio, embedder, hybrid, + total_degraded, } = other; if self.timestamp.is_none() { @@ -816,6 +822,7 @@ impl SearchAggregator { // request self.total_received = self.total_received.saturating_add(total_received); self.total_succeeded = self.total_succeeded.saturating_add(total_succeeded); + self.total_degraded = self.total_degraded.saturating_add(total_degraded); self.time_spent.append(time_spent); // sort @@ -921,6 +928,7 @@ impl SearchAggregator { semantic_ratio, embedder, hybrid, + total_degraded, } = self; if total_received == 0 { @@ -940,6 +948,7 @@ impl SearchAggregator { "total_succeeded": total_succeeded, "total_failed": total_received.saturating_sub(total_succeeded), // just to be sure we never panics "total_received": total_received, + "total_degraded": total_degraded, }, "sort": { "with_geoPoint": sort_with_geo_point, diff --git a/meilisearch/src/metrics.rs b/meilisearch/src/metrics.rs index bfe704979..652e6c227 100644 --- a/meilisearch/src/metrics.rs +++ b/meilisearch/src/metrics.rs @@ -22,6 +22,11 @@ lazy_static! 
{ &["method", "path"] ) .expect("Can't create a metric"); + pub static ref MEILISEARCH_DEGRADED_SEARCH_REQUESTS: IntGauge = register_int_gauge!(opts!( + "meilisearch_degraded_search_requests", + "Meilisearch number of degraded search requests" + )) + .expect("Can't create a metric"); pub static ref MEILISEARCH_DB_SIZE_BYTES: IntGauge = register_int_gauge!(opts!("meilisearch_db_size_bytes", "Meilisearch DB Size In Bytes")) .expect("Can't create a metric"); diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index 3adfce970..6a430b6a3 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -17,6 +17,7 @@ use crate::analytics::{Analytics, SearchAggregator}; use crate::extractors::authentication::policies::*; use crate::extractors::authentication::GuardedData; use crate::extractors::sequential_extractor::SeqHandler; +use crate::metrics::MEILISEARCH_DEGRADED_SEARCH_REQUESTS; use crate::search::{ add_search_rules, perform_search, HybridQuery, MatchingStrategy, SearchQuery, SemanticRatio, DEFAULT_CROP_LENGTH, DEFAULT_CROP_MARKER, DEFAULT_HIGHLIGHT_POST_TAG, @@ -247,6 +248,9 @@ pub async fn search_with_post( .await?; if let Ok(ref search_result) = search_result { aggregate.succeed(search_result); + if search_result.degraded { + MEILISEARCH_DEGRADED_SEARCH_REQUESTS.inc(); + } } analytics.post_search(aggregate); diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs index c782e78cb..5dabd7b0d 100644 --- a/meilisearch/src/routes/indexes/settings.rs +++ b/meilisearch/src/routes/indexes/settings.rs @@ -624,6 +624,25 @@ fn embedder_analytics( ) } +make_setting_route!( + "/search-cutoff-ms", + put, + u64, + meilisearch_types::deserr::DeserrJsonError< + meilisearch_types::error::deserr_codes::InvalidSettingsSearchCutoffMs, + >, + search_cutoff_ms, + "searchCutoffMs", + analytics, + |setting: &Option, req: &HttpRequest| { + analytics.publish( + 
"Search Cutoff Updated".to_string(), + serde_json::json!({"search_cutoff_ms": setting }), + Some(req), + ); + } +); + macro_rules! generate_configure { ($($mod:ident),*) => { pub fn configure(cfg: &mut web::ServiceConfig) { @@ -654,7 +673,8 @@ generate_configure!( typo_tolerance, pagination, faceting, - embedders + embedders, + search_cutoff_ms ); pub async fn update_all( @@ -765,7 +785,8 @@ pub async fn update_all( "synonyms": { "total": new_settings.synonyms.as_ref().set().map(|synonyms| synonyms.len()), }, - "embedders": crate::routes::indexes::settings::embedder_analytics(new_settings.embedders.as_ref().set()) + "embedders": crate::routes::indexes::settings::embedder_analytics(new_settings.embedders.as_ref().set()), + "search_cutoff_ms": new_settings.search_cutoff_ms.as_ref().set(), }), Some(&req), ); diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index e65192d16..3c00ca802 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -1,7 +1,7 @@ use std::cmp::min; use std::collections::{BTreeMap, BTreeSet, HashSet}; use std::str::FromStr; -use std::time::Instant; +use std::time::{Duration, Instant}; use deserr::Deserr; use either::Either; @@ -14,7 +14,7 @@ use meilisearch_types::heed::RoTxn; use meilisearch_types::index_uid::IndexUid; use meilisearch_types::milli::score_details::{self, ScoreDetails, ScoringStrategy}; use meilisearch_types::milli::vector::DistributionShift; -use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues}; +use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues, TimeBudget}; use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS; use meilisearch_types::{milli, Document}; use milli::tokenizer::TokenizerBuilder; @@ -323,6 +323,10 @@ pub struct SearchResult { pub facet_distribution: Option>>, #[serde(skip_serializing_if = "Option::is_none")] pub facet_stats: Option>, + + // This information is only used for analytics purposes + #[serde(skip)] + pub 
degraded: bool, } #[derive(Serialize, Debug, Clone, PartialEq)] @@ -382,8 +386,10 @@ fn prepare_search<'t>( query: &'t SearchQuery, features: RoFeatures, distribution: Option, + time_budget: TimeBudget, ) -> Result<(milli::Search<'t>, bool, usize, usize), MeilisearchHttpError> { let mut search = index.search(rtxn); + search.time_budget(time_budget); if query.vector.is_some() { features.check_vector("Passing `vector` as a query parameter")?; @@ -492,18 +498,28 @@ pub fn perform_search( ) -> Result { let before_search = Instant::now(); let rtxn = index.read_txn()?; + let time_budget = match index.search_cutoff(&rtxn)? { + Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)), + None => TimeBudget::default(), + }; let (search, is_finite_pagination, max_total_hits, offset) = - prepare_search(index, &rtxn, &query, features, distribution)?; + prepare_search(index, &rtxn, &query, features, distribution, time_budget)?; - let milli::SearchResult { documents_ids, matching_words, candidates, document_scores, .. } = - match &query.hybrid { - Some(hybrid) => match *hybrid.semantic_ratio { - ratio if ratio == 0.0 || ratio == 1.0 => search.execute()?, - ratio => search.execute_hybrid(ratio)?, - }, - None => search.execute()?, - }; + let milli::SearchResult { + documents_ids, + matching_words, + candidates, + document_scores, + degraded, + .. + } = match &query.hybrid { + Some(hybrid) => match *hybrid.semantic_ratio { + ratio if ratio == 0.0 || ratio == 1.0 => search.execute()?, + ratio => search.execute_hybrid(ratio)?, + }, + None => search.execute()?, + }; let fields_ids_map = index.fields_ids_map(&rtxn).unwrap(); @@ -700,6 +716,7 @@ pub fn perform_search( processing_time_ms: before_search.elapsed().as_millis(), facet_distribution, facet_stats, + degraded, }; Ok(result) } @@ -713,8 +730,13 @@ pub fn perform_facet_search( ) -> Result { let before_search = Instant::now(); let rtxn = index.read_txn()?; + let time_budget = match index.search_cutoff(&rtxn)? 
{ + Some(cutoff) => TimeBudget::new(Duration::from_millis(cutoff)), + None => TimeBudget::default(), + }; - let (search, _, _, _) = prepare_search(index, &rtxn, &search_query, features, None)?; + let (search, _, _, _) = + prepare_search(index, &rtxn, &search_query, features, None, time_budget)?; let mut facet_search = SearchForFacetValues::new(facet_name, search, search_query.hybrid.is_some()); if let Some(facet_query) = &facet_query { diff --git a/meilisearch/tests/common/index.rs b/meilisearch/tests/common/index.rs index 16fc10e98..9ed6a6077 100644 --- a/meilisearch/tests/common/index.rs +++ b/meilisearch/tests/common/index.rs @@ -328,6 +328,11 @@ impl Index<'_> { self.service.patch_encoded(url, settings, self.encoder).await } + pub async fn update_settings_search_cutoff_ms(&self, settings: Value) -> (Value, StatusCode) { + let url = format!("/indexes/{}/settings/search-cutoff-ms", urlencode(self.uid.as_ref())); + self.service.put_encoded(url, settings, self.encoder).await + } + pub async fn delete_settings(&self) -> (Value, StatusCode) { let url = format!("/indexes/{}/settings", urlencode(self.uid.as_ref())); self.service.delete(url).await diff --git a/meilisearch/tests/common/mod.rs b/meilisearch/tests/common/mod.rs index 2b9e5e1d7..3117dd185 100644 --- a/meilisearch/tests/common/mod.rs +++ b/meilisearch/tests/common/mod.rs @@ -16,6 +16,7 @@ pub use server::{default_settings, Server}; pub struct Value(pub serde_json::Value); impl Value { + #[track_caller] pub fn uid(&self) -> u64 { if let Some(uid) = self["uid"].as_u64() { uid diff --git a/meilisearch/tests/dumps/mod.rs b/meilisearch/tests/dumps/mod.rs index e8061ae4a..1a31437f8 100644 --- a/meilisearch/tests/dumps/mod.rs +++ b/meilisearch/tests/dumps/mod.rs @@ -77,7 +77,8 @@ async fn import_dump_v1_movie_raw() { }, "pagination": { "maxTotalHits": 1000 - } + }, + "searchCutoffMs": null } "### ); @@ -238,7 +239,8 @@ async fn import_dump_v1_movie_with_settings() { }, "pagination": { "maxTotalHits": 1000 - } + }, 
+ "searchCutoffMs": null } "### ); @@ -385,7 +387,8 @@ async fn import_dump_v1_rubygems_with_settings() { }, "pagination": { "maxTotalHits": 1000 - } + }, + "searchCutoffMs": null } "### ); @@ -518,7 +521,8 @@ async fn import_dump_v2_movie_raw() { }, "pagination": { "maxTotalHits": 1000 - } + }, + "searchCutoffMs": null } "### ); @@ -663,7 +667,8 @@ async fn import_dump_v2_movie_with_settings() { }, "pagination": { "maxTotalHits": 1000 - } + }, + "searchCutoffMs": null } "### ); @@ -807,7 +812,8 @@ async fn import_dump_v2_rubygems_with_settings() { }, "pagination": { "maxTotalHits": 1000 - } + }, + "searchCutoffMs": null } "### ); @@ -940,7 +946,8 @@ async fn import_dump_v3_movie_raw() { }, "pagination": { "maxTotalHits": 1000 - } + }, + "searchCutoffMs": null } "### ); @@ -1085,7 +1092,8 @@ async fn import_dump_v3_movie_with_settings() { }, "pagination": { "maxTotalHits": 1000 - } + }, + "searchCutoffMs": null } "### ); @@ -1229,7 +1237,8 @@ async fn import_dump_v3_rubygems_with_settings() { }, "pagination": { "maxTotalHits": 1000 - } + }, + "searchCutoffMs": null } "### ); @@ -1362,7 +1371,8 @@ async fn import_dump_v4_movie_raw() { }, "pagination": { "maxTotalHits": 1000 - } + }, + "searchCutoffMs": null } "### ); @@ -1507,7 +1517,8 @@ async fn import_dump_v4_movie_with_settings() { }, "pagination": { "maxTotalHits": 1000 - } + }, + "searchCutoffMs": null } "### ); @@ -1651,7 +1662,8 @@ async fn import_dump_v4_rubygems_with_settings() { }, "pagination": { "maxTotalHits": 1000 - } + }, + "searchCutoffMs": null } "### ); @@ -1895,7 +1907,8 @@ async fn import_dump_v6_containing_experimental_features() { }, "pagination": { "maxTotalHits": 1000 - } + }, + "searchCutoffMs": null } "###); diff --git a/meilisearch/tests/search/mod.rs b/meilisearch/tests/search/mod.rs index 90098c5b6..88470187a 100644 --- a/meilisearch/tests/search/mod.rs +++ b/meilisearch/tests/search/mod.rs @@ -834,6 +834,94 @@ async fn test_score_details() { .await; } +#[actix_rt::test] +async fn 
test_degraded_score_details() { + let server = Server::new().await; + let index = server.index("test"); + + let documents = NESTED_DOCUMENTS.clone(); + + index.add_documents(json!(documents), None).await; + // We can't really use anything else than 0ms here; otherwise, the test will get flaky. + let (res, _code) = index.update_settings(json!({ "searchCutoffMs": 0 })).await; + index.wait_task(res.uid()).await; + + index + .search( + json!({ + "q": "b", + "attributesToRetrieve": ["doggos.name", "cattos"], + "showRankingScoreDetails": true, + }), + |response, code| { + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response, { ".processingTimeMs" => "[duration]" }), @r###" + { + "hits": [ + { + "doggos": [ + { + "name": "bobby" + }, + { + "name": "buddy" + } + ], + "cattos": "pésti", + "_rankingScoreDetails": { + "skipped": { + "order": 0 + } + } + }, + { + "doggos": [ + { + "name": "gros bill" + } + ], + "cattos": [ + "simba", + "pestiféré" + ], + "_rankingScoreDetails": { + "skipped": { + "order": 0 + } + } + }, + { + "doggos": [ + { + "name": "turbo" + }, + { + "name": "fast" + } + ], + "cattos": [ + "moumoute", + "gomez" + ], + "_rankingScoreDetails": { + "skipped": { + "order": 0 + } + } + } + ], + "query": "b", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 3 + } + "###); + }, + ) + .await; +} + #[actix_rt::test] async fn experimental_feature_vector_store() { let server = Server::new().await; diff --git a/meilisearch/tests/settings/errors.rs b/meilisearch/tests/settings/errors.rs index 687cef1f8..2bd17d649 100644 --- a/meilisearch/tests/settings/errors.rs +++ b/meilisearch/tests/settings/errors.rs @@ -337,3 +337,31 @@ async fn settings_bad_pagination() { } "###); } + +#[actix_rt::test] +async fn settings_bad_search_cutoff_ms() { + let server = Server::new().await; + let index = server.index("test"); + + let (response, code) = index.update_settings(json!({ "searchCutoffMs": 
"doggo" })).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Invalid value type at `.searchCutoffMs`: expected a positive integer, but found a string: `\"doggo\"`", + "code": "invalid_settings_search_cutoff_ms", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_search_cutoff_ms" + } + "###); + + let (response, code) = index.update_settings_search_cutoff_ms(json!("doggo")).await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Invalid value type: expected a positive integer, but found a string: `\"doggo\"`", + "code": "invalid_settings_search_cutoff_ms", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_search_cutoff_ms" + } + "###); +} diff --git a/meilisearch/tests/settings/get_settings.rs b/meilisearch/tests/settings/get_settings.rs index 5642e854f..09e38e55a 100644 --- a/meilisearch/tests/settings/get_settings.rs +++ b/meilisearch/tests/settings/get_settings.rs @@ -35,6 +35,7 @@ static DEFAULT_SETTINGS_VALUES: Lazy> = Lazy::new(| "maxTotalHits": json!(1000), }), ); + map.insert("search_cutoff_ms", json!(null)); map }); @@ -49,12 +50,12 @@ async fn get_settings_unexisting_index() { async fn get_settings() { let server = Server::new().await; let index = server.index("test"); - index.create(None).await; - index.wait_task(0).await; + let (response, _code) = index.create(None).await; + index.wait_task(response.uid()).await; let (response, code) = index.settings().await; assert_eq!(code, 200); let settings = response.as_object().unwrap(); - assert_eq!(settings.keys().len(), 15); + assert_eq!(settings.keys().len(), 16); assert_eq!(settings["displayedAttributes"], json!(["*"])); assert_eq!(settings["searchableAttributes"], json!(["*"])); assert_eq!(settings["filterableAttributes"], json!([])); @@ -84,6 +85,7 @@ async fn get_settings() { }) ); 
assert_eq!(settings["proximityPrecision"], json!("byWord")); + assert_eq!(settings["searchCutoffMs"], json!(null)); } #[actix_rt::test] @@ -285,7 +287,8 @@ test_setting_routes!( ranking_rules put, synonyms put, pagination patch, - faceting patch + faceting patch, + search_cutoff_ms put ); #[actix_rt::test] diff --git a/milli/examples/search.rs b/milli/examples/search.rs index a94677771..8640acf42 100644 --- a/milli/examples/search.rs +++ b/milli/examples/search.rs @@ -6,7 +6,7 @@ use std::time::Instant; use heed::EnvOpenOptions; use milli::{ execute_search, filtered_universe, DefaultSearchLogger, GeoSortStrategy, Index, SearchContext, - SearchLogger, TermsMatchingStrategy, + SearchLogger, TermsMatchingStrategy, TimeBudget, }; #[global_allocator] @@ -65,6 +65,7 @@ fn main() -> Result<(), Box> { None, &mut DefaultSearchLogger, logger, + TimeBudget::max(), )?; if let Some((logger, dir)) = detailed_logger { logger.finish(&mut ctx, Path::new(dir))?; diff --git a/milli/src/index.rs b/milli/src/index.rs index 2c3977403..d921de9e4 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -67,6 +67,7 @@ pub mod main_key { pub const PAGINATION_MAX_TOTAL_HITS: &str = "pagination-max-total-hits"; pub const PROXIMITY_PRECISION: &str = "proximity-precision"; pub const EMBEDDING_CONFIGS: &str = "embedding_configs"; + pub const SEARCH_CUTOFF: &str = "search_cutoff"; } pub mod db_name { @@ -1505,6 +1506,18 @@ impl Index { _ => "default".to_owned(), }) } + + pub(crate) fn put_search_cutoff(&self, wtxn: &mut RwTxn<'_>, cutoff: u64) -> heed::Result<()> { + self.main.remap_types::().put(wtxn, main_key::SEARCH_CUTOFF, &cutoff) + } + + pub fn search_cutoff(&self, rtxn: &RoTxn<'_>) -> Result> { + Ok(self.main.remap_types::().get(rtxn, main_key::SEARCH_CUTOFF)?) 
+ } + + pub(crate) fn delete_search_cutoff(&self, wtxn: &mut RwTxn<'_>) -> heed::Result { + self.main.remap_key_type::().delete(wtxn, main_key::SEARCH_CUTOFF) + } } #[cfg(test)] @@ -2421,6 +2434,7 @@ pub(crate) mod tests { candidates: _, document_scores: _, mut documents_ids, + degraded: _, } = search.execute().unwrap(); let primary_key_id = index.fields_ids_map(&rtxn).unwrap().id("primary_key").unwrap(); documents_ids.sort_unstable(); diff --git a/milli/src/lib.rs b/milli/src/lib.rs index 5effcea3d..df44ca127 100644 --- a/milli/src/lib.rs +++ b/milli/src/lib.rs @@ -30,6 +30,7 @@ pub mod snapshot_tests; use std::collections::{BTreeMap, HashMap}; use std::convert::{TryFrom, TryInto}; +use std::fmt; use std::hash::BuildHasherDefault; use charabia::normalizer::{CharNormalizer, CompatibilityDecompositionNormalizer}; @@ -104,6 +105,73 @@ pub const MAX_WORD_LENGTH: usize = MAX_LMDB_KEY_LENGTH / 2; pub const MAX_POSITION_PER_ATTRIBUTE: u32 = u16::MAX as u32 + 1; +#[derive(Clone)] +pub struct TimeBudget { + started_at: std::time::Instant, + budget: std::time::Duration, + + /// When testing the time budget, ensuring we did more than iteration of the bucket sort can be useful. + /// But to avoid being flaky, the only option is to add the ability to stop after a specific number of calls instead of a `Duration`. 
+ #[cfg(test)] + stop_after: Option<(std::sync::Arc, usize)>, +} + +impl fmt::Debug for TimeBudget { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.debug_struct("TimeBudget") + .field("started_at", &self.started_at) + .field("budget", &self.budget) + .field("left", &(self.budget - self.started_at.elapsed())) + .finish() + } +} + +impl Default for TimeBudget { + fn default() -> Self { + Self::new(std::time::Duration::from_millis(150)) + } +} + +impl TimeBudget { + pub fn new(budget: std::time::Duration) -> Self { + Self { + started_at: std::time::Instant::now(), + budget, + + #[cfg(test)] + stop_after: None, + } + } + + pub fn max() -> Self { + Self::new(std::time::Duration::from_secs(u64::MAX)) + } + + #[cfg(test)] + pub fn with_stop_after(mut self, stop_after: usize) -> Self { + use std::sync::atomic::AtomicUsize; + use std::sync::Arc; + + self.stop_after = Some((Arc::new(AtomicUsize::new(0)), stop_after)); + self + } + + pub fn exceeded(&self) -> bool { + #[cfg(test)] + if let Some((current, stop_after)) = &self.stop_after { + let current = current.fetch_add(1, std::sync::atomic::Ordering::Relaxed); + if current >= *stop_after { + return true; + } else { + // if a number has been specified then we ignore entirely the time budget + return false; + } + } + + self.started_at.elapsed() > self.budget + } +} + // Convert an absolute word position into a relative position. // Return the field id of the attribute related to the absolute position // and the relative position in the attribute. 
diff --git a/milli/src/score_details.rs b/milli/src/score_details.rs index f6b9db58c..08dfcdbb6 100644 --- a/milli/src/score_details.rs +++ b/milli/src/score_details.rs @@ -17,6 +17,9 @@ pub enum ScoreDetails { Sort(Sort), Vector(Vector), GeoSort(GeoSort), + + /// Returned when we don't have the time to finish applying all the subsequent ranking-rules + Skipped, } #[derive(Clone, Copy)] @@ -50,6 +53,7 @@ impl ScoreDetails { ScoreDetails::Sort(_) => None, ScoreDetails::GeoSort(_) => None, ScoreDetails::Vector(_) => None, + ScoreDetails::Skipped => Some(Rank { rank: 0, max_rank: 1 }), } } @@ -97,6 +101,7 @@ impl ScoreDetails { ScoreDetails::Vector(vector) => RankOrValue::Score( vector.value_similarity.as_ref().map(|(_, s)| *s as f64).unwrap_or(0.0f64), ), + ScoreDetails::Skipped => RankOrValue::Rank(Rank { rank: 0, max_rank: 1 }), } } @@ -256,6 +261,11 @@ impl ScoreDetails { details_map.insert(vector, details); order += 1; } + ScoreDetails::Skipped => { + details_map + .insert("skipped".to_string(), serde_json::json!({ "order": order })); + order += 1; + } } } details_map diff --git a/milli/src/search/hybrid.rs b/milli/src/search/hybrid.rs index b4c79f7f5..a8b7f0fcf 100644 --- a/milli/src/search/hybrid.rs +++ b/milli/src/search/hybrid.rs @@ -10,6 +10,7 @@ struct ScoreWithRatioResult { matching_words: MatchingWords, candidates: RoaringBitmap, document_scores: Vec<(u32, ScoreWithRatio)>, + degraded: bool, } type ScoreWithRatio = (Vec, f32); @@ -49,8 +50,12 @@ fn compare_scores( order => return order, } } - (Some(ScoreValue::Score(_)), Some(_)) => return Ordering::Greater, - (Some(_), Some(ScoreValue::Score(_))) => return Ordering::Less, + (Some(ScoreValue::Score(x)), Some(_)) => { + return if x == 0. { Ordering::Less } else { Ordering::Greater } + } + (Some(_), Some(ScoreValue::Score(x))) => { + return if x == 0. 
{ Ordering::Greater } else { Ordering::Less } + } // if we have this, we're bad (Some(ScoreValue::GeoSort(_)), Some(ScoreValue::Sort(_))) | (Some(ScoreValue::Sort(_)), Some(ScoreValue::GeoSort(_))) => { @@ -72,6 +77,7 @@ impl ScoreWithRatioResult { matching_words: results.matching_words, candidates: results.candidates, document_scores, + degraded: results.degraded, } } @@ -106,6 +112,7 @@ impl ScoreWithRatioResult { candidates: left.candidates | right.candidates, documents_ids, document_scores, + degraded: left.degraded | right.degraded, } } } @@ -131,6 +138,7 @@ impl<'a> Search<'a> { index: self.index, distribution_shift: self.distribution_shift, embedder_name: self.embedder_name.clone(), + time_budget: self.time_budget.clone(), }; let vector_query = search.vector.take(); diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index dc8354486..b3dd0c091 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -11,7 +11,7 @@ use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::vector::DistributionShift; use crate::{ execute_search, filtered_universe, AscDesc, DefaultSearchLogger, DocumentId, Index, Result, - SearchContext, + SearchContext, TimeBudget, }; // Building these factories is not free. 
@@ -43,6 +43,8 @@ pub struct Search<'a> { index: &'a Index, distribution_shift: Option, embedder_name: Option, + + time_budget: TimeBudget, } impl<'a> Search<'a> { @@ -64,6 +66,7 @@ impl<'a> Search<'a> { index, distribution_shift: None, embedder_name: None, + time_budget: TimeBudget::max(), } } @@ -143,6 +146,11 @@ impl<'a> Search<'a> { self } + pub fn time_budget(&mut self, time_budget: TimeBudget) -> &mut Search<'a> { + self.time_budget = time_budget; + self + } + pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result { if has_vector_search { let ctx = SearchContext::new(self.index, self.rtxn); @@ -169,36 +177,43 @@ impl<'a> Search<'a> { } let universe = filtered_universe(&ctx, &self.filter)?; - let PartialSearchResult { located_query_terms, candidates, documents_ids, document_scores } = - match self.vector.as_ref() { - Some(vector) => execute_vector_search( - &mut ctx, - vector, - self.scoring_strategy, - universe, - &self.sort_criteria, - self.geo_strategy, - self.offset, - self.limit, - self.distribution_shift, - embedder_name, - )?, - None => execute_search( - &mut ctx, - self.query.as_deref(), - self.terms_matching_strategy, - self.scoring_strategy, - self.exhaustive_number_hits, - universe, - &self.sort_criteria, - self.geo_strategy, - self.offset, - self.limit, - Some(self.words_limit), - &mut DefaultSearchLogger, - &mut DefaultSearchLogger, - )?, - }; + let PartialSearchResult { + located_query_terms, + candidates, + documents_ids, + document_scores, + degraded, + } = match self.vector.as_ref() { + Some(vector) => execute_vector_search( + &mut ctx, + vector, + self.scoring_strategy, + universe, + &self.sort_criteria, + self.geo_strategy, + self.offset, + self.limit, + self.distribution_shift, + embedder_name, + self.time_budget.clone(), + )?, + None => execute_search( + &mut ctx, + self.query.as_deref(), + self.terms_matching_strategy, + self.scoring_strategy, + self.exhaustive_number_hits, + universe, + &self.sort_criteria, + 
self.geo_strategy, + self.offset, + self.limit, + Some(self.words_limit), + &mut DefaultSearchLogger, + &mut DefaultSearchLogger, + self.time_budget.clone(), + )?, + }; // consume context and located_query_terms to build MatchingWords. let matching_words = match located_query_terms { @@ -206,7 +221,7 @@ impl<'a> Search<'a> { None => MatchingWords::default(), }; - Ok(SearchResult { matching_words, candidates, document_scores, documents_ids }) + Ok(SearchResult { matching_words, candidates, document_scores, documents_ids, degraded }) } } @@ -229,6 +244,7 @@ impl fmt::Debug for Search<'_> { index: _, distribution_shift, embedder_name, + time_budget, } = self; f.debug_struct("Search") .field("query", query) @@ -244,6 +260,7 @@ impl fmt::Debug for Search<'_> { .field("words_limit", words_limit) .field("distribution_shift", distribution_shift) .field("embedder_name", embedder_name) + .field("time_budget", time_budget) .finish() } } @@ -254,6 +271,7 @@ pub struct SearchResult { pub candidates: RoaringBitmap, pub documents_ids: Vec, pub document_scores: Vec>, + pub degraded: bool, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] diff --git a/milli/src/search/new/bucket_sort.rs b/milli/src/search/new/bucket_sort.rs index 02528e378..521fcb983 100644 --- a/milli/src/search/new/bucket_sort.rs +++ b/milli/src/search/new/bucket_sort.rs @@ -5,12 +5,14 @@ use super::ranking_rules::{BoxRankingRule, RankingRuleQueryTrait}; use super::SearchContext; use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::search::new::distinct::{apply_distinct_rule, distinct_single_docid, DistinctOutput}; -use crate::Result; +use crate::{Result, TimeBudget}; pub struct BucketSortOutput { pub docids: Vec, pub scores: Vec>, pub all_candidates: RoaringBitmap, + + pub degraded: bool, } // TODO: would probably be good to regroup some of these inside of a struct? 
@@ -25,6 +27,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( length: usize, scoring_strategy: ScoringStrategy, logger: &mut dyn SearchLogger, + time_budget: TimeBudget, ) -> Result { logger.initial_query(query); logger.ranking_rules(&ranking_rules); @@ -41,6 +44,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( docids: vec![], scores: vec![], all_candidates: universe.clone(), + degraded: false, }); } if ranking_rules.is_empty() { @@ -74,6 +78,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( scores: vec![Default::default(); results.len()], docids: results, all_candidates, + degraded: false, }); } else { let docids: Vec = universe.iter().skip(from).take(length).collect(); @@ -81,6 +86,7 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( scores: vec![Default::default(); docids.len()], docids, all_candidates: universe.clone(), + degraded: false, }); }; } @@ -154,6 +160,28 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( } while valid_docids.len() < length { + if time_budget.exceeded() { + loop { + let bucket = std::mem::take(&mut ranking_rule_universes[cur_ranking_rule_index]); + ranking_rule_scores.push(ScoreDetails::Skipped); + maybe_add_to_results!(bucket); + ranking_rule_scores.pop(); + + if cur_ranking_rule_index == 0 { + break; + } + + back!(); + } + + return Ok(BucketSortOutput { + scores: valid_scores, + docids: valid_docids, + all_candidates, + degraded: true, + }); + } + // The universe for this bucket is zero, so we don't need to sort // anything, just go back to the parent ranking rule. if ranking_rule_universes[cur_ranking_rule_index].is_empty() @@ -219,7 +247,12 @@ pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( )?; } - Ok(BucketSortOutput { docids: valid_docids, scores: valid_scores, all_candidates }) + Ok(BucketSortOutput { + docids: valid_docids, + scores: valid_scores, + all_candidates, + degraded: false, + }) } /// Add the candidates to the results. 
Take `distinct`, `from`, `length`, and `cur_offset` diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index 8de1d9262..2913f206d 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -502,7 +502,7 @@ mod tests { use super::*; use crate::index::tests::TempIndex; - use crate::{execute_search, filtered_universe, SearchContext}; + use crate::{execute_search, filtered_universe, SearchContext, TimeBudget}; impl<'a> MatcherBuilder<'a> { fn new_test(rtxn: &'a heed::RoTxn, index: &'a TempIndex, query: &str) -> Self { @@ -522,6 +522,7 @@ mod tests { Some(10), &mut crate::DefaultSearchLogger, &mut crate::DefaultSearchLogger, + TimeBudget::max(), ) .unwrap(); diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index ae661e3f6..ad996f363 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -52,7 +52,8 @@ use crate::score_details::{ScoreDetails, ScoringStrategy}; use crate::search::new::distinct::apply_distinct_rule; use crate::vector::DistributionShift; use crate::{ - AscDesc, DocumentId, FieldId, Filter, Index, Member, Result, TermsMatchingStrategy, UserError, + AscDesc, DocumentId, FieldId, Filter, Index, Member, Result, TermsMatchingStrategy, TimeBudget, + UserError, }; /// A structure used throughout the execution of a search query. 
@@ -518,6 +519,7 @@ pub fn execute_vector_search( length: usize, distribution_shift: Option, embedder_name: &str, + time_budget: TimeBudget, ) -> Result { check_sort_criteria(ctx, sort_criteria.as_ref())?; @@ -537,7 +539,7 @@ pub fn execute_vector_search( let placeholder_search_logger: &mut dyn SearchLogger = &mut placeholder_search_logger; - let BucketSortOutput { docids, scores, all_candidates } = bucket_sort( + let BucketSortOutput { docids, scores, all_candidates, degraded } = bucket_sort( ctx, ranking_rules, &PlaceholderQuery, @@ -546,6 +548,7 @@ pub fn execute_vector_search( length, scoring_strategy, placeholder_search_logger, + time_budget, )?; Ok(PartialSearchResult { @@ -553,6 +556,7 @@ pub fn execute_vector_search( document_scores: scores, documents_ids: docids, located_query_terms: None, + degraded, }) } @@ -572,6 +576,7 @@ pub fn execute_search( words_limit: Option, placeholder_search_logger: &mut dyn SearchLogger, query_graph_logger: &mut dyn SearchLogger, + time_budget: TimeBudget, ) -> Result { check_sort_criteria(ctx, sort_criteria.as_ref())?; @@ -648,6 +653,7 @@ pub fn execute_search( length, scoring_strategy, query_graph_logger, + time_budget, )? } else { let ranking_rules = @@ -661,10 +667,11 @@ pub fn execute_search( length, scoring_strategy, placeholder_search_logger, + time_budget, )? 
}; - let BucketSortOutput { docids, scores, mut all_candidates } = bucket_sort_output; + let BucketSortOutput { docids, scores, mut all_candidates, degraded } = bucket_sort_output; let fields_ids_map = ctx.index.fields_ids_map(ctx.txn)?; // The candidates is the universe unless the exhaustive number of hits @@ -682,6 +689,7 @@ pub fn execute_search( document_scores: scores, documents_ids: docids, located_query_terms, + degraded, }) } @@ -742,4 +750,6 @@ pub struct PartialSearchResult { pub candidates: RoaringBitmap, pub documents_ids: Vec, pub document_scores: Vec>, + + pub degraded: bool, } diff --git a/milli/src/search/new/tests/cutoff.rs b/milli/src/search/new/tests/cutoff.rs new file mode 100644 index 000000000..63b67f2e7 --- /dev/null +++ b/milli/src/search/new/tests/cutoff.rs @@ -0,0 +1,429 @@ +//! This module tests the search cutoff and ensures a few things: +//! 1. A basic test works and marks the search as degraded +//! 2. A test that ensures the filters are effectively applied even with a cutoff of 0 +//! 3. A test that ensures the cutoff works well with the ranking scores + +use std::time::Duration; + +use big_s::S; +use maplit::hashset; +use meili_snap::snapshot; + +use crate::index::tests::TempIndex; +use crate::score_details::{ScoreDetails, ScoringStrategy}; +use crate::{Criterion, Filter, Search, TimeBudget}; + +fn create_index() -> TempIndex { + let index = TempIndex::new(); + + index + .update_settings(|s| { + s.set_primary_key("id".to_owned()); + s.set_searchable_fields(vec!["text".to_owned()]); + s.set_filterable_fields(hashset!
{ S("id") }); + s.set_criteria(vec![Criterion::Words, Criterion::Typo]); + }) + .unwrap(); + + // reverse the ID / insertion order so we see better what was sorted from what got the insertion order ordering + index + .add_documents(documents!([ + { + "id": 4, + "text": "hella puppo kefir", + }, + { + "id": 3, + "text": "hella puppy kefir", + }, + { + "id": 2, + "text": "hello", + }, + { + "id": 1, + "text": "hello puppy", + }, + { + "id": 0, + "text": "hello puppy kefir", + }, + ])) + .unwrap(); + index +} + +#[test] +fn basic_degraded_search() { + let index = create_index(); + let rtxn = index.read_txn().unwrap(); + + let mut search = Search::new(&rtxn, &index); + search.query("hello puppy kefir"); + search.limit(3); + search.time_budget(TimeBudget::new(Duration::from_millis(0))); + + let result = search.execute().unwrap(); + assert!(result.degraded); +} + +#[test] +fn degraded_search_cannot_skip_filter() { + let index = create_index(); + let rtxn = index.read_txn().unwrap(); + + let mut search = Search::new(&rtxn, &index); + search.query("hello puppy kefir"); + search.limit(100); + search.time_budget(TimeBudget::new(Duration::from_millis(0))); + let filter_condition = Filter::from_str("id > 2").unwrap().unwrap(); + search.filter(filter_condition); + + let result = search.execute().unwrap(); + assert!(result.degraded); + snapshot!(format!("{:?}\n{:?}", result.candidates, result.documents_ids), @r###" + RoaringBitmap<[0, 1]> + [0, 1] + "###); +} + +#[test] +#[allow(clippy::format_collect)] // the test is already quite big +fn degraded_search_and_score_details() { + let index = create_index(); + let rtxn = index.read_txn().unwrap(); + + let mut search = Search::new(&rtxn, &index); + search.query("hello puppy kefir"); + search.limit(4); + search.scoring_strategy(ScoringStrategy::Detailed); + search.time_budget(TimeBudget::max()); + + let result = search.execute().unwrap(); + snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, 
result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::(), result.document_scores), @r###" + IDs: [4, 1, 0, 3] + Scores: 1.0000 0.9167 0.8333 0.6667 + Score Details: + [ + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 3, + }, + ), + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Typo( + Typo { + typo_count: 1, + max_typo_count: 3, + }, + ), + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Typo( + Typo { + typo_count: 2, + max_typo_count: 3, + }, + ), + ], + [ + Words( + Words { + matching_words: 2, + max_matching_words: 3, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 2, + }, + ), + ], + ] + "###); + + // Do ONE loop iteration. Not much can be deduced, almost everyone matched the words first bucket. + search.time_budget(TimeBudget::max().with_stop_after(1)); + + let result = search.execute().unwrap(); + snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::(), result.document_scores), @r###" + IDs: [0, 1, 4, 2] + Scores: 0.6667 0.6667 0.6667 0.0000 + Score Details: + [ + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Skipped, + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Skipped, + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Skipped, + ], + [ + Skipped, + ], + ] + "###); + + // Do TWO loop iterations. 
The first document should be entirely sorted + search.time_budget(TimeBudget::max().with_stop_after(2)); + + let result = search.execute().unwrap(); + snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::(), result.document_scores), @r###" + IDs: [4, 0, 1, 2] + Scores: 1.0000 0.6667 0.6667 0.0000 + Score Details: + [ + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 3, + }, + ), + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Skipped, + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Skipped, + ], + [ + Skipped, + ], + ] + "###); + + // Do THREE loop iterations. The second document should be entirely sorted as well + search.time_budget(TimeBudget::max().with_stop_after(3)); + + let result = search.execute().unwrap(); + snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::(), result.document_scores), @r###" + IDs: [4, 1, 0, 2] + Scores: 1.0000 0.9167 0.6667 0.0000 + Score Details: + [ + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 3, + }, + ), + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Typo( + Typo { + typo_count: 1, + max_typo_count: 3, + }, + ), + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Skipped, + ], + [ + Skipped, + ], + ] + "###); + + // Do FOUR loop iterations. The third document should be entirely sorted as well + // The words bucket have still not progressed thus the last document doesn't have any info yet. 
+ search.time_budget(TimeBudget::max().with_stop_after(4)); + + let result = search.execute().unwrap(); + snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::(), result.document_scores), @r###" + IDs: [4, 1, 0, 2] + Scores: 1.0000 0.9167 0.8333 0.0000 + Score Details: + [ + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 3, + }, + ), + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Typo( + Typo { + typo_count: 1, + max_typo_count: 3, + }, + ), + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Typo( + Typo { + typo_count: 2, + max_typo_count: 3, + }, + ), + ], + [ + Skipped, + ], + ] + "###); + + // After SIX loop iteration. The words ranking rule gave us a new bucket. + // Since we reached the limit we were able to early exit without checking the typo ranking rule. 
+ search.time_budget(TimeBudget::max().with_stop_after(6)); + + let result = search.execute().unwrap(); + snapshot!(format!("IDs: {:?}\nScores: {}\nScore Details:\n{:#?}", result.documents_ids, result.document_scores.iter().map(|scores| format!("{:.4} ", ScoreDetails::global_score(scores.iter()))).collect::(), result.document_scores), @r###" + IDs: [4, 1, 0, 3] + Scores: 1.0000 0.9167 0.8333 0.3333 + Score Details: + [ + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Typo( + Typo { + typo_count: 0, + max_typo_count: 3, + }, + ), + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Typo( + Typo { + typo_count: 1, + max_typo_count: 3, + }, + ), + ], + [ + Words( + Words { + matching_words: 3, + max_matching_words: 3, + }, + ), + Typo( + Typo { + typo_count: 2, + max_typo_count: 3, + }, + ), + ], + [ + Words( + Words { + matching_words: 2, + max_matching_words: 3, + }, + ), + Skipped, + ], + ] + "###); +} diff --git a/milli/src/search/new/tests/mod.rs b/milli/src/search/new/tests/mod.rs index e500d16fb..26199b79b 100644 --- a/milli/src/search/new/tests/mod.rs +++ b/milli/src/search/new/tests/mod.rs @@ -1,5 +1,6 @@ pub mod attribute_fid; pub mod attribute_position; +pub mod cutoff; pub mod distinct; pub mod exactness; pub mod geo_sort; diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 46014202b..f54f45e1e 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -150,6 +150,7 @@ pub struct Settings<'a, 't, 'i> { pagination_max_total_hits: Setting, proximity_precision: Setting, embedder_settings: Setting>>, + search_cutoff: Setting, } impl<'a, 't, 'i> Settings<'a, 't, 'i> { @@ -183,6 +184,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { pagination_max_total_hits: Setting::NotSet, proximity_precision: Setting::NotSet, embedder_settings: Setting::NotSet, + search_cutoff: Setting::NotSet, indexer_config, } } @@ -373,6 +375,14 @@ impl<'a, 't, 'i> Settings<'a, 't, 
'i> { self.embedder_settings = Setting::Reset; } + pub fn set_search_cutoff(&mut self, value: u64) { + self.search_cutoff = Setting::Set(value); + } + + pub fn reset_search_cutoff(&mut self) { + self.search_cutoff = Setting::Reset; + } + #[tracing::instrument( level = "trace" skip(self, progress_callback, should_abort, old_fields_ids_map), @@ -1026,6 +1036,24 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { Ok(update) } + fn update_search_cutoff(&mut self) -> Result { + let changed = match self.search_cutoff { + Setting::Set(new) => { + let old = self.index.search_cutoff(self.wtxn)?; + if old == Some(new) { + false + } else { + self.index.put_search_cutoff(self.wtxn, new)?; + true + } + } + Setting::Reset => self.index.delete_search_cutoff(self.wtxn)?, + Setting::NotSet => false, + }; + + Ok(changed) + } + pub fn execute(mut self, progress_callback: FP, should_abort: FA) -> Result<()> where FP: Fn(UpdateIndexingStep) + Sync, @@ -1079,6 +1107,9 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> { // 3. Keep the old vectors but reattempt indexing on a prompt change: only actually changed prompt will need embedding + storage let embedding_configs_updated = self.update_embedding_configs()?; + // never trigger re-indexing + self.update_search_cutoff()?; + if stop_words_updated || non_separator_tokens_updated || separator_tokens_updated @@ -2035,6 +2066,7 @@ mod tests { pagination_max_total_hits, proximity_precision, embedder_settings, + search_cutoff, } = settings; assert!(matches!(searchable_fields, Setting::NotSet)); assert!(matches!(displayed_fields, Setting::NotSet)); @@ -2058,6 +2090,7 @@ mod tests { assert!(matches!(pagination_max_total_hits, Setting::NotSet)); assert!(matches!(proximity_precision, Setting::NotSet)); assert!(matches!(embedder_settings, Setting::NotSet)); + assert!(matches!(search_cutoff, Setting::NotSet)); }) .unwrap(); }