4466: Implements the search cutoff r=irevoire a=irevoire

# Pull Request

## Related issue
Fixes https://github.com/meilisearch/meilisearch/issues/4488

## What does this PR do?
- Adds a cutoff to the bucket sort after 150ms has been spent
- Adds a new setting to customize the default value of 150ms
- When the time is exceeded, we exit early with what we had the time to sort
- If the cutoff has been reached, the search details are updated with a new `Skip` ranking details for the ranking rules that were skipped
- Adds analytics to measure the total number of degraded search requests
- Adds the number of degraded search requests to the Prometheus metrics and Grafana dashboard
- The cutoff **must not** skip the filters; otherwise, we would leak documents to people who don’t have the right to see them


Co-authored-by: Tamo <tamo@meilisearch.com>
Co-authored-by: Louis Dureuil <louis@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2024-03-20 13:06:53 +00:00 committed by GitHub
commit fc1c3f4a29
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
28 changed files with 1023 additions and 89 deletions

View file

@ -150,6 +150,7 @@ pub struct Settings<'a, 't, 'i> {
pagination_max_total_hits: Setting<usize>,
proximity_precision: Setting<ProximityPrecision>,
embedder_settings: Setting<BTreeMap<String, Setting<EmbeddingSettings>>>,
search_cutoff: Setting<u64>,
}
impl<'a, 't, 'i> Settings<'a, 't, 'i> {
@ -183,6 +184,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
pagination_max_total_hits: Setting::NotSet,
proximity_precision: Setting::NotSet,
embedder_settings: Setting::NotSet,
search_cutoff: Setting::NotSet,
indexer_config,
}
}
@ -373,6 +375,14 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
self.embedder_settings = Setting::Reset;
}
pub fn set_search_cutoff(&mut self, value: u64) {
self.search_cutoff = Setting::Set(value);
}
pub fn reset_search_cutoff(&mut self) {
self.search_cutoff = Setting::Reset;
}
#[tracing::instrument(
level = "trace"
skip(self, progress_callback, should_abort, old_fields_ids_map),
@ -1026,6 +1036,24 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
Ok(update)
}
fn update_search_cutoff(&mut self) -> Result<bool> {
let changed = match self.search_cutoff {
Setting::Set(new) => {
let old = self.index.search_cutoff(self.wtxn)?;
if old == Some(new) {
false
} else {
self.index.put_search_cutoff(self.wtxn, new)?;
true
}
}
Setting::Reset => self.index.delete_search_cutoff(self.wtxn)?,
Setting::NotSet => false,
};
Ok(changed)
}
pub fn execute<FP, FA>(mut self, progress_callback: FP, should_abort: FA) -> Result<()>
where
FP: Fn(UpdateIndexingStep) + Sync,
@ -1079,6 +1107,9 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
// 3. Keep the old vectors but reattempt indexing on a prompt change: only actually changed prompt will need embedding + storage
let embedding_configs_updated = self.update_embedding_configs()?;
// never trigger re-indexing
self.update_search_cutoff()?;
if stop_words_updated
|| non_separator_tokens_updated
|| separator_tokens_updated
@ -2035,6 +2066,7 @@ mod tests {
pagination_max_total_hits,
proximity_precision,
embedder_settings,
search_cutoff,
} = settings;
assert!(matches!(searchable_fields, Setting::NotSet));
assert!(matches!(displayed_fields, Setting::NotSet));
@ -2058,6 +2090,7 @@ mod tests {
assert!(matches!(pagination_max_total_hits, Setting::NotSet));
assert!(matches!(proximity_precision, Setting::NotSet));
assert!(matches!(embedder_settings, Setting::NotSet));
assert!(matches!(search_cutoff, Setting::NotSet));
})
.unwrap();
}