4313: Fix document formatting performances r=Kerollmops a=ManyTheFish

reduce the formatted option list to the attributes that should be formatted,
instead of all the attributes to display.
The time to compute the `format` list scales with the number of fields to format;
cumulated with `map_leaf_values` that iterates over all the nested fields, it gives a quadratic complexity:
`d*f` where `d` is the total number of fields to display and `f` is the total number of fields to format.

Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2024-01-11 14:19:44 +00:00 committed by GitHub
commit e93d36d5b9
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
5 changed files with 24 additions and 6 deletions

View File

@ -900,6 +900,14 @@ fn format_fields<'a>(
let mut matches_position = compute_matches.then(BTreeMap::new);
let mut document = document.clone();
// reduce the formatted option list to the attributes that should be formatted,
// instead of all the attributes to display.
let formatting_fields_options: Vec<_> = formatted_options
.iter()
.filter(|(_, option)| option.should_format())
.map(|(fid, option)| (field_ids_map.name(*fid).unwrap(), option))
.collect();
// select the attributes to retrieve
let displayable_names =
displayable_ids.iter().map(|&fid| field_ids_map.name(fid).expect("Missing field name"));
@ -908,13 +916,15 @@ fn format_fields<'a>(
// to the value and merge them together. eg. If a user said he wanted to highlight `doggo`
// and crop `doggo.name`. `doggo.name` needs to be highlighted + cropped while `doggo.age` is only
// highlighted.
let format = formatted_options
// Warn: The time to compute the format list scales with the number of fields to format;
// cumulated with map_leaf_values that iterates over all the nested fields, it gives a quadratic complexity:
// d*f where d is the total number of fields to display and f is the total number of fields to format.
let format = formatting_fields_options
.iter()
.filter(|(field, _option)| {
let name = field_ids_map.name(**field).unwrap();
.filter(|(name, _option)| {
milli::is_faceted_by(name, key) || milli::is_faceted_by(key, name)
})
.map(|(_, option)| *option)
.map(|(_, option)| **option)
.reduce(|acc, option| acc.merge(option));
let mut infos = Vec::new();
@ -1011,7 +1021,7 @@ fn format_value<'a>(
let value = matcher.format(format_options);
Value::String(value.into_owned())
}
None => Value::Number(number),
None => Value::String(s),
}
}
value => value,

View File

@ -15,6 +15,7 @@ pub struct BucketSortOutput {
// TODO: would probably be good to regroup some of these inside of a struct?
#[allow(clippy::too_many_arguments)]
#[logging_timer::time]
pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>(
ctx: &mut SearchContext<'ctx>,
mut ranking_rules: Vec<BoxRankingRule<'ctx, Q>>,

View File

@ -72,7 +72,7 @@ impl<'m> MatcherBuilder<'m> {
}
}
#[derive(Copy, Clone, Default)]
#[derive(Copy, Clone, Default, Debug)]
pub struct FormatOptions {
pub highlight: bool,
pub crop: Option<usize>,
@ -82,6 +82,10 @@ impl FormatOptions {
pub fn merge(self, other: Self) -> Self {
Self { highlight: self.highlight || other.highlight, crop: self.crop.or(other.crop) }
}
pub fn should_format(&self) -> bool {
self.highlight || self.crop.is_some()
}
}
#[derive(Clone, Debug)]

View File

@ -191,6 +191,7 @@ fn resolve_maximally_reduced_query_graph(
Ok(docids)
}
#[logging_timer::time]
fn resolve_universe(
ctx: &mut SearchContext,
initial_universe: &RoaringBitmap,
@ -556,6 +557,7 @@ pub fn execute_vector_search(
}
#[allow(clippy::too_many_arguments)]
#[logging_timer::time]
pub fn execute_search(
ctx: &mut SearchContext,
query: Option<&str>,

View File

@ -5,6 +5,7 @@ use super::*;
use crate::{Result, SearchContext, MAX_WORD_LENGTH};
/// Convert the tokenised search query into a list of located query terms.
#[logging_timer::time]
pub fn located_query_terms_from_tokens(
ctx: &mut SearchContext,
query: NormalizedTokenIter,