From 5f4fc6c95569b5b3c7aa81ffee3eea02d0100095 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 11 Jan 2024 09:44:16 +0100 Subject: [PATCH 1/5] Add timer logs --- milli/src/search/new/bucket_sort.rs | 1 + milli/src/search/new/mod.rs | 2 ++ milli/src/search/new/query_term/parse_query.rs | 1 + 3 files changed, 4 insertions(+) diff --git a/milli/src/search/new/bucket_sort.rs b/milli/src/search/new/bucket_sort.rs index b46f6124f..b439b87ec 100644 --- a/milli/src/search/new/bucket_sort.rs +++ b/milli/src/search/new/bucket_sort.rs @@ -15,6 +15,7 @@ pub struct BucketSortOutput { // TODO: would probably be good to regroup some of these inside of a struct? #[allow(clippy::too_many_arguments)] +#[logging_timer::time] pub fn bucket_sort<'ctx, Q: RankingRuleQueryTrait>( ctx: &mut SearchContext<'ctx>, mut ranking_rules: Vec>, diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs index 405b9747d..7b3b1d5b2 100644 --- a/milli/src/search/new/mod.rs +++ b/milli/src/search/new/mod.rs @@ -191,6 +191,7 @@ fn resolve_maximally_reduced_query_graph( Ok(docids) } +#[logging_timer::time] fn resolve_universe( ctx: &mut SearchContext, initial_universe: &RoaringBitmap, @@ -556,6 +557,7 @@ pub fn execute_vector_search( } #[allow(clippy::too_many_arguments)] +#[logging_timer::time] pub fn execute_search( ctx: &mut SearchContext, query: Option<&str>, diff --git a/milli/src/search/new/query_term/parse_query.rs b/milli/src/search/new/query_term/parse_query.rs index 64fe07a31..865075d97 100644 --- a/milli/src/search/new/query_term/parse_query.rs +++ b/milli/src/search/new/query_term/parse_query.rs @@ -5,6 +5,7 @@ use super::*; use crate::{Result, SearchContext, MAX_WORD_LENGTH}; /// Convert the tokenised search query into a list of located query terms. +#[logging_timer::time] pub fn located_query_terms_from_tokens( ctx: &mut SearchContext, query: NormalizedTokenIter, From 5f5a4868953e94f885654a6d2b8bfbd8afae3da0 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 11 Jan 2024 11:36:41 +0100 Subject: [PATCH 2/5] Reduce formatting time --- meilisearch/src/search.rs | 23 +++++++++++++++++++---- milli/src/search/new/matches/mod.rs | 6 +++++- 2 files changed, 24 insertions(+), 5 deletions(-) diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index b5dba8a58..7e783aa42 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -897,6 +897,14 @@ fn format_fields<'a>( let mut matches_position = compute_matches.then(BTreeMap::new); let mut document = document.clone(); + // reduce the formated option list to the attributes that should be formatted, + // instead of all the attribute to display. + let formating_fields_options: Vec<_> = formatted_options + .iter() + .filter(|(_, option)| option.should_format()) + .map(|(fid, option)| (field_ids_map.name(*fid).unwrap(), option)) + .collect(); + // select the attributes to retrieve let displayable_names = displayable_ids.iter().map(|&fid| field_ids_map.name(fid).expect("Missing field name")); @@ -905,13 +913,15 @@ fn format_fields<'a>( // to the value and merge them together. eg. If a user said he wanted to highlight `doggo` // and crop `doggo.name`. `doggo.name` needs to be highlighted + cropped while `doggo.age` is only // highlighted. - let format = formatted_options + // Warn: The time to compute the `format` list scales with the number of field to format; + // cummulated with `map_leaf_values` that iterates over all the nested fields, it gives a quadratic complexity: + // `d*f` where `d` is the total number of field to display and `f` the total number of field to format. + let format = formating_fields_options .iter() - .filter(|(field, _option)| { - let name = field_ids_map.name(**field).unwrap(); + .filter(|(name, _option)| { milli::is_faceted_by(name, key) || milli::is_faceted_by(key, name) }) - .map(|(_, option)| *option) + .map(|(_, option)| **option) .reduce(|acc, option| acc.merge(option)); let mut infos = Vec::new(); @@ -941,6 +951,11 @@ fn format_value<'a>( infos: &mut Vec, compute_matches: bool, ) -> Value { + // early skip recursive function if nothing needs to be changed. + if !format_options.as_ref().map_or(false, FormatOptions::should_format) && !compute_matches { + return value; + } + match value { Value::String(old_string) => { let mut matcher = builder.build(&old_string); diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index 067fa1efd..8de1d9262 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -72,7 +72,7 @@ impl<'m> MatcherBuilder<'m> { } } -#[derive(Copy, Clone, Default)] +#[derive(Copy, Clone, Default, Debug)] pub struct FormatOptions { pub highlight: bool, pub crop: Option, @@ -82,6 +82,10 @@ impl FormatOptions { pub fn merge(self, other: Self) -> Self { Self { highlight: self.highlight || other.highlight, crop: self.crop.or(other.crop) } } + + pub fn should_format(&self) -> bool { + self.highlight || self.crop.is_some() + } } #[derive(Clone, Debug)] From 81b6128b29b612049c5579ca6fb9082d5f46a216 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 11 Jan 2024 12:28:32 +0100 Subject: [PATCH 3/5] Update tests --- meilisearch/tests/settings/tokenizer_customization.rs | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/meilisearch/tests/settings/tokenizer_customization.rs b/meilisearch/tests/settings/tokenizer_customization.rs index 4602e31f7..7f205eb30 100644 --- a/meilisearch/tests/settings/tokenizer_customization.rs +++ b/meilisearch/tests/settings/tokenizer_customization.rs @@ -94,7 +94,7 @@ async fn set_and_search() { "id": 1, "content": "Mac & cheese", "_formatted": { - "id": "1", + "id": 1, "content": "Mac & cheese" } }, @@ -102,7 +102,7 @@ async fn set_and_search() { "id": 3, "content": "Mac&sep&&sepcheese", "_formatted": { - "id": "3", + "id": 3, "content": "Mac&sep&&sepcheese" } } @@ -254,7 +254,7 @@ async fn advanced_synergies() { "id": 1, "content": "J.R.R. Tolkien", "_formatted": { - "id": "1", + "id": 1, "content": "J.R.R. Tolkien" } }, @@ -262,7 +262,7 @@ async fn advanced_synergies() { "id": 2, "content": "J. R. R. Tolkien", "_formatted": { - "id": "2", + "id": 2, "content": "J. R. R. Tolkien" } }, @@ -270,7 +270,7 @@ async fn advanced_synergies() { "id": 3, "content": "jrr Tolkien", "_formatted": { - "id": "3", + "id": 3, "content": "jrr Tolkien" } } From 86270e687837fcafc2730b017c29a01d3d2e6613 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 11 Jan 2024 12:43:56 +0100 Subject: [PATCH 4/5] Transform fields contained into _format into strings --- meilisearch/src/search.rs | 7 +------ meilisearch/tests/settings/tokenizer_customization.rs | 10 +++++----- 2 files changed, 6 insertions(+), 11 deletions(-) diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index 7e783aa42..6893081d4 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -951,11 +951,6 @@ fn format_value<'a>( infos: &mut Vec, compute_matches: bool, ) -> Value { - // early skip recursive function if nothing needs to be changed. - if !format_options.as_ref().map_or(false, FormatOptions::should_format) && !compute_matches { - return value; - } - match value { Value::String(old_string) => { let mut matcher = builder.build(&old_string); @@ -1023,7 +1018,7 @@ fn format_value<'a>( let value = matcher.format(format_options); Value::String(value.into_owned()) } - None => Value::Number(number), + None => Value::String(s), } } value => value, diff --git a/meilisearch/tests/settings/tokenizer_customization.rs b/meilisearch/tests/settings/tokenizer_customization.rs index 7f205eb30..4602e31f7 100644 --- a/meilisearch/tests/settings/tokenizer_customization.rs +++ b/meilisearch/tests/settings/tokenizer_customization.rs @@ -94,7 +94,7 @@ async fn set_and_search() { "id": 1, "content": "Mac & cheese", "_formatted": { - "id": 1, + "id": "1", "content": "Mac & cheese" } }, @@ -102,7 +102,7 @@ async fn set_and_search() { "id": 3, "content": "Mac&sep&&sepcheese", "_formatted": { - "id": 3, + "id": "3", "content": "Mac&sep&&sepcheese" } } @@ -254,7 +254,7 @@ async fn advanced_synergies() { "id": 1, "content": "J.R.R. Tolkien", "_formatted": { - "id": 1, + "id": "1", "content": "J.R.R. Tolkien" } }, @@ -262,7 +262,7 @@ async fn advanced_synergies() { "id": 2, "content": "J. R. R. Tolkien", "_formatted": { - "id": 2, + "id": "2", "content": "J. R. R. Tolkien" } }, @@ -270,7 +270,7 @@ async fn advanced_synergies() { "id": 3, "content": "jrr Tolkien", "_formatted": { - "id": 3, + "id": "3", "content": "jrr Tolkien" } } From 95f8e2153394f3089602b1ffa16b3548c520cc98 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Thu, 11 Jan 2024 15:07:08 +0100 Subject: [PATCH 5/5] fix typos --- meilisearch/src/search.rs | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/meilisearch/src/search.rs b/meilisearch/src/search.rs index 6893081d4..28ca5a6e1 100644 --- a/meilisearch/src/search.rs +++ b/meilisearch/src/search.rs @@ -897,9 +897,9 @@ fn format_fields<'a>( let mut matches_position = compute_matches.then(BTreeMap::new); let mut document = document.clone(); - // reduce the formated option list to the attributes that should be formatted, - // instead of all the attribute to display. - let formating_fields_options: Vec<_> = formatted_options + // reduce the formatted option list to the attributes that should be formatted, + // instead of all the attributes to display. + let formatting_fields_options: Vec<_> = formatted_options .iter() .filter(|(_, option)| option.should_format()) .map(|(fid, option)| (field_ids_map.name(*fid).unwrap(), option)) @@ -913,10 +913,10 @@ fn format_fields<'a>( // to the value and merge them together. eg. If a user said he wanted to highlight `doggo` // and crop `doggo.name`. `doggo.name` needs to be highlighted + cropped while `doggo.age` is only // highlighted. - // Warn: The time to compute the `format` list scales with the number of field to format; - // cummulated with `map_leaf_values` that iterates over all the nested fields, it gives a quadratic complexity: - // `d*f` where `d` is the total number of field to display and `f` the total number of field to format. - let format = formating_fields_options + // Warn: The time to compute the format list scales with the number of fields to format; + // cumulated with map_leaf_values that iterates over all the nested fields, it gives a quadratic complexity: + // d*f where d is the total number of fields to display and f is the total number of fields to format. + let format = formatting_fields_options .iter() .filter(|(name, _option)| { milli::is_faceted_by(name, key) || milli::is_faceted_by(key, name)