From ade54493aba062de21bebc79d6607e026775d655 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 14 Aug 2024 11:33:17 +0200 Subject: [PATCH] Only detect language for a facet if several locales have been specified by the user in the settings --- meilisearch/src/search/mod.rs | 14 ++++++++++---- milli/src/search/facet/search.rs | 10 +++++++++- .../extract/extract_facet_string_docids.rs | 9 +++++++-- 3 files changed, 26 insertions(+), 7 deletions(-) diff --git a/meilisearch/src/search/mod.rs b/meilisearch/src/search/mod.rs index dada9159b..915505be0 100644 --- a/meilisearch/src/search/mod.rs +++ b/meilisearch/src/search/mod.rs @@ -1369,12 +1369,18 @@ pub fn perform_facet_search( None => TimeBudget::default(), }; + // In the faceted search context, we want to use the intersection between the locales provided by the user + // and the locales of the facet string. + // If the facet string is not localized, we **ignore** the locales provided by the user because the facet data has no locale. + // If the user does not provide locales, we use the locales of the facet string. let localized_attributes = index.localized_attributes_rules(&rtxn)?.unwrap_or_default(); - let locales = locales.or_else(|| { - localized_attributes + let localized_attributes_locales = + localized_attributes.into_iter().find(|attr| attr.match_str(&facet_name)); + let locales = localized_attributes_locales.map(|attr| { + attr.locales .into_iter() - .find(|attr| attr.match_str(&facet_name)) - .map(|attr| attr.locales) + .filter(|locale| locales.as_ref().map_or(true, |locales| locales.contains(locale))) + .collect() }); let (search, _, _, _) = diff --git a/milli/src/search/facet/search.rs b/milli/src/search/facet/search.rs index 39fb7374a..cdba7ee16 100644 --- a/milli/src/search/facet/search.rs +++ b/milli/src/search/facet/search.rs @@ -339,10 +339,18 @@ impl ValuesCollection { fn normalize_facet_string(facet_string: &str, locales: Option<&[Language]>) -> String { let options = NormalizerOption { lossy: true, ..Default::default() }; let mut detection = StrDetection::new(facet_string, locales); + + // Detect the language of the facet string only if several locales are explicitly provided. + let language = match locales { + Some(&[language]) => Some(language), + Some(multiple_locales) if multiple_locales.len() > 1 => detection.language(), + _ => None, + }; + let token = Token { lemma: std::borrow::Cow::Borrowed(facet_string), script: detection.script(), - language: detection.language(), + language, ..Default::default() }; diff --git a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs index 45a7696ac..69663bb08 100644 --- a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs +++ b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs @@ -271,7 +271,7 @@ fn extract_facet_string_docids_settings( /// Normalizes the facet string and truncates it to the max length. #[tracing::instrument(level = "trace", skip_all, target = "indexing::extract")] fn normalize_facet_string(facet_string: &str, locales: Option<&[Language]>) -> String { - let options = NormalizerOption { lossy: true, ..Default::default() }; + let options: NormalizerOption = NormalizerOption { lossy: true, ..Default::default() }; let mut detection = StrDetection::new(facet_string, locales); let script = { @@ -285,7 +285,12 @@ fn normalize_facet_string(facet_string: &str, locales: Option<&[Language]>) -> S let span = tracing::trace_span!(target: "indexing::extract::extract_facet_string_docids", "detect_language"); let _entered = span.enter(); - detection.language() + // Detect the language of the facet string only if several locales are explicitly provided. + match locales { + Some(&[language]) => Some(language), + Some(multiple_locales) if multiple_locales.len() > 1 => detection.language(), + _ => None, + } }; let token = Token {