From cc02920f2b038b05ddf99aa0ff8ef09db0b9b2b2 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 23 Jul 2024 14:59:31 +0200 Subject: [PATCH 1/9] Update charabia --- Cargo.lock | 37 +--- milli/Cargo.toml | 2 +- milli/src/index.rs | 23 +++ milli/src/search/new/mod.rs | 6 +- .../extract/extract_docid_word_positions.rs | 161 ++---------------- .../src/update/index_documents/extract/mod.rs | 16 +- .../src/update/index_documents/typed_chunk.rs | 34 +--- 7 files changed, 49 insertions(+), 230 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index c72053be7..547f9c0e3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -934,19 +934,15 @@ dependencies = [ [[package]] name = "charabia" version = "0.8.12" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9868a22f10dee80498a8a2b6c641d80bf28ea4495fcf71c2dc4836c2dd23958c" +source = "git+https://github.com/meilisearch/charabia.git?branch=simplify-lang-detection#a95a9217265cee515708a679a2ed08ced1ac58a3" dependencies = [ "aho-corasick", - "cow-utils", "csv", - "deunicode", "either", "fst", "irg-kvariants", "jieba-rs", "lindera", - "litemap", "once_cell", "pinyin", "serde", @@ -954,7 +950,6 @@ dependencies = [ "unicode-normalization", "wana_kana", "whatlang", - "zerovec", ] [[package]] @@ -1145,12 +1140,6 @@ version = "0.8.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa" -[[package]] -name = "cow-utils" -version = "0.1.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "79bb3adfaf5f75d24b01aee375f7555907840fa2800e5ec8fa3b9e2031830173" - [[package]] name = "cpufeatures" version = "0.2.12" @@ -1551,12 +1540,6 @@ dependencies = [ "syn 2.0.60", ] -[[package]] -name = "deunicode" -version = "1.6.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "339544cc9e2c4dc3fc7149fd630c5f22263a4fdf18a98afd0075784968b5cf00" - [[package]] name = "digest" version = "0.10.7" @@ -2666,8 +2649,7 @@ checksum = "28b29a3cd74f0f4598934efe3aeba42bae0eb4680554128851ebbecb02af14e6" [[package]] name = "irg-kvariants" version = "0.1.1" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "ef2af7c331f2536964a32b78a7d2e0963d78b42f4a76323b16cc7d94b1ddce26" +source = "git+https://github.com/meilisearch/charabia.git?branch=simplify-lang-detection#a95a9217265cee515708a679a2ed08ced1ac58a3" dependencies = [ "csv", "once_cell", @@ -3278,12 +3260,6 @@ dependencies = [ "unicode-segmentation", ] -[[package]] -name = "litemap" -version = "0.7.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704" - [[package]] name = "lmdb-master-sys" version = "0.2.2" @@ -6506,15 +6482,6 @@ dependencies = [ "syn 2.0.60", ] -[[package]] -name = "zerovec" -version = "0.10.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079" -dependencies = [ - "zerofrom", -] - [[package]] name = "zip" version = "1.1.4" diff --git a/milli/Cargo.toml b/milli/Cargo.toml index e635bbcf4..1a81f6f8c 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -17,7 +17,7 @@ bincode = "1.3.3" bstr = "1.9.1" bytemuck = { version = "1.16.1", features = ["extern_crate_alloc"] } byteorder = "1.5.0" -charabia = { version = "0.8.12", default-features = false } +charabia = { git = "https://github.com/meilisearch/charabia.git", branch = "simplify-lang-detection", 
default-features = false }
concat-arrays = "0.1.2"
crossbeam-channel = "0.5.13"
deserr = "0.6.2"
diff --git a/milli/src/index.rs b/milli/src/index.rs
index afe212f57..194f18faa 100644
--- a/milli/src/index.rs
+++ b/milli/src/index.rs
@@ -1604,6 +1604,29 @@ impl Index {
         Ok(script_language)
     }
 
+    pub fn languages(&self, rtxn: &RoTxn<'_>) -> heed::Result<Vec<Language>> {
+        let mut script_language_doc_count: Vec<(Language, u64)> = Vec::new();
+        let mut total = 0;
+        for sl in self.script_language_docids.iter(rtxn)? {
+            let ((_script, language), docids) = sl?;
+
+            // keep only languages that contain at least 1 document.
+            let remaining_documents_count = docids.len();
+            total += remaining_documents_count;
+            if remaining_documents_count > 0 {
+                script_language_doc_count.push((language, remaining_documents_count));
+            }
+        }
+
+        let threshold = total / 20; // 5% (arbitrary)
+
+        Ok(script_language_doc_count
+            .into_iter()
+            .filter(|(_, count)| *count > threshold)
+            .map(|(language, _)| language)
+            .collect())
+    }
+
     /// Put the embedding configs:
     /// 1. The name of the embedder
     /// 2. The configuration option for this embedder
diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs
index f6a4a802c..78b7a0446 100644
--- a/milli/src/search/new/mod.rs
+++ b/milli/src/search/new/mod.rs
@@ -670,9 +670,9 @@ pub fn execute_search(
         tokbuilder.words_dict(dictionary);
     }
 
-    let script_lang_map = ctx.index.script_language(ctx.txn)?;
-    if !script_lang_map.is_empty() {
-        tokbuilder.allow_list(&script_lang_map);
+    let languages = ctx.index.languages(ctx.txn)?;
+    if !languages.is_empty() {
+        tokbuilder.allow_list(&languages);
     }
 
     let tokenizer = tokbuilder.build();
diff --git a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
index 721d67e96..748a3886a 100644
--- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
+++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
@@ -1,10 +1,9 @@
-use std::collections::HashMap;
 use std::convert::TryInto;
 use std::fs::File;
 use std::io::BufReader;
 use std::{io, mem, str};
 
-use charabia::{Language, Script, SeparatorKind, Token, TokenKind, Tokenizer, TokenizerBuilder};
+use charabia::{Language, SeparatorKind, Token, TokenKind, Tokenizer, TokenizerBuilder};
 use obkv::{KvReader, KvWriterU16};
 use roaring::RoaringBitmap;
 use serde_json::Value;
@@ -12,11 +11,9 @@ use serde_json::Value;
 use super::helpers::{create_sorter, keep_latest_obkv, sorter_into_reader, GrenadParameters};
 use crate::error::{InternalError, SerializationError};
 use crate::update::del_add::{del_add_from_two_obkvs, DelAdd, KvReaderDelAdd};
-use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
+use crate::update::settings::InnerIndexSettingsDiff;
 use crate::{FieldId, Result, MAX_POSITION_PER_ATTRIBUTE, MAX_WORD_LENGTH};
 
-pub type ScriptLanguageDocidsMap = HashMap<(Script, Language), (RoaringBitmap, RoaringBitmap)>;
-
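// A self-contained sketch (illustrative names, not milli's API) of the
// per-language thresholding performed by the new `Index::languages()` in
// milli/src/index.rs above: a language stays on the tokenizer allow-list
// only when it covers more than 5% of the indexed documents, the same
// `total / 20` ratio used by the patch.
fn frequent_languages(doc_count_per_language: Vec<(&'static str, u64)>) -> Vec<&'static str> {
    let total: u64 = doc_count_per_language.iter().map(|(_, count)| count).sum();
    let threshold = total / 20; // 5% (arbitrary), as in the patch
    doc_count_per_language
        .into_iter()
        .filter(|(_, count)| *count > threshold)
        .map(|(language, _)| language)
        .collect()
}

fn main() {
    // Only 3 documents out of 100 were detected as Cmn: that is under the
    // 5% threshold, so Cmn is dropped and cannot skew query tokenization.
    let counts = vec![("Eng", 80), ("Jpn", 17), ("Cmn", 3)];
    assert_eq!(frequent_languages(counts), vec!["Eng", "Jpn"]);
}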
 /// Extracts the word and positions where this word appear and
 /// prefixes it by the document id.
 ///
@@ -28,7 +25,7 @@ pub fn extract_docid_word_positions(
     indexer: GrenadParameters,
     settings_diff: &InnerIndexSettingsDiff,
     max_positions_per_attributes: Option<u32>,
-) -> Result<(grenad::Reader<BufReader<File>>, ScriptLanguageDocidsMap)> {
+) -> Result<grenad::Reader<BufReader<File>>> {
     let max_positions_per_attributes = max_positions_per_attributes
         .map_or(MAX_POSITION_PER_ATTRIBUTE, |max| max.min(MAX_POSITION_PER_ATTRIBUTE));
     let max_memory = indexer.max_memory_by_thread();
@@ -36,7 +33,6 @@ pub fn extract_docid_word_positions(
 
     // initialize destination values.
     let mut documents_ids = RoaringBitmap::new();
-    let mut script_language_docids = HashMap::new();
     let mut docid_word_positions_sorter = create_sorter(
         grenad::SortAlgorithm::Stable,
         keep_latest_obkv,
@@ -109,9 +105,9 @@ pub fn extract_docid_word_positions(
         let (del, add): (Result<_>, Result<_>) = rayon::join(
             || {
                 // deletions
-                lang_safe_tokens_from_document(
+                tokens_from_document(
                     &obkv,
-                    &settings_diff.old,
+                    &settings_diff.old.searchable_fields_ids,
                     &del_tokenizer,
                     max_positions_per_attributes,
                     DelAdd::Deletion,
@@ -120,9 +116,9 @@ pub fn extract_docid_word_positions(
             },
             || {
                 // additions
-                lang_safe_tokens_from_document(
+                tokens_from_document(
                     &obkv,
-                    &settings_diff.new,
+                    &settings_diff.new.searchable_fields_ids,
                     &add_tokenizer,
                     max_positions_per_attributes,
                     DelAdd::Addition,
@@ -131,8 +127,8 @@ pub fn extract_docid_word_positions(
             },
         );
 
-        let (del_obkv, del_script_language_word_count) = del?;
-        let (add_obkv, add_script_language_word_count) = add?;
+        let del_obkv = del?;
+        let add_obkv = add?;
 
         // merge deletions and additions.
         // transforming two KV<FieldId, KV<u16, String>> into one KV<FieldId, KV<DelAdd, KV<u16, String>>>
@@ -150,31 +146,10 @@ pub fn extract_docid_word_positions(
             key_buffer.extend_from_slice(&field_id.to_be_bytes());
             docid_word_positions_sorter.insert(&key_buffer, value)?;
         }
-
-        // update script_language_docids deletions.
-        for (script, languages_frequency) in del_script_language_word_count {
-            for (language, _) in languages_frequency {
-                let entry = script_language_docids
-                    .entry((script, language))
-                    .or_insert_with(|| (RoaringBitmap::new(), RoaringBitmap::new()));
-                entry.0.push(document_id);
-            }
-        }
-
-        // update script_language_docids additions.
-        for (script, languages_frequency) in add_script_language_word_count {
-            for (language, _) in languages_frequency {
-                let entry = script_language_docids
-                    .entry((script, language))
-                    .or_insert_with(|| (RoaringBitmap::new(), RoaringBitmap::new()));
-                entry.1.push(document_id);
-            }
-        }
     }
 
     // the returned sorter is serialized as: key: (DocId, FieldId), value: KV<DelAdd, KV<u16, String>>.
     sorter_into_reader(docid_word_positions_sorter, indexer)
-        .map(|reader| (reader, script_language_docids))
 }
 
 /// Check if any searchable fields of a document changed.
@@ -205,7 +180,7 @@ fn tokenizer_builder<'a>(
     stop_words: Option<&'a fst::Set<Vec<u8>>>,
     allowed_separators: Option<&'a [&str]>,
     dictionary: Option<&'a [&str]>,
-    script_language: Option<&'a HashMap<Script, Vec<Language>>>,
+    languages: Option<&'a Vec<Language>>,
 ) -> TokenizerBuilder<'a, Vec<u8>> {
     let mut tokenizer_builder = TokenizerBuilder::new();
     if let Some(stop_words) = stop_words {
@@ -218,81 +193,13 @@ fn tokenizer_builder<'a>(
         tokenizer_builder.separators(separators);
     }
 
-    if let Some(script_language) = script_language {
-        tokenizer_builder.allow_list(script_language);
+    if let Some(languages) = languages {
+        tokenizer_builder.allow_list(languages);
     }
 
     tokenizer_builder
 }
 
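// A minimal sketch of the deletion/addition split that replaces the removed
// language-detection rerun below: the old and new sides of a document update
// are tokenized on two threads with `rayon::join` (requires the `rayon`
// crate; `tokenize_side` is a hypothetical stand-in for the real
// `tokens_from_document`), and either side's error is surfaced afterwards.
fn tokenize_side(text: &str) -> Result<Vec<String>, std::io::Error> {
    Ok(text.split_whitespace().map(str::to_owned).collect())
}

fn main() -> Result<(), std::io::Error> {
    let (del, add) = rayon::join(
        || tokenize_side("old searchable content"), // deletions
        || tokenize_side("brand new searchable content"), // additions
    );
    let (del, add) = (del?, add?);
    assert_eq!(del.len(), 3);
    assert_eq!(add.len(), 4);
    Ok(())
}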
-/// Extract words mapped with their positions of a document,
-/// ensuring no Language detection mistakes was made.
-fn lang_safe_tokens_from_document<'a>(
-    obkv: &KvReader<'_, FieldId>,
-    settings: &InnerIndexSettings,
-    tokenizer: &Tokenizer<'_>,
-    max_positions_per_attributes: u32,
-    del_add: DelAdd,
-    buffers: &'a mut Buffers,
-) -> Result<(&'a [u8], HashMap<Script, Vec<(Language, usize)>>)> {
-    let mut script_language_word_count = HashMap::new();
-
-    tokens_from_document(
-        obkv,
-        &settings.searchable_fields_ids,
-        tokenizer,
-        max_positions_per_attributes,
-        del_add,
-        buffers,
-        &mut script_language_word_count,
-    )?;
-
-    // if we detect a potetial mistake in the language detection,
-    // we rerun the extraction forcing the tokenizer to detect the most frequently detected Languages.
-    // context: https://github.com/meilisearch/meilisearch/issues/3565
-    if script_language_word_count
-        .values()
-        .map(Vec::as_slice)
-        .any(potential_language_detection_error)
-    {
-        // build an allow list with the most frequent detected languages in the document.
-        let script_language: HashMap<_, _> =
-            script_language_word_count.iter().filter_map(most_frequent_languages).collect();
-
-        // if the allow list is empty, meaning that no Language is considered frequent,
-        // then we don't rerun the extraction.
-        if !script_language.is_empty() {
-            // build a new temporary tokenizer including the allow list.
-            let stop_words = settings.stop_words.as_ref();
-            let separators: Option<Vec<&str>> = settings
-                .allowed_separators
-                .as_ref()
-                .map(|s| s.iter().map(String::as_str).collect());
-            let dictionary: Option<Vec<&str>> =
-                settings.dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
-            let mut builder =
-                tokenizer_builder(stop_words, separators.as_deref(), dictionary.as_deref(), None);
-            let tokenizer = builder.build();
-
-            script_language_word_count.clear();
-
-            // rerun the extraction.
-            tokens_from_document(
-                obkv,
-                &settings.searchable_fields_ids,
-                &tokenizer,
-                max_positions_per_attributes,
-                del_add,
-                buffers,
-                &mut script_language_word_count,
-            )?;
-        }
-    }
-
-    // returns a (KV<FieldId, KV<u16, String>>, HashMap<Script, Vec<(Language, usize)>>)
-    Ok((&buffers.obkv_buffer, script_language_word_count))
-}
-
 /// Extract words mapped with their positions of a document.
 fn tokens_from_document<'a>(
     obkv: &KvReader<'a, FieldId>,
@@ -301,7 +208,6 @@ fn tokens_from_document<'a>(
     max_positions_per_attributes: u32,
     del_add: DelAdd,
     buffers: &'a mut Buffers,
-    script_language_word_count: &mut HashMap<Script, Vec<(Language, usize)>>,
 ) -> Result<&'a [u8]> {
     buffers.obkv_buffer.clear();
     let mut document_writer = KvWriterU16::new(&mut buffers.obkv_buffer);
@@ -326,16 +232,6 @@ fn tokens_from_document<'a>(
                 .take_while(|(p, _)| (*p as u32) < max_positions_per_attributes);
 
             for (index, token) in tokens {
-                // if a language has been detected for the token, we update the counter.
-                if let Some(language) = token.language {
-                    let script = token.script;
-                    let entry = script_language_word_count.entry(script).or_default();
-                    match entry.iter_mut().find(|(l, _)| *l == language) {
-                        Some((_, n)) => *n += 1,
-                        None => entry.push((language, 1)),
-                    }
-                }
-
                 // keep a word only if it is not empty and fit in a LMDB key.
                 let token = token.lemma().trim();
                 if !token.is_empty() && token.len() <= MAX_WORD_LENGTH {
@@ -423,39 +319,6 @@ fn process_tokens<'a>(
         .filter(|(_, t)| t.is_word())
 }
 
-fn potential_language_detection_error(languages_frequency: &[(Language, usize)]) -> bool {
-    if languages_frequency.len() > 1 {
-        let threshold = compute_language_frequency_threshold(languages_frequency);
-        languages_frequency.iter().any(|(_, c)| *c <= threshold)
-    } else {
-        false
-    }
-}
-
-fn most_frequent_languages(
-    (script, languages_frequency): (&Script, &Vec<(Language, usize)>),
-) -> Option<(Script, Vec<Language>)> {
-    if languages_frequency.len() > 1 {
-        let threshold = compute_language_frequency_threshold(languages_frequency);
-
-        let languages: Vec<_> =
-            languages_frequency.iter().filter(|(_, c)| *c > threshold).map(|(l, _)| *l).collect();
-
-        if languages.is_empty() {
-            None
-        } else {
-            Some((*script, languages))
-        }
-    } else {
-        None
-    }
-}
-
-fn compute_language_frequency_threshold(languages_frequency: &[(Language, usize)]) -> usize {
-    let total: usize = languages_frequency.iter().map(|(_, c)| c).sum();
-    total / 10 // 10% is a completely arbitrary value.
-}
-
 #[derive(Default)]
 struct Buffers {
     // the field buffer for each fields desserialization, and must be cleared between each field.
diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs
index 57d9d5e42..6c23a8da9 100644
--- a/milli/src/update/index_documents/extract/mod.rs
+++ b/milli/src/update/index_documents/extract/mod.rs
@@ -345,21 +345,17 @@ fn send_and_extract_flattened_documents_data(
     let (docid_word_positions_chunk, fid_docid_facet_values_chunks): (Result<_>, Result<_>) =
         rayon::join(
             || {
-                let (docid_word_positions_chunk, script_language_pair) =
-                    extract_docid_word_positions(
-                        flattened_documents_chunk.clone(),
-                        indexer,
-                        &settings_diff,
-                        max_positions_per_attributes,
-                    )?;
+                let docid_word_positions_chunk = extract_docid_word_positions(
+                    flattened_documents_chunk.clone(),
+                    indexer,
+                    &settings_diff,
+                    max_positions_per_attributes,
+                )?;
 
                 // send docid_word_positions_chunk to DB writer
                 let docid_word_positions_chunk =
                     unsafe { as_cloneable_grenad(&docid_word_positions_chunk)?
}; - let _ = - lmdb_writer_sx.send(Ok(TypedChunk::ScriptLanguageDocids(script_language_pair))); - Ok(docid_word_positions_chunk) }, || { diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index c5cf35ca8..9de95778b 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -1,10 +1,9 @@ -use std::collections::{BTreeSet, HashMap}; +use std::collections::BTreeSet; use std::convert::TryInto; use std::fs::File; use std::io::{self, BufReader}; use bytemuck::allocation::pod_collect_to_vec; -use charabia::{Language, Script}; use grenad::{Merger, MergerBuilder}; use heed::types::Bytes; use heed::{BytesDecode, RwTxn}; @@ -94,7 +93,6 @@ pub(crate) enum TypedChunk { add_to_user_provided: RoaringBitmap, remove_from_user_provided: RoaringBitmap, }, - ScriptLanguageDocids(HashMap<(Script, Language), (RoaringBitmap, RoaringBitmap)>), } impl TypedChunk { @@ -113,8 +111,7 @@ impl TypedChunk { | (FieldIdFacetExistsDocids(_), FieldIdFacetExistsDocids(_)) | (FieldIdFacetIsNullDocids(_), FieldIdFacetIsNullDocids(_)) | (FieldIdFacetIsEmptyDocids(_), FieldIdFacetIsEmptyDocids(_)) - | (GeoPoints(_), GeoPoints(_)) - | (ScriptLanguageDocids(_), ScriptLanguageDocids(_)) => true, + | (GeoPoints(_), GeoPoints(_)) => true, ( VectorPoints { embedder_name: left, expected_dimension: left_dim, .. }, VectorPoints { embedder_name: right, expected_dimension: right_dim, .. }, @@ -775,33 +772,6 @@ pub(crate) fn write_typed_chunk_into_index( tracing::debug!("Finished vector chunk for {}", embedder_name); } - TypedChunk::ScriptLanguageDocids(_) => { - let span = tracing::trace_span!(target: "indexing::write_db", "script_language_docids"); - let _entered = span.enter(); - - for typed_chunk in typed_chunks { - let TypedChunk::ScriptLanguageDocids(sl_map) = typed_chunk else { unreachable!() }; - for (key, (deletion, addition)) in sl_map { - let mut db_key_exists = false; - let final_value = match index.script_language_docids.get(wtxn, &key)? { - Some(db_values) => { - db_key_exists = true; - (db_values - deletion) | addition - } - None => addition, - }; - - if final_value.is_empty() { - // If the database entry exists, delete it. 
- if db_key_exists { - index.script_language_docids.delete(wtxn, &key)?; - } - } else { - index.script_language_docids.put(wtxn, &key, &final_value)?; - } - } - } - } } Ok((RoaringBitmap::new(), is_merged_database)) From d82f8fd904bc291689673685a7b38bd749b9af01 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 23 Jul 2024 13:33:57 +0200 Subject: [PATCH 2/9] Add tests --- meilisearch/tests/search/locales.rs | 662 ++++++++++++++++++++++++++++ meilisearch/tests/search/mod.rs | 1 + 2 files changed, 663 insertions(+) create mode 100644 meilisearch/tests/search/locales.rs diff --git a/meilisearch/tests/search/locales.rs b/meilisearch/tests/search/locales.rs new file mode 100644 index 000000000..722694ba3 --- /dev/null +++ b/meilisearch/tests/search/locales.rs @@ -0,0 +1,662 @@ +use meili_snap::*; +use once_cell::sync::Lazy; + +use crate::common::{Server, Value}; +use crate::json; + +static NESTED_DOCUMENTS: Lazy = Lazy::new(|| { + json!([ + { + "id": 852, + "document_en": { + "name": "Attack on Titan", + "description": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", + "author": "Hajime Isayama", + }, + "document_ja": { + "name": "進撃の巨人", + "description": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", + "author": "諫山 創", + }, + "document_zh": { + "name": "进击的巨人", + "description": "进击的巨人是日本的漫画系列,由諫山 創作画。", + "author": "諫山創", + }, + "_vectors": { "manual": [1, 2, 3]}, + }, + { + "id": 654, + "document_en": + { + "name": "One Piece", + "description": "One Piece is a Japanese manga series written and illustrated by Eiichiro Oda", + "author": "Eiichiro Oda", + }, + "document_ja": { + "name": "ワンピース", + "description": "ワンピースは、日本の漫画シリーズであり、尾田 栄一郎によって作画されている。", + "author": "尾田 栄一郎", + }, + "document_zh": { + "name": "ONE PIECE", + "description": "海贼王》是尾田荣一郎创作的日本漫画系列。", + "author": "尾田 栄一郎", + }, + "_vectors": { "manual": [1, 2, 54] }, + } + ]) +}); + +static DOCUMENTS: Lazy = Lazy::new(|| { + json!([ + { + "id": 852, + "name_en": "Attack on Titan", + "description_en": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", + "author_en": "Hajime Isayama", + "name_ja": "進撃の巨人", + "description_ja": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", + "author_ja": "諫山 創", + "_vectors": { "manual": [1, 2, 3]}, + }, + { + "id": 853, + "name_zh": "进击的巨人", + "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", + "author_zh": "諫山創", + "_vectors": { "manual": [1, 2, 3]}, + }, + { + "id": 654, + "name_en": "One Piece", + "description_en": "One Piece is a Japanese manga series written and illustrated by Eiichiro Oda", + "author_en": "Eiichiro Oda", + "name_ja": "ワンピース", + "description_ja": "ワンピースは、日本の漫画シリーズであり、尾田 栄一郎によって作画されている。", + "author_ja": "尾田 栄一郎", + "_vectors": { "manual": [1, 2, 54] }, + }, + { + "id": 655, + "name_zh": "ONE PIECE", + "description_zh": "海贼王》是尾田荣一郎创作的日本漫画系列。", + "author_zh": "尾田 栄一郎", + "_vectors": { "manual": [1, 2, 54] }, + } + ]) +}); + +#[actix_rt::test] +async fn simple_search() { + let server = Server::new().await; + + let index = server.index("test"); + let documents = DOCUMENTS.clone(); + index + .update_settings( + json!({"searchableAttributes": ["name_en", "name_ja", "name_zh", "author_en", "author_ja", "author_zh", "description_en", "description_ja", "description_zh"]}), + ) + .await; + index.add_documents(documents, None).await; + index.wait_task(1).await; + + // english + index + .search(json!({"q": "Atta", "attributesToRetrieve": ["id"]}), |response, code| { + snapshot!(response, @r###" + { + "hits": [ + { + "id": 852 + } + ], + 
"query": "Atta", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1 + } + "###); + snapshot!(code, @"200 OK"); + }) + .await; + + // japanese + index + .search(json!({"q": "進撃", "attributesToRetrieve": ["id"]}), |response, code| { + snapshot!(response, @r###" + { + "hits": [ + { + "id": 853 + } + ], + "query": "進撃", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1 + } + "###); + snapshot!(code, @"200 OK"); + }) + .await; + + index + .search( + json!({"q": "進撃", "attributesToRetrieve": ["id"], "locales": ["jpn"]}), + |response, code| { + snapshot!(response, @r###" + { + "hits": [ + { + "id": 852 + }, + { + "id": 853 + } + ], + "query": "進撃", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 2 + } + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; + + // chinese + index + .search(json!({"q": "进击", "attributesToRetrieve": ["id"]}), |response, code| { + snapshot!(response, @r###" + { + "hits": [ + { + "id": 853 + }, + { + "id": 852 + } + ], + "query": "进击", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 2 + } + "###); + snapshot!(code, @"200 OK"); + }) + .await; +} + +#[actix_rt::test] +async fn force_locales() { + let server = Server::new().await; + + let index = server.index("test"); + let documents = DOCUMENTS.clone(); + let (response, _) = index + .update_settings( + json!({ + "searchableAttributes": ["name_en", "name_ja", "name_zh", "author_en", "author_ja", "author_zh", "description_en", "description_ja", "description_zh"], + "localizedAttributes": [ + // force japanese + {"attributePatterns": ["name_ja", "name_zh", "author_ja", "author_zh", "description_ja", "description_zh"], "locales": ["jpn"]} + ] + }), + ) + .await; + snapshot!(response, @r###" + { + "taskUid": 0, + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[date]" + } + "###); + index.add_documents(documents, None).await; + index.wait_task(1).await; + + // chinese detection + index + .search( + json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"]}), + |response, code| { + snapshot!(response, @r###" + { + "hits": [], + "query": "\"进击的巨人\"", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 0 + } + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; + + // force japanese + index + .search( + json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"], "locales": ["jpn"]}), + |response, code| { + snapshot!(response, @r###" + { + "hits": [ + { + "id": 853 + } + ], + "query": "\"进击的巨人\"", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1 + } + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; +} + +#[actix_rt::test] +async fn force_locales_with_pattern() { + let server = Server::new().await; + + let index = server.index("test"); + let documents = DOCUMENTS.clone(); + let (response, _) = index + .update_settings( + json!({ + "searchableAttributes": ["name_en", "name_ja", "name_zh", "author_en", "author_ja", "author_zh", "description_en", "description_ja", "description_zh"], + "localizedAttributes": [ + // force japanese + {"attributePatterns": ["*_ja", "*_zh"], "locales": ["jpn"]} + ] + }), + ) + .await; + snapshot!(response, @r###" + { + "taskUid": 0, + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[date]" + } + "###); + index.add_documents(documents, None).await; + 
index.wait_task(1).await; + + // chinese detection + index + .search( + json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"]}), + |response, code| { + snapshot!(response, @r###" + { + "hits": [], + "query": "\"进击的巨人\"", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 0 + } + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; + + // force japanese + index + .search( + json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"], "locales": ["jpn"]}), + |response, code| { + snapshot!(response, @r###" + { + "hits": [ + { + "id": 853 + } + ], + "query": "\"进击的巨人\"", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1 + } + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; +} + +#[actix_rt::test] +async fn force_locales_with_pattern_nested() { + let server = Server::new().await; + + let index = server.index("test"); + let documents = NESTED_DOCUMENTS.clone(); + let (response, _) = index + .update_settings(json!({ + "searchableAttributes": ["document_en", "document_ja", "document_zh"], + "localizedAttributes": [ + // force japanese + {"attributePatterns": ["document_ja.*", "*_zh.*"], "locales": ["jpn"]} + ] + })) + .await; + snapshot!(response, @r###" + { + "taskUid": 0, + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[date]" + } + "###); + index.add_documents(documents, None).await; + index.wait_task(1).await; + + // chinese + index + .search( + json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"], "locales": ["cmn"]}), + |response, code| { + snapshot!(response, @r###" + { + "hits": [], + "query": "\"进击的巨人\"", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 0 + } + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; + + // force japanese + index + .search( + json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"], "locales": ["jpn"]}), + |response, code| { + snapshot!(response, @r###" + { + "hits": [ + { + "id": 852 + } + ], + "query": "\"进击的巨人\"", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1 + } + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; +} + +#[actix_rt::test] +async fn invalid_locales() { + let server = Server::new().await; + + let index = server.index("test"); + let documents = DOCUMENTS.clone(); + index + .update_settings( + json!({"searchableAttributes": ["name_en", "name_ja", "name_zh", "author_en", "author_ja", "author_zh", "description_en", "description_ja", "description_zh"]}), + ) + .await; + index.add_documents(documents, None).await; + index.wait_task(1).await; + + let (response, code) = index + .search_post(json!({"q": "Atta", "attributesToRetrieve": ["id"], "locales": ["invalid"]})) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Unknown value `invalid` at `.locales[0]`: expected one of `epo`, `eng`, `rus`, `cmn`, `spa`, `por`, `ita`, `ben`, `fra`, `deu`, `ukr`, `kat`, `ara`, `hin`, `jpn`, `heb`, `yid`, `pol`, `amh`, `jav`, `kor`, `nob`, `dan`, `swe`, `fin`, `tur`, `nld`, `hun`, `ces`, `ell`, `bul`, `bel`, `mar`, `kan`, `ron`, `slv`, `hrv`, `srp`, `mkd`, `lit`, `lav`, `est`, `tam`, `vie`, `urd`, `tha`, `guj`, `uzb`, `pan`, `aze`, `ind`, `tel`, `pes`, `mal`, `ori`, `mya`, `nep`, `sin`, `khm`, `tuk`, `aka`, `zul`, `sna`, `afr`, `lat`, `slk`, `cat`, `tgl`, `hye`", + "code": "invalid_search_locales", + "type": "invalid_request", + "link": 
"https://docs.meilisearch.com/errors#invalid_search_locales" + } + "###); + + let (response, code) = index + .search_get( + &yaup::to_string( + &json!({"q": "Atta", "attributesToRetrieve": ["id"], "locales": ["invalid"]}), + ) + .unwrap(), + ) + .await; + snapshot!(code, @"400 Bad Request"); + snapshot!(json_string!(response), @r###" + { + "message": "Invalid value in parameter `locales`: Unknown value `invalid`, expected one of `epo`, `eng`, `rus`, `cmn`, `spa`, `por`, `ita`, `ben`, `fra`, `deu`, `ukr`, `kat`, `ara`, `hin`, `jpn`, `heb`, `yid`, `pol`, `amh`, `jav`, `kor`, `nob`, `dan`, `swe`, `fin`, `tur`, `nld`, `hun`, `ces`, `ell`, `bul`, `bel`, `mar`, `kan`, `ron`, `slv`, `hrv`, `srp`, `mkd`, `lit`, `lav`, `est`, `tam`, `vie`, `urd`, `tha`, `guj`, `uzb`, `pan`, `aze`, `ind`, `tel`, `pes`, `mal`, `ori`, `mya`, `nep`, `sin`, `khm`, `tuk`, `aka`, `zul`, `sna`, `afr`, `lat`, `slk`, `cat`, `tgl`, `hye`", + "code": "invalid_search_locales", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_search_locales" + } + "###); +} + +#[actix_rt::test] +async fn invalid_localized_attributes_rules() { + let server = Server::new().await; + + let index = server.index("test"); + let (response, _) = index + .update_settings(json!({ + "localizedAttributes": [ + {"attributePatterns": ["*_ja", "*_zh"], "locales": ["japan"]} + ] + })) + .await; + snapshot!(response, @r###" + { + "message": "Unknown value `japan` at `.localizedAttributes[0].locales[0]`: expected one of `epo`, `eng`, `rus`, `cmn`, `spa`, `por`, `ita`, `ben`, `fra`, `deu`, `ukr`, `kat`, `ara`, `hin`, `jpn`, `heb`, `yid`, `pol`, `amh`, `jav`, `kor`, `nob`, `dan`, `swe`, `fin`, `tur`, `nld`, `hun`, `ces`, `ell`, `bul`, `bel`, `mar`, `kan`, `ron`, `slv`, `hrv`, `srp`, `mkd`, `lit`, `lav`, `est`, `tam`, `vie`, `urd`, `tha`, `guj`, `uzb`, `pan`, `aze`, `ind`, `tel`, `pes`, `mal`, `ori`, `mya`, `nep`, `sin`, `khm`, `tuk`, `aka`, `zul`, `sna`, `afr`, `lat`, `slk`, `cat`, `tgl`, `hye`", + "code": "invalid_settings_localized_attributes", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_localized_attributes" + } + "###); + + let (response, _) = index + .update_settings(json!({ + "localizedAttributes": [ + {"attributePatterns": ["*_ja", "*_zh"], "locales": "jpn"} + ] + })) + .await; + snapshot!(response, @r###" + { + "message": "Invalid value type at `.localizedAttributes[0].locales`: expected an array, but found a string: `\"jpn\"`", + "code": "invalid_settings_localized_attributes", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_localized_attributes" + } + "###); + + let (response, _) = index + .update_settings(json!({ + "localizedAttributes": [ + {"attributePatterns": "*_ja", "locales": ["jpn"]} + ] + })) + .await; + snapshot!(response, @r###" + { + "message": "Invalid value type at `.localizedAttributes[0].attributePatterns`: expected an array, but found a string: `\"*_ja\"`", + "code": "invalid_settings_localized_attributes", + "type": "invalid_request", + "link": "https://docs.meilisearch.com/errors#invalid_settings_localized_attributes" + } + "###); + + let (response, _) = index + .update_settings(json!({ + "localizedAttributes": [ + {"locales": ["jpn"]} + ] + })) + .await; + snapshot!(response, @r###" + { + "message": "Missing field `attributePatterns` inside `.localizedAttributes[0]`", + "code": "invalid_settings_localized_attributes", + "type": "invalid_request", + "link": 
"https://docs.meilisearch.com/errors#invalid_settings_localized_attributes" + } + "###); +} + +#[actix_rt::test] +async fn simple_facet_search() { + let server = Server::new().await; + + let index = server.index("test"); + let documents = DOCUMENTS.clone(); + let (response, _) = index + .update_settings(json!({ + "filterableAttributes": ["name_en", "name_ja", "name_zh"], + })) + .await; + snapshot!(response, @r###" + { + "taskUid": 0, + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[date]" + } + "###); + index.add_documents(documents, None).await; + index.wait_task(1).await; + + let (response, _) = index + .facet_search(json!({"facetName": "name_zh", "facetQuery": "進撃", "locales": ["cmn"]})) + .await; + + snapshot!(response, @r###" + { + "facetHits": [ + { + "value": "进击的巨人", + "count": 1 + } + ], + "facetQuery": "進撃", + "processingTimeMs": "[duration]" + } + "###); + + let (response, _) = index + .facet_search(json!({"facetName": "name_zh", "facetQuery": "進撃", "locales": ["jpn"]})) + .await; + + snapshot!(response, @r###" + { + "facetHits": [ + { + "value": "进击的巨人", + "count": 1 + } + ], + "facetQuery": "進撃", + "processingTimeMs": "[duration]" + } + "###); +} + +#[actix_rt::test] +async fn facet_search_with_localized_attributes() { + let server = Server::new().await; + + let index = server.index("test"); + let documents = DOCUMENTS.clone(); + let (response, _) = index + .update_settings(json!({ + "filterableAttributes": ["name_ja", "name_zh"], + "localizedAttributes": [ + // force japanese + {"attributePatterns": ["*_ja", "*_zh"], "locales": ["jpn"]} + ] + })) + .await; + snapshot!(response, @r###" + { + "taskUid": 0, + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[date]" + } + "###); + index.add_documents(documents, None).await; + index.wait_task(1).await; + + let (response, _) = index + .facet_search(json!({"facetName": "name_zh", "facetQuery": "进击", "locales": ["cmn"]})) + .await; + + snapshot!(response, @r###" + { + "facetHits": [], + "facetQuery": "进击", + "processingTimeMs": "[duration]" + } + "###); + + let (response, _) = index + .facet_search(json!({"facetName": "name_zh", "facetQuery": "进击", "locales": ["jpn"]})) + .await; + + snapshot!(response, @r###" + { + "facetHits": [ + { + "value": "进击的巨人", + "count": 1 + } + ], + "facetQuery": "进击", + "processingTimeMs": "[duration]" + } + "###); + + let (response, _) = + index.facet_search(json!({"facetName": "name_zh", "facetQuery": "进击"})).await; + + snapshot!(response, @r###" + { + "facetHits": [ + { + "value": "进击的巨人", + "count": 1 + } + ], + "facetQuery": "进击", + "processingTimeMs": "[duration]" + } + "###); +} diff --git a/meilisearch/tests/search/mod.rs b/meilisearch/tests/search/mod.rs index 7f4648e57..301ef9aa2 100644 --- a/meilisearch/tests/search/mod.rs +++ b/meilisearch/tests/search/mod.rs @@ -7,6 +7,7 @@ mod facet_search; mod formatted; mod geo; mod hybrid; +mod locales; mod matching_strategy; mod multi; mod pagination; From 90c0a6db7ddf5b0d8535e1a6a20987776f7b3e82 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 23 Jul 2024 14:09:27 +0200 Subject: [PATCH 3/9] Implement localized search --- meilisearch-types/src/error.rs | 1 + meilisearch-types/src/lib.rs | 1 + meilisearch-types/src/locales.rs | 132 ++++++++++++++++++ .../src/analytics/segment_analytics.rs | 22 ++- meilisearch/src/routes/indexes/search.rs | 4 + meilisearch/src/search/federated.rs | 10 +- meilisearch/src/search/mod.rs | 61 ++++++-- milli/examples/search.rs | 1 + 
 milli/src/search/facet/search.rs              | 24 +++-
 milli/src/search/hybrid.rs                    |  1 +
 milli/src/search/mod.rs                       | 11 ++
 milli/src/search/new/matches/mod.rs           | 84 ++++++-----
 milli/src/search/new/mod.rs                   |  8 +-
 .../src/search/new/query_term/parse_query.rs  |  2 +-
 14 files changed, 292 insertions(+), 70 deletions(-)
 create mode 100644 meilisearch-types/src/locales.rs

diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs
index d27d6cd3d..e56949b57 100644
--- a/meilisearch-types/src/error.rs
+++ b/meilisearch-types/src/error.rs
@@ -256,6 +256,7 @@ InvalidSearchCropLength , InvalidRequest , BAD_REQUEST ;
 InvalidSearchCropMarker           , InvalidRequest , BAD_REQUEST ;
 InvalidSearchFacets               , InvalidRequest , BAD_REQUEST ;
 InvalidSearchSemanticRatio        , InvalidRequest , BAD_REQUEST ;
+InvalidSearchLocales              , InvalidRequest , BAD_REQUEST ;
 InvalidFacetSearchFacetName       , InvalidRequest , BAD_REQUEST ;
 InvalidSimilarId                  , InvalidRequest , BAD_REQUEST ;
 InvalidSearchFilter               , InvalidRequest , BAD_REQUEST ;
diff --git a/meilisearch-types/src/lib.rs b/meilisearch-types/src/lib.rs
index e4f5cbeb4..d6049e667 100644
--- a/meilisearch-types/src/lib.rs
+++ b/meilisearch-types/src/lib.rs
@@ -7,6 +7,7 @@ pub mod features;
 pub mod index_uid;
 pub mod index_uid_pattern;
 pub mod keys;
+pub mod locales;
 pub mod settings;
 pub mod star_or;
 pub mod task_view;
diff --git a/meilisearch-types/src/locales.rs b/meilisearch-types/src/locales.rs
new file mode 100644
index 000000000..14972fc33
--- /dev/null
+++ b/meilisearch-types/src/locales.rs
@@ -0,0 +1,132 @@
+use deserr::Deserr;
+use serde::{Deserialize, Serialize};
+use serde_json::json;
+
+use milli::LocalizedAttributesRule;
+
+/// Generate a Locale enum and its From and Into implementations for milli::tokenizer::Language.
+///
+/// this enum implements `Deserr` in order to be used in the API.
+macro_rules! make_locale {
+    ($($language:tt), +) => {
+        #[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr, Serialize, Deserialize, Ord, PartialOrd)]
+        #[deserr(rename_all = camelCase)]
+        #[serde(rename_all = "camelCase")]
+        pub enum Locale {
+            $($language),+,
+        }
+
+        impl From<milli::tokenizer::Language> for Locale {
+            fn from(other: milli::tokenizer::Language) -> Locale {
+                match other {
+                    $(milli::tokenizer::Language::$language => Locale::$language), +
+                }
+            }
+        }
+
+        impl From<Locale> for milli::tokenizer::Language {
+            fn from(other: Locale) -> milli::tokenizer::Language {
+                match other {
+                    $(Locale::$language => milli::tokenizer::Language::$language), +,
+                }
+            }
+        }
+
+        #[derive(Debug)]
+        pub struct LocaleFormatError {
+            pub invalid_locale: String,
+        }
+
+        impl std::fmt::Display for LocaleFormatError {
+            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+                let valid_locales = [$(Locale::$language),+].iter().map(|l| format!("`{}`", json!(l).as_str().unwrap())).collect::<Vec<_>>().join(", ");
+                write!(f, "Unknown value `{}`, expected one of {}", self.invalid_locale, valid_locales)
+            }
+        }
+
+        impl std::error::Error for LocaleFormatError {}
+
+        impl std::str::FromStr for Locale {
+            type Err = LocaleFormatError;
+
+            fn from_str(s: &str) -> Result<Self, Self::Err> {
+                milli::tokenizer::Language::from_code(s).map(Self::from).ok_or(LocaleFormatError {
+                    invalid_locale: s.to_string(),
+                })
+            }
+        }
+    };
+}
+
+make_locale!
{ + Epo, + Eng, + Rus, + Cmn, + Spa, + Por, + Ita, + Ben, + Fra, + Deu, + Ukr, + Kat, + Ara, + Hin, + Jpn, + Heb, + Yid, + Pol, + Amh, + Jav, + Kor, + Nob, + Dan, + Swe, + Fin, + Tur, + Nld, + Hun, + Ces, + Ell, + Bul, + Bel, + Mar, + Kan, + Ron, + Slv, + Hrv, + Srp, + Mkd, + Lit, + Lav, + Est, + Tam, + Vie, + Urd, + Tha, + Guj, + Uzb, + Pan, + Aze, + Ind, + Tel, + Pes, + Mal, + Ori, + Mya, + Nep, + Sin, + Khm, + Tuk, + Aka, + Zul, + Sna, + Afr, + Lat, + Slk, + Cat, + Tgl, + Hye +} diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs index 487eaf003..407b90658 100644 --- a/meilisearch/src/analytics/segment_analytics.rs +++ b/meilisearch/src/analytics/segment_analytics.rs @@ -1,4 +1,4 @@ -use std::collections::{BinaryHeap, HashMap, HashSet}; +use std::collections::{BTreeSet, BinaryHeap, HashMap, HashSet}; use std::fs; use std::mem::take; use std::path::{Path, PathBuf}; @@ -10,6 +10,7 @@ use actix_web::HttpRequest; use byte_unit::Byte; use index_scheduler::IndexScheduler; use meilisearch_auth::{AuthController, AuthFilter}; +use meilisearch_types::locales::Locale; use meilisearch_types::InstanceUid; use once_cell::sync::Lazy; use regex::Regex; @@ -653,6 +654,9 @@ pub struct SearchAggregator { // every time a search is done, we increment the counter linked to the used settings matching_strategy: HashMap, + // List of the unique Locales passed as parameter + locales: BTreeSet, + // pagination max_limit: usize, max_offset: usize, @@ -707,6 +711,7 @@ impl SearchAggregator { attributes_to_search_on, hybrid, ranking_score_threshold, + locales, } = query; let mut ret = Self::default(); @@ -774,6 +779,10 @@ impl SearchAggregator { ret.matching_strategy.insert(format!("{:?}", matching_strategy), 1); + if let Some(locales) = locales { + ret.locales = locales.into_iter().copied().collect(); + } + ret.highlight_pre_tag = *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG(); ret.highlight_post_tag = *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG(); ret.crop_marker = *crop_marker != DEFAULT_CROP_MARKER(); @@ -859,6 +868,7 @@ impl SearchAggregator { total_degraded, total_used_negative_operator, ranking_score_threshold, + ref mut locales, } = other; if self.timestamp.is_none() { @@ -947,6 +957,9 @@ impl SearchAggregator { self.show_ranking_score |= show_ranking_score; self.show_ranking_score_details |= show_ranking_score_details; self.ranking_score_threshold |= ranking_score_threshold; + + // locales + self.locales.append(locales); } pub fn into_event(self, user: &User, event_name: &str) -> Option { @@ -991,6 +1004,7 @@ impl SearchAggregator { total_degraded, total_used_negative_operator, ranking_score_threshold, + locales, } = self; if total_received == 0 { @@ -1060,6 +1074,7 @@ impl SearchAggregator { "matching_strategy": { "most_used_strategy": matching_strategy.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)), }, + "locales": locales, "scoring": { "show_ranking_score": show_ranking_score, "show_ranking_score_details": show_ranking_score_details, @@ -1150,6 +1165,7 @@ impl MultiSearchAggregator { attributes_to_search_on: _, hybrid: _, ranking_score_threshold: _, + locales: _, } = query; index_uid.as_str() @@ -1307,6 +1323,7 @@ impl FacetSearchAggregator { attributes_to_search_on, hybrid, ranking_score_threshold, + locales, } = query; let mut ret = Self::default(); @@ -1322,7 +1339,8 @@ impl FacetSearchAggregator { || *matching_strategy != MatchingStrategy::default() || attributes_to_search_on.is_some() || 
hybrid.is_some() - || ranking_score_threshold.is_some(); + || ranking_score_threshold.is_some() + || locales.is_some(); ret } diff --git a/meilisearch/src/routes/indexes/search.rs b/meilisearch/src/routes/indexes/search.rs index 836b96147..e60f95948 100644 --- a/meilisearch/src/routes/indexes/search.rs +++ b/meilisearch/src/routes/indexes/search.rs @@ -7,6 +7,7 @@ use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError}; use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::ResponseError; use meilisearch_types::index_uid::IndexUid; +use meilisearch_types::locales::Locale; use meilisearch_types::milli; use meilisearch_types::serde_cs::vec::CS; use serde_json::Value; @@ -89,6 +90,8 @@ pub struct SearchQueryGet { pub hybrid_semantic_ratio: Option, #[deserr(default, error = DeserrQueryParamError)] pub ranking_score_threshold: Option, + #[deserr(default, error = DeserrQueryParamError)] + pub locales: Option>, } #[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)] @@ -175,6 +178,7 @@ impl From for SearchQuery { attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()), hybrid, ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0), + locales: other.locales.map(|o| o.into_iter().collect()), } } } diff --git a/meilisearch/src/search/federated.rs b/meilisearch/src/search/federated.rs index 0c623d9cb..58005ec53 100644 --- a/meilisearch/src/search/federated.rs +++ b/meilisearch/src/search/federated.rs @@ -380,9 +380,6 @@ pub fn perform_federated_search( let criteria = index.criteria(&rtxn)?; - // stuff we need for the hitmaker - let script_lang_map = index.script_language(&rtxn)?; - let dictionary = index.dictionary(&rtxn)?; let dictionary: Option> = dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect()); @@ -494,6 +491,7 @@ pub fn perform_federated_search( sort: query.sort, show_ranking_score: query.show_ranking_score, show_ranking_score_details: query.show_ranking_score_details, + locales: query.locales.map(|l| l.iter().copied().map(Into::into).collect()), }; let milli::SearchResult { @@ -509,11 +507,7 @@ pub fn perform_federated_search( degraded |= query_degraded; used_negative_operator |= query_used_negative_operator; - let tokenizer = HitMaker::tokenizer( - &script_lang_map, - dictionary.as_deref(), - separators.as_deref(), - ); + let tokenizer = HitMaker::tokenizer(dictionary.as_deref(), separators.as_deref()); let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer); diff --git a/meilisearch/src/search/mod.rs b/meilisearch/src/search/mod.rs index 6624188ce..d28d888aa 100644 --- a/meilisearch/src/search/mod.rs +++ b/meilisearch/src/search/mod.rs @@ -1,6 +1,6 @@ use core::fmt; use std::cmp::min; -use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet}; +use std::collections::{BTreeMap, BTreeSet, HashSet}; use std::str::FromStr; use std::sync::Arc; use std::time::{Duration, Instant}; @@ -15,16 +15,17 @@ use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::{Code, ResponseError}; use meilisearch_types::heed::RoTxn; use meilisearch_types::index_uid::IndexUid; +use meilisearch_types::locales::Locale; use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy}; use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors; use meilisearch_types::milli::vector::Embedder; use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues, TimeBudget}; use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS; use 
meilisearch_types::{milli, Document}; -use milli::tokenizer::TokenizerBuilder; +use milli::tokenizer::{Language, TokenizerBuilder}; use milli::{ - AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, Index, MatchBounds, MatcherBuilder, - SortError, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET, + AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, Index, LocalizedAttributesRule, + MatchBounds, MatcherBuilder, SortError, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET, }; use regex::Regex; use serde::Serialize; @@ -100,6 +101,8 @@ pub struct SearchQuery { pub attributes_to_search_on: Option>, #[deserr(default, error = DeserrJsonError, default)] pub ranking_score_threshold: Option, + #[deserr(default, error = DeserrJsonError, default)] + pub locales: Option>, } #[derive(Debug, Clone, Copy, PartialEq, Deserr)] @@ -169,6 +172,7 @@ impl fmt::Debug for SearchQuery { matching_strategy, attributes_to_search_on, ranking_score_threshold, + locales, } = self; let mut debug = f.debug_struct("SearchQuery"); @@ -250,6 +254,10 @@ impl fmt::Debug for SearchQuery { debug.field("ranking_score_threshold", &ranking_score_threshold); } + if let Some(locales) = locales { + debug.field("locales", &locales); + } + debug.finish() } } @@ -425,6 +433,8 @@ pub struct SearchQueryWithIndex { pub attributes_to_search_on: Option>, #[deserr(default, error = DeserrJsonError, default)] pub ranking_score_threshold: Option, + #[deserr(default, error = DeserrJsonError, default)] + pub locales: Option>, #[deserr(default)] pub federation_options: Option, @@ -477,6 +487,7 @@ impl SearchQueryWithIndex { attributes_to_search_on, hybrid, ranking_score_threshold, + locales, } = self; ( index_uid, @@ -506,6 +517,7 @@ impl SearchQueryWithIndex { attributes_to_search_on, hybrid, ranking_score_threshold, + locales, // do not use ..Default::default() here, // rather add any missing field from `SearchQuery` to `SearchQueryWithIndex` }, @@ -866,6 +878,10 @@ fn prepare_search<'t>( search.sort_criteria(sort); } + if let Some(ref locales) = query.locales { + search.locales(locales.iter().copied().map(Into::into).collect()); + } + Ok((search, is_finite_pagination, max_total_hits, offset)) } @@ -917,6 +933,7 @@ pub fn perform_search( highlight_pre_tag, highlight_post_tag, crop_marker, + locales, // already used in prepare_search vector: _, hybrid: _, @@ -941,6 +958,7 @@ pub fn perform_search( sort, show_ranking_score, show_ranking_score_details, + locales: locales.map(|l| l.iter().copied().map(Into::into).collect()), }; let documents = make_hits( @@ -1046,6 +1064,7 @@ struct AttributesFormat { sort: Option>, show_ranking_score: bool, show_ranking_score_details: bool, + locales: Option>, } #[derive(Debug, Clone, Copy, PartialEq, Eq)] @@ -1093,19 +1112,16 @@ struct HitMaker<'a> { show_ranking_score_details: bool, sort: Option>, show_matches_position: bool, + locales: Option>, } impl<'a> HitMaker<'a> { pub fn tokenizer<'b>( - script_lang_map: &'b HashMap>, dictionary: Option<&'b [&'b str]>, separators: Option<&'b [&'b str]>, ) -> milli::tokenizer::Tokenizer<'b> { let mut tokenizer_builder = TokenizerBuilder::default(); tokenizer_builder.create_char_map(true); - if !script_lang_map.is_empty() { - tokenizer_builder.allow_list(script_lang_map); - } if let Some(separators) = separators { tokenizer_builder.separators(separators); @@ -1218,6 +1234,7 @@ impl<'a> HitMaker<'a> { show_ranking_score_details: format.show_ranking_score_details, show_matches_position: format.show_matches_position, sort: format.sort, + locales: format.locales, }) } @@ 
-1280,6 +1297,7 @@ impl<'a> HitMaker<'a> { &self.formatted_options, self.show_matches_position, &self.displayed_ids, + self.locales.as_deref(), )?; if let Some(sort) = self.sort.as_ref() { @@ -1312,8 +1330,6 @@ fn make_hits<'a>( ) -> Result, MeilisearchHttpError> { let mut documents = Vec::new(); - let script_lang_map = index.script_language(rtxn)?; - let dictionary = index.dictionary(rtxn)?; let dictionary: Option> = dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect()); @@ -1321,8 +1337,7 @@ fn make_hits<'a>( let separators: Option> = separators.as_ref().map(|x| x.iter().map(String::as_str).collect()); - let tokenizer = - HitMaker::tokenizer(&script_lang_map, dictionary.as_deref(), separators.as_deref()); + let tokenizer = HitMaker::tokenizer(dictionary.as_deref(), separators.as_deref()); let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer); @@ -1341,6 +1356,7 @@ pub fn perform_facet_search( facet_name: String, search_kind: SearchKind, features: RoFeatures, + locales: Option>, ) -> Result { let before_search = Instant::now(); let rtxn = index.read_txn()?; @@ -1363,6 +1379,10 @@ pub fn perform_facet_search( facet_search.max_values(max_facets as usize); } + if let Some(locales) = locales { + facet_search.locales(locales); + } + Ok(FacetSearchResult { facet_hits: facet_search.execute()?, facet_query, @@ -1443,6 +1463,7 @@ pub fn perform_similar( sort: None, show_ranking_score, show_ranking_score_details, + locales: None, }; let hits = make_hits( @@ -1631,6 +1652,7 @@ fn format_fields( formatted_options: &BTreeMap, compute_matches: bool, displayable_ids: &BTreeSet, + locales: Option<&[Language]>, ) -> Result<(Option, Document), MeilisearchHttpError> { let mut matches_position = compute_matches.then(BTreeMap::new); let mut document = document.clone(); @@ -1664,6 +1686,14 @@ fn format_fields( let mut infos = Vec::new(); *value = format_value(std::mem::take(value), builder, format, &mut infos, compute_matches); + *value = format_value( + std::mem::take(value), + builder, + format, + &mut infos, + compute_matches, + locales, + ); if let Some(matches) = matches_position.as_mut() { if !infos.is_empty() { @@ -1688,10 +1718,11 @@ fn format_value( format_options: Option, infos: &mut Vec, compute_matches: bool, + locales: Option<&[Language]>, ) -> Value { match value { Value::String(old_string) => { - let mut matcher = builder.build(&old_string); + let mut matcher = builder.build(&old_string, locales); if compute_matches { let matches = matcher.matches(); infos.extend_from_slice(&matches[..]); @@ -1718,6 +1749,7 @@ fn format_value( }), infos, compute_matches, + locales, ) }) .collect(), @@ -1737,6 +1769,7 @@ fn format_value( }), infos, compute_matches, + locales, ), ) }) @@ -1745,7 +1778,7 @@ fn format_value( Value::Number(number) => { let s = number.to_string(); - let mut matcher = builder.build(&s); + let mut matcher = builder.build(&s, locales); if compute_matches { let matches = matcher.matches(); infos.extend_from_slice(&matches[..]); diff --git a/milli/examples/search.rs b/milli/examples/search.rs index 87020994a..bb374f629 100644 --- a/milli/examples/search.rs +++ b/milli/examples/search.rs @@ -68,6 +68,7 @@ fn main() -> Result<(), Box> { logger, TimeBudget::max(), None, + None, )?; if let Some((logger, dir)) = detailed_logger { logger.finish(&mut ctx, Path::new(dir))?; diff --git a/milli/src/search/facet/search.rs b/milli/src/search/facet/search.rs index a6756a7af..6ef62e39a 100644 --- a/milli/src/search/facet/search.rs +++ 
b/milli/src/search/facet/search.rs @@ -3,7 +3,7 @@ use std::collections::BinaryHeap; use std::ops::ControlFlow; use charabia::normalizer::NormalizerOption; -use charabia::Normalize; +use charabia::{Language, Normalize, StrDetection, Token}; use fst::automaton::{Automaton, Str}; use fst::{IntoStreamer, Streamer}; use roaring::RoaringBitmap; @@ -23,6 +23,7 @@ pub struct SearchForFacetValues<'a> { search_query: Search<'a>, max_values: usize, is_hybrid: bool, + locales: Option>, } impl<'a> SearchForFacetValues<'a> { @@ -37,6 +38,7 @@ impl<'a> SearchForFacetValues<'a> { search_query, max_values: DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET, is_hybrid, + locales: None, } } @@ -50,6 +52,11 @@ impl<'a> SearchForFacetValues<'a> { self } + pub fn locales(&mut self, locales: Vec) -> &mut Self { + self.locales = Some(locales); + self + } + fn one_original_value_of( &self, field_id: FieldId, @@ -109,8 +116,7 @@ impl<'a> SearchForFacetValues<'a> { match self.query.as_ref() { Some(query) => { - let options = NormalizerOption { lossy: true, ..Default::default() }; - let query = query.normalize(&options); + let query = normalize_facet_string(query, self.locales.as_deref()); let query = query.as_ref(); let authorize_typos = self.search_query.index.authorize_typos(rtxn)?; @@ -330,3 +336,15 @@ impl ValuesCollection { } } } +fn normalize_facet_string(facet_string: &str, locales: Option<&[Language]>) -> String { + let options = NormalizerOption { lossy: true, ..Default::default() }; + let mut detection = StrDetection::new(facet_string, locales); + let token = Token { + lemma: std::borrow::Cow::Borrowed(facet_string), + script: detection.script(), + language: detection.language(), + ..Default::default() + }; + + token.normalize(&options).lemma.to_string() +} diff --git a/milli/src/search/hybrid.rs b/milli/src/search/hybrid.rs index 2102bf479..e08111473 100644 --- a/milli/src/search/hybrid.rs +++ b/milli/src/search/hybrid.rs @@ -174,6 +174,7 @@ impl<'a> Search<'a> { semantic: self.semantic.clone(), time_budget: self.time_budget.clone(), ranking_score_threshold: self.ranking_score_threshold, + locales: self.locales.clone(), }; let semantic = search.semantic.take(); diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 2b2afa607..0f5eb23e1 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -1,6 +1,7 @@ use std::fmt; use std::sync::Arc; +use charabia::Language; use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA}; use once_cell::sync::Lazy; use roaring::bitmap::RoaringBitmap; @@ -52,6 +53,7 @@ pub struct Search<'a> { semantic: Option, time_budget: TimeBudget, ranking_score_threshold: Option, + locales: Option>, } impl<'a> Search<'a> { @@ -72,6 +74,7 @@ impl<'a> Search<'a> { rtxn, index, semantic: None, + locales: None, time_budget: TimeBudget::max(), ranking_score_threshold: None, } @@ -160,6 +163,11 @@ impl<'a> Search<'a> { self } + pub fn locales(&mut self, locales: Vec) -> &mut Search<'a> { + self.locales = Some(locales); + self + } + pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result { if has_vector_search { let ctx = SearchContext::new(self.index, self.rtxn)?; @@ -232,6 +240,7 @@ impl<'a> Search<'a> { &mut DefaultSearchLogger, self.time_budget.clone(), self.ranking_score_threshold, + self.locales.as_ref(), )?, }; @@ -272,6 +281,7 @@ impl fmt::Debug for Search<'_> { semantic, time_budget, ranking_score_threshold, + locales, } = self; f.debug_struct("Search") .field("query", query) @@ -292,6 +302,7 @@ impl fmt::Debug for Search<'_> { ) 
.field("time_budget", time_budget) .field("ranking_score_threshold", ranking_score_threshold) + .field("locales", locales) .finish() } } diff --git a/milli/src/search/new/matches/mod.rs b/milli/src/search/new/matches/mod.rs index 7bc4d9c5d..4688b8f32 100644 --- a/milli/src/search/new/matches/mod.rs +++ b/milli/src/search/new/matches/mod.rs @@ -1,6 +1,6 @@ use std::borrow::Cow; -use charabia::{SeparatorKind, Token, Tokenizer}; +use charabia::{Language, SeparatorKind, Token, Tokenizer}; pub use matching_words::MatchingWords; use matching_words::{MatchType, PartialMatch, WordId}; use serde::Serialize; @@ -46,7 +46,11 @@ impl<'m> MatcherBuilder<'m> { self } - pub fn build<'t>(&self, text: &'t str) -> Matcher<'t, 'm, '_> { + pub fn build<'t, 'lang>( + &self, + text: &'t str, + locales: Option<&'lang [Language]>, + ) -> Matcher<'t, 'm, '_, 'lang> { let crop_marker = match &self.crop_marker { Some(marker) => marker.as_str(), None => DEFAULT_CROP_MARKER, @@ -68,6 +72,7 @@ impl<'m> MatcherBuilder<'m> { highlight_prefix, highlight_suffix, matches: None, + locales, } } } @@ -107,17 +112,18 @@ pub struct MatchBounds { /// Structure used to analyze a string, compute words that match, /// and format the source string, returning a highlighted and cropped sub-string. -pub struct Matcher<'t, 'tokenizer, 'b> { +pub struct Matcher<'t, 'tokenizer, 'b, 'lang> { text: &'t str, matching_words: &'b MatchingWords, tokenizer: &'b Tokenizer<'tokenizer>, + locales: Option<&'lang [Language]>, crop_marker: &'b str, highlight_prefix: &'b str, highlight_suffix: &'b str, matches: Option<(Vec>, Vec)>, } -impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_> { +impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> { /// Iterates over tokens and save any of them that matches the query. fn compute_matches(&mut self) -> &mut Self { /// some words are counted as matches only if they are close together and in the good order, @@ -173,7 +179,8 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_> { false } - let tokens: Vec<_> = self.tokenizer.tokenize(self.text).collect(); + let tokens: Vec<_> = + self.tokenizer.tokenize_with_allow_list(self.text, self.locales).collect(); let mut matches = Vec::new(); let mut words_positions = tokens @@ -530,6 +537,7 @@ mod tests { &mut crate::DefaultSearchLogger, TimeBudget::max(), None, + None, ) .unwrap(); @@ -553,19 +561,19 @@ mod tests { // Text without any match. let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!"; - let mut matcher = builder.build(text); + let mut matcher = builder.build(text, None); // no crop and no highlight should return complete text. assert_eq!(&matcher.format(format_options), &text); // Text containing all matches. let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World."; - let mut matcher = builder.build(text); + let mut matcher = builder.build(text, None); // no crop and no highlight should return complete text. assert_eq!(&matcher.format(format_options), &text); // Text containing some matches. let text = "Natalie risk her future to build a world with the boy she loves."; - let mut matcher = builder.build(text); + let mut matcher = builder.build(text, None); // no crop and no highlight should return complete text. assert_eq!(&matcher.format(format_options), &text); } @@ -580,23 +588,23 @@ mod tests { // empty text. 
let text = ""; - let mut matcher = builder.build(text); + let mut matcher = builder.build(text, None); assert_eq!(&matcher.format(format_options), ""); // text containing only separators. let text = ":-)"; - let mut matcher = builder.build(text); + let mut matcher = builder.build(text, None); assert_eq!(&matcher.format(format_options), ":-)"); // Text without any match. let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!"; - let mut matcher = builder.build(text); + let mut matcher = builder.build(text, None); // no crop should return complete text, because there is no matches. assert_eq!(&matcher.format(format_options), &text); // Text containing all matches. let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World."; - let mut matcher = builder.build(text); + let mut matcher = builder.build(text, None); // no crop should return complete text with highlighted matches. insta::assert_snapshot!( matcher.format(format_options), @@ -605,7 +613,7 @@ mod tests { // Text containing some matches. let text = "Natalie risk her future to build a world with the boy she loves."; - let mut matcher = builder.build(text); + let mut matcher = builder.build(text, None); // no crop should return complete text with highlighted matches. insta::assert_snapshot!( matcher.format(format_options), @@ -622,7 +630,7 @@ mod tests { // Text containing prefix match. let text = "Ŵôřlḑôle"; - let mut matcher = builder.build(text); + let mut matcher = builder.build(text, None); // no crop should return complete text with highlighted matches. insta::assert_snapshot!( matcher.format(format_options), @@ -631,7 +639,7 @@ mod tests { // Text containing unicode match. let text = "Ŵôřlḑ"; - let mut matcher = builder.build(text); + let mut matcher = builder.build(text, None); // no crop should return complete text with highlighted matches. insta::assert_snapshot!( matcher.format(format_options), @@ -643,7 +651,7 @@ mod tests { // Text containing unicode match. let text = "Westfália"; - let mut matcher = builder.build(text); + let mut matcher = builder.build(text, None); // no crop should return complete text with highlighted matches. insta::assert_snapshot!( matcher.format(format_options), @@ -661,7 +669,7 @@ mod tests { // empty text. let text = ""; - let mut matcher = builder.build(text); + let mut matcher = builder.build(text, None); insta::assert_snapshot!( matcher.format(format_options), @"" @@ -669,7 +677,7 @@ mod tests { // text containing only separators. let text = ":-)"; - let mut matcher = builder.build(text); + let mut matcher = builder.build(text, None); insta::assert_snapshot!( matcher.format(format_options), @":-)" @@ -677,7 +685,7 @@ mod tests { // Text without any match. let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!"; - let mut matcher = builder.build(text); + let mut matcher = builder.build(text, None); // no highlight should return 10 first words with a marker at the end. insta::assert_snapshot!( matcher.format(format_options), @@ -686,7 +694,7 @@ mod tests { // Text without any match starting by a separator. let text = "(A quick brown fox can not jump 32 feet, right? Brr, it is cold!)"; - let mut matcher = builder.build(text); + let mut matcher = builder.build(text, None); // no highlight should return 10 first words with a marker at the end. insta::assert_snapshot!( matcher.format(format_options), @@ -695,7 +703,7 @@ mod tests { // Test phrase propagation let text = "Natalie risk her future. 
         let text = "Natalie risk her future. Split The World is a book written by Emily Henry. I never read it.";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
         // should crop the phrase instead of cropping around the match.
         insta::assert_snapshot!(
             matcher.format(format_options),
@@ -704,7 +712,7 @@ mod tests {
         // Text containing some matches.
         let text = "Natalie risk her future to build a world with the boy she loves.";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
         // no highlight should return 10 last words with a marker at the start.
         insta::assert_snapshot!(
             matcher.format(format_options),
@@ -713,7 +721,7 @@ mod tests {
         // Text containing all matches.
         let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
         // no highlight should return 10 last words with a marker at the start.
         insta::assert_snapshot!(
             matcher.format(format_options),
@@ -722,7 +730,7 @@ mod tests {
         // Text containing a match unordered and a match ordered.
         let text = "The world split void void void void void void void void void split the world void void";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
         // crop should return 10 last words with a marker at the start.
         insta::assert_snapshot!(
             matcher.format(format_options),
@@ -731,7 +739,7 @@ mod tests {
         // Text containing matches with different density.
         let text = "split void the void void world void void void void void void void void void void split the world void void";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
         // crop should return 10 last words with a marker at the start.
         insta::assert_snapshot!(
             matcher.format(format_options),
@@ -740,7 +748,7 @@ mod tests {
         // Text containing matches with same word.
         let text = "split split split split split split void void void void void void void void void void split the world void void";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
         // crop should return 10 last words with a marker at the start.
         insta::assert_snapshot!(
             matcher.format(format_options),
@@ -758,7 +766,7 @@ mod tests {
         // empty text.
         let text = "";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
         insta::assert_snapshot!(
             matcher.format(format_options),
             @""
         );

         // text containing only separators.
         let text = ":-)";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
         insta::assert_snapshot!(
             matcher.format(format_options),
             @":-)"
         );

         // Text without any match.
         let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
         // both should return 10 first words with a marker at the end.
         insta::assert_snapshot!(
             matcher.format(format_options),
@@ -783,7 +791,7 @@ mod tests {
         // Text containing some matches.
         let text = "Natalie risk her future to build a world with the boy she loves.";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
         // both should return 10 last words with a marker at the start and highlighted matches.
         insta::assert_snapshot!(
             matcher.format(format_options),
@@ -792,7 +800,7 @@ mod tests {
         // Text containing all matches.
let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World."; - let mut matcher = builder.build(text); + let mut matcher = builder.build(text, None); // both should return 10 last words with a marker at the start and highlighted matches. insta::assert_snapshot!( matcher.format(format_options), @@ -801,7 +809,7 @@ mod tests { // Text containing a match unordered and a match ordered. let text = "The world split void void void void void void void void void split the world void void"; - let mut matcher = builder.build(text); + let mut matcher = builder.build(text, None); // crop should return 10 last words with a marker at the start. insta::assert_snapshot!( matcher.format(format_options), @@ -824,7 +832,7 @@ mod tests { let text = "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!"; let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "\"the world\""); - let mut matcher = builder.build(text); + let mut matcher = builder.build(text, None); // should return 10 words with a marker at the start as well the end, and the highlighted matches. insta::assert_snapshot!( matcher.format(format_options), @@ -832,7 +840,7 @@ mod tests { ); let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "those \"and those\""); - let mut matcher = builder.build(text); + let mut matcher = builder.build(text, None); // should highlight "those" and the phrase "and those". insta::assert_snapshot!( matcher.format(format_options), @@ -851,7 +859,7 @@ mod tests { // set a smaller crop size let format_options = FormatOptions { highlight: false, crop: Some(2) }; - let mut matcher = builder.build(text); + let mut matcher = builder.build(text, None); // because crop size < query size, partially format matches. insta::assert_snapshot!( matcher.format(format_options), @@ -860,7 +868,7 @@ mod tests { // set a smaller crop size let format_options = FormatOptions { highlight: false, crop: Some(1) }; - let mut matcher = builder.build(text); + let mut matcher = builder.build(text, None); // because crop size < query size, partially format matches. insta::assert_snapshot!( matcher.format(format_options), @@ -869,7 +877,7 @@ mod tests { // set crop size to 0 let format_options = FormatOptions { highlight: false, crop: Some(0) }; - let mut matcher = builder.build(text); + let mut matcher = builder.build(text, None); // because crop size is 0, crop is ignored. 
         insta::assert_snapshot!(
             matcher.format(format_options),
@@ -889,7 +897,7 @@ mod tests {
         let format_options = FormatOptions { highlight: true, crop: None };
         let text = "the do or die can't be he do and or isn't he";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
         insta::assert_snapshot!(
             matcher.format(format_options),
             @"_the_ _do_ _or_ die can't be he do and or isn'_t_ _he_"
diff --git a/milli/src/search/new/mod.rs b/milli/src/search/new/mod.rs
index 78b7a0446..577e12a39 100644
--- a/milli/src/search/new/mod.rs
+++ b/milli/src/search/new/mod.rs
@@ -24,7 +24,7 @@ mod tests;
 use std::collections::HashSet;

 use bucket_sort::{bucket_sort, BucketSortOutput};
-use charabia::TokenizerBuilder;
+use charabia::{Language, TokenizerBuilder};
 use db_cache::DatabaseCache;
 use exact_attribute::ExactAttribute;
 use graph_based_ranking_rule::{Exactness, Fid, Position, Proximity, Typo};
@@ -639,6 +639,7 @@ pub fn execute_search(
     query_graph_logger: &mut dyn SearchLogger<QueryGraph>,
     time_budget: TimeBudget,
     ranking_score_threshold: Option<f64>,
+    locales: Option<&Vec<Language>>,
 ) -> Result<PartialSearchResult> {
     check_sort_criteria(ctx, sort_criteria.as_ref())?;

@@ -670,9 +671,8 @@ pub fn execute_search(
         tokbuilder.words_dict(dictionary);
     }

-    let languages = ctx.index.languages(ctx.txn)?;
-    if !languages.is_empty() {
-        tokbuilder.allow_list(&languages);
+    if let Some(locales) = locales {
+        tokbuilder.allow_list(locales);
     }

     let tokenizer = tokbuilder.build();
diff --git a/milli/src/search/new/query_term/parse_query.rs b/milli/src/search/new/query_term/parse_query.rs
index d4c1c2f95..bb98f19ce 100644
@@ -24,7 +24,7 @@ pub struct ExtractedTokens {
 #[tracing::instrument(level = "trace", skip_all, target = "search::query")]
 pub fn located_query_terms_from_tokens(
     ctx: &mut SearchContext<'_>,
-    query: NormalizedTokenIter<'_, '_>,
+    query: NormalizedTokenIter<'_, '_, '_, '_>,
     words_limit: Option<usize>,
 ) -> Result<ExtractedTokens> {
     let nbr_typos = number_of_typos_allowed(ctx)?;

From 04fa44e7eb6568cf76fe52f56c8c0c3270bf32e9 Mon Sep 17 00:00:00 2001
From: ManyTheFish
Date: Tue, 23 Jul 2024 14:51:36 +0200
Subject: [PATCH 4/9] Implement localized attributes settings

---
 dump/src/lib.rs                               |   1 +
 dump/src/reader/compat/v5_to_v6.rs            |   1 +
 meilisearch-types/src/error.rs                |   1 +
 meilisearch-types/src/locales.rs              |  26 ++++
 meilisearch-types/src/settings.rs             |  26 +++-
 .../src/routes/indexes/facet_search.rs        |   7 ++
 meilisearch/src/routes/indexes/settings.rs    |  23 ++++
 meilisearch/src/search/mod.rs                 |  22 +++-
 milli/src/heed_codec/mod.rs                   |   2 -
 milli/src/heed_codec/script_language_codec.rs |  39 ------
 milli/src/index.rs                            |  92 ++++----------
 milli/src/lib.rs                              |   4 +
 milli/src/localized_attributes_rules.rs       | 114 ++++++++++++++++++
 milli/src/update/clear_documents.rs           |   2 -
 .../extract/extract_docid_word_positions.rs   |  40 +++---
 .../extract/extract_facet_string_docids.rs    | 110 ++++++++++++-----
 milli/src/update/index_documents/mod.rs       |  38 ------
 milli/src/update/settings.rs                  |  66 +++++++++-
 18 files changed, 405 insertions(+), 209 deletions(-)
 delete mode 100644 milli/src/heed_codec/script_language_codec.rs
 create mode 100644 milli/src/localized_attributes_rules.rs

diff --git a/dump/src/lib.rs b/dump/src/lib.rs
index 722633ec6..a17fcf941 100644
--- a/dump/src/lib.rs
+++ b/dump/src/lib.rs
@@ -286,6 +286,7 @@ pub(crate) mod test {
             pagination: Setting::NotSet,
             embedders: Setting::NotSet,
             search_cutoff_ms: Setting::NotSet,
+            localized_attributes: Setting::NotSet,
             _kind: std::marker::PhantomData,
         };

         settings.check()
diff --git a/dump/src/reader/compat/v5_to_v6.rs b/dump/src/reader/compat/v5_to_v6.rs
index e6e030186..40a055465 100644
--- a/dump/src/reader/compat/v5_to_v6.rs
+++ b/dump/src/reader/compat/v5_to_v6.rs
@@ -379,6 +379,7 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
                 v5::Setting::NotSet => v6::Setting::NotSet,
             },
             embedders: v6::Setting::NotSet,
+            localized_attributes: v6::Setting::NotSet,
             search_cutoff_ms: v6::Setting::NotSet,
             _kind: std::marker::PhantomData,
         }
diff --git a/meilisearch-types/src/error.rs b/meilisearch-types/src/error.rs
index e56949b57..4d80fe9c9 100644
--- a/meilisearch-types/src/error.rs
+++ b/meilisearch-types/src/error.rs
@@ -298,6 +298,7 @@ InvalidSettingsSeparatorTokens , InvalidRequest , BAD_REQUEST ;
 InvalidSettingsDictionary , InvalidRequest , BAD_REQUEST ;
 InvalidSettingsSynonyms , InvalidRequest , BAD_REQUEST ;
 InvalidSettingsTypoTolerance , InvalidRequest , BAD_REQUEST ;
+InvalidSettingsLocalizedAttributes , InvalidRequest , BAD_REQUEST ;
 InvalidState , Internal , INTERNAL_SERVER_ERROR ;
 InvalidStoreFile , Internal , INTERNAL_SERVER_ERROR ;
 InvalidSwapDuplicateIndexFound , InvalidRequest , BAD_REQUEST ;
diff --git a/meilisearch-types/src/locales.rs b/meilisearch-types/src/locales.rs
index 14972fc33..6f7fb3a40 100644
--- a/meilisearch-types/src/locales.rs
+++ b/meilisearch-types/src/locales.rs
@@ -130,3 +130,29 @@ make_locale! {
     Tgl,
     Hye
 }
+
+#[derive(Debug, Clone, PartialEq, Eq, Deserr, Serialize, Deserialize)]
+#[deserr(rename_all = camelCase)]
+#[serde(rename_all = "camelCase")]
+pub struct LocalizedAttributesRuleView {
+    pub attribute_patterns: Vec<String>,
+    pub locales: Vec<Locale>,
+}
+
+impl From<LocalizedAttributesRule> for LocalizedAttributesRuleView {
+    fn from(rule: LocalizedAttributesRule) -> Self {
+        Self {
+            attribute_patterns: rule.attribute_patterns,
+            locales: rule.locales.into_iter().map(|l| l.into()).collect(),
+        }
+    }
+}
+
+impl From<LocalizedAttributesRuleView> for LocalizedAttributesRule {
+    fn from(view: LocalizedAttributesRuleView) -> Self {
+        Self {
+            attribute_patterns: view.attribute_patterns,
+            locales: view.locales.into_iter().map(|l| l.into()).collect(),
+        }
+    }
+}
diff --git a/meilisearch-types/src/settings.rs b/meilisearch-types/src/settings.rs
index 8a9708d29..9e7a2bc15 100644
--- a/meilisearch-types/src/settings.rs
+++ b/meilisearch-types/src/settings.rs
@@ -17,6 +17,7 @@ use serde::{Deserialize, Serialize, Serializer};
 use crate::deserr::DeserrJsonError;
 use crate::error::deserr_codes::*;
 use crate::facet_values_sort::FacetValuesSort;
+use crate::locales::LocalizedAttributesRuleView;

 /// The maximum number of results that the engine
 /// will be able to return in one search call.
@@ -198,6 +199,9 @@ pub struct Settings<T> {
     #[serde(default, skip_serializing_if = "Setting::is_not_set")]
     #[deserr(default, error = DeserrJsonError<InvalidSettingsSearchCutoffMs>)]
     pub search_cutoff_ms: Setting<u64>,
+    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
+    #[deserr(default, error = DeserrJsonError<InvalidSettingsLocalizedAttributes>)]
+    pub localized_attributes: Setting<Vec<LocalizedAttributesRuleView>>,

     #[serde(skip)]
     #[deserr(skip)]
@@ -261,6 +265,7 @@ impl Settings<Checked> {
             pagination: Setting::Reset,
             embedders: Setting::Reset,
             search_cutoff_ms: Setting::Reset,
+            localized_attributes: Setting::Reset,
             _kind: PhantomData,
         }
     }
@@ -284,7 +289,8 @@ impl Settings<Checked> {
             pagination,
             embedders,
             search_cutoff_ms,
-            ..
+            localized_attributes: localized_attributes_rules,
+            _kind,
         } = self;

         Settings {
             pagination,
             embedders,
             search_cutoff_ms,
+            localized_attributes: localized_attributes_rules,
             _kind: PhantomData,
         }
     }
@@ -352,6 +359,7 @@
             pagination: self.pagination,
             embedders: self.embedders,
             search_cutoff_ms: self.search_cutoff_ms,
+            localized_attributes: self.localized_attributes,
             _kind: PhantomData,
         }
     }
@@ -402,6 +410,7 @@ pub fn apply_settings_to_builder(
         pagination,
         embedders,
         search_cutoff_ms,
+        localized_attributes: localized_attributes_rules,
         _kind,
     } = settings;

@@ -485,6 +494,13 @@ pub fn apply_settings_to_builder(
         Setting::NotSet => (),
     }

+    match localized_attributes_rules {
+        Setting::Set(ref rules) => builder
+            .set_localized_attributes_rules(rules.iter().cloned().map(|r| r.into()).collect()),
+        Setting::Reset => builder.reset_localized_attributes_rules(),
+        Setting::NotSet => (),
+    }
+
     match typo_tolerance {
         Setting::Set(ref value) => {
             match value.enabled {
@@ -679,6 +695,8 @@ pub fn settings(

     let search_cutoff_ms = index.search_cutoff(rtxn)?;

+    let localized_attributes_rules = index.localized_attributes_rules(rtxn)?;
+
     let mut settings = Settings {
         displayed_attributes: match displayed_attributes {
             Some(attrs) => Setting::Set(attrs),
@@ -711,6 +729,10 @@ pub fn settings(
             Some(cutoff) => Setting::Set(cutoff),
             None => Setting::Reset,
         },
+        localized_attributes: match localized_attributes_rules {
+            Some(rules) => Setting::Set(rules.into_iter().map(|r| r.into()).collect()),
+            None => Setting::Reset,
+        },
         _kind: PhantomData,
     };

@@ -902,6 +924,7 @@ pub(crate) mod test {
             faceting: Setting::NotSet,
             pagination: Setting::NotSet,
             embedders: Setting::NotSet,
+            localized_attributes: Setting::NotSet,
             search_cutoff_ms: Setting::NotSet,
             _kind: PhantomData::<Unchecked>,
         };
@@ -930,6 +953,7 @@ pub(crate) mod test {
             faceting: Setting::NotSet,
             pagination: Setting::NotSet,
             embedders: Setting::NotSet,
+            localized_attributes: Setting::NotSet,
             search_cutoff_ms: Setting::NotSet,
             _kind: PhantomData::<Unchecked>,
         };
diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs
index ecb7757af..da575fdc4 100644
--- a/meilisearch/src/routes/indexes/facet_search.rs
+++ b/meilisearch/src/routes/indexes/facet_search.rs
@@ -6,6 +6,7 @@ use meilisearch_types::deserr::DeserrJsonError;
 use meilisearch_types::error::deserr_codes::*;
 use meilisearch_types::error::ResponseError;
 use meilisearch_types::index_uid::IndexUid;
+use meilisearch_types::locales::Locale;
 use serde_json::Value;
 use tracing::debug;

@@ -48,6 +49,8 @@ pub struct FacetSearchQuery {
     pub attributes_to_search_on: Option<Vec<String>>,
     #[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
     pub ranking_score_threshold: Option<RankingScoreThreshold>,
+    #[deserr(default, error = DeserrJsonError<InvalidSearchLocales>, default)]
+    pub locales: Option<Vec<Locale>>,
 }

 pub async fn search(
@@ -67,6 +70,7 @@ pub async fn search(
     let facet_query = query.facet_query.clone();
     let facet_name = query.facet_name.clone();
+    let locales = query.locales.clone().map(|l| l.into_iter().map(Into::into).collect());
     let mut search_query = SearchQuery::from(query);

     // Tenant token search_rules.
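The `map(Into::into)` call above is where the request's public `Locale` codes become charabia `Language` values. A minimal sketch of that conversion, assuming the `From` impls that the `make_locale!` macro (shown earlier in `locales.rs`) is expected to generate for each variant pair:

```rust
use charabia::Language;
use meilisearch_types::locales::Locale;

// Hypothetical helper mirroring what the route does inline: every public
// `Locale` converts one-to-one into the tokenizer's `Language`.
fn to_allow_list(locales: Vec<Locale>) -> Vec<Language> {
    locales.into_iter().map(Into::into).collect()
}
```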
@@ -86,6 +90,7 @@ pub async fn search(
                 facet_name,
                 search_kind,
                 index_scheduler.features(),
+                locales
             )
         })
         .await?;
@@ -113,6 +118,7 @@ impl From<FacetSearchQuery> for SearchQuery {
             attributes_to_search_on,
             hybrid,
             ranking_score_threshold,
+            locales,
         } = value;

         SearchQuery {
@@ -141,6 +147,7 @@ impl From<FacetSearchQuery> for SearchQuery {
             attributes_to_search_on,
             hybrid,
             ranking_score_threshold,
+            locales,
         }
     }
 }
diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs
index e35ebc930..b62690295 100644
--- a/meilisearch/src/routes/indexes/settings.rs
+++ b/meilisearch/src/routes/indexes/settings.rs
@@ -474,6 +474,28 @@ make_setting_route!(
     }
 );

+make_setting_route!(
+    "/localized-attributes",
+    put,
+    Vec<meilisearch_types::locales::LocalizedAttributesRuleView>,
+    meilisearch_types::deserr::DeserrJsonError<
+        meilisearch_types::error::deserr_codes::InvalidSettingsLocalizedAttributes,
+    >,
+    localized_attributes,
+    "localizedAttributes",
+    analytics,
+    |rules: &Option<Vec<meilisearch_types::locales::LocalizedAttributesRuleView>>, req: &HttpRequest| {
+        use serde_json::json;
+        analytics.publish(
+            "LocalizedAttributesRules Updated".to_string(),
+            json!({
+                "locales": rules.as_ref().map(|rules| rules.iter().map(|rule| rule.locales.iter().cloned()).flatten().collect::<BTreeSet<_>>())
+            }),
+            Some(req),
+        );
+    }
+);
+
 make_setting_route!(
     "/ranking-rules",
     put,
@@ -786,6 +808,7 @@ pub async fn update_all(
             },
             "embedders": crate::routes::indexes::settings::embedder_analytics(new_settings.embedders.as_ref().set()),
             "search_cutoff_ms": new_settings.search_cutoff_ms.as_ref().set(),
+            "locales": new_settings.localized_attributes.as_ref().set().map(|rules| rules.into_iter().map(|rule| rule.locales.iter().cloned()).flatten().collect::<BTreeSet<_>>()),
         }),
         Some(&req),
     );
diff --git a/meilisearch/src/search/mod.rs b/meilisearch/src/search/mod.rs
index d28d888aa..11bf4f84e 100644
--- a/meilisearch/src/search/mod.rs
+++ b/meilisearch/src/search/mod.rs
@@ -1290,6 +1290,9 @@ impl<'a> HitMaker<'a> {
             document.insert("_vectors".into(), vectors.into());
         }

+        let localized_attributes =
+            self.index.localized_attributes_rules(self.rtxn)?.unwrap_or_default();
+
         let (matches_position, formatted) = format_fields(
             &displayed_document,
             &self.fields_ids_map,
@@ -1298,6 +1301,7 @@ impl<'a> HitMaker<'a> {
             self.show_matches_position,
             &self.displayed_ids,
             self.locales.as_deref(),
+            &localized_attributes,
         )?;

         if let Some(sort) = self.sort.as_ref() {
@@ -1365,6 +1369,14 @@ pub fn perform_facet_search(
         None => TimeBudget::default(),
     };

+    let localized_attributes = index.localized_attributes_rules(&rtxn)?.unwrap_or_default();
+    let locales = locales.or_else(|| {
+        localized_attributes
+            .into_iter()
+            .find(|attr| attr.match_str(&facet_name))
+            .map(|attr| attr.locales)
+    });
+
     let (search, _, _, _) =
         prepare_search(index, &rtxn, &search_query, &search_kind, time_budget, features)?;
     let mut facet_search = SearchForFacetValues::new(
@@ -1653,6 +1665,7 @@ fn format_fields(
     compute_matches: bool,
     displayable_ids: &BTreeSet<FieldId>,
     locales: Option<&[Language]>,
+    localized_attributes: &[LocalizedAttributesRule],
 ) -> Result<(Option<MatchesPosition>, Document), MeilisearchHttpError> {
     let mut matches_position = compute_matches.then(BTreeMap::new);
     let mut document = document.clone();
@@ -1685,7 +1698,14 @@ fn format_fields(
             .reduce(|acc, option| acc.merge(option));
         let mut infos = Vec::new();

-        *value = format_value(std::mem::take(value), builder, format, &mut infos, compute_matches);
+        // if no locales have been provided, we try to find the locales in the localized_attributes.
+        let locales = locales.or_else(|| {
+            localized_attributes
+                .iter()
+                .find(|rule| rule.match_str(key))
+                .map(LocalizedAttributesRule::locales)
+        });
+
+        *value = format_value(
+            std::mem::take(value),
+            builder,
diff --git a/milli/src/heed_codec/mod.rs b/milli/src/heed_codec/mod.rs
index 449d1955c..575b886bd 100644
--- a/milli/src/heed_codec/mod.rs
+++ b/milli/src/heed_codec/mod.rs
@@ -7,7 +7,6 @@ mod fst_set_codec;
 mod obkv_codec;
 mod roaring_bitmap;
 mod roaring_bitmap_length;
-mod script_language_codec;
 mod str_beu32_codec;
 mod str_ref;
 mod str_str_u8_codec;
@@ -26,7 +25,6 @@ pub use self::roaring_bitmap::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, Roar
 pub use self::roaring_bitmap_length::{
     BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec, RoaringBitmapLenCodec,
 };
-pub use self::script_language_codec::ScriptLanguageCodec;
 pub use self::str_beu32_codec::{StrBEU16Codec, StrBEU32Codec};
 pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec};
diff --git a/milli/src/heed_codec/script_language_codec.rs b/milli/src/heed_codec/script_language_codec.rs
deleted file mode 100644
index 35f7af3c7..000000000
--- a/milli/src/heed_codec/script_language_codec.rs
+++ /dev/null
@@ -1,39 +0,0 @@
-use std::borrow::Cow;
-use std::ffi::CStr;
-use std::str;
-
-use charabia::{Language, Script};
-use heed::BoxedError;
-
-pub struct ScriptLanguageCodec;
-
-impl<'a> heed::BytesDecode<'a> for ScriptLanguageCodec {
-    type DItem = (Script, Language);
-
-    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
-        let cstr = CStr::from_bytes_until_nul(bytes)?;
-        let script = cstr.to_str()?;
-        let script_name = Script::from_name(script);
-        // skip '\0' byte between the two strings.
-        let lan = str::from_utf8(&bytes[script.len() + 1..])?;
-        let lan_name = Language::from_name(lan);
-
-        Ok((script_name, lan_name))
-    }
-}
-
-impl<'a> heed::BytesEncode<'a> for ScriptLanguageCodec {
-    type EItem = (Script, Language);
-
-    fn bytes_encode((script, lan): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
-        let script_name = script.name().as_bytes();
-        let lan_name = lan.name().as_bytes();
-
-        let mut bytes = Vec::with_capacity(script_name.len() + lan_name.len() + 1);
-        bytes.extend_from_slice(script_name);
-        bytes.push(0);
-        bytes.extend_from_slice(lan_name);
-
-        Ok(Cow::Owned(bytes))
-    }
-}
diff --git a/milli/src/index.rs b/milli/src/index.rs
index 194f18faa..f5342f2c0 100644
--- a/milli/src/index.rs
+++ b/milli/src/index.rs
@@ -4,7 +4,6 @@ use std::convert::TryInto;
 use std::fs::File;
 use std::path::Path;

-use charabia::{Language, Script};
 use heed::types::*;
 use heed::{CompactionOption, Database, RoTxn, RwTxn, Unspecified};
 use roaring::RoaringBitmap;
@@ -19,9 +18,7 @@ use crate::heed_codec::facet::{
     FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
     FieldIdCodec, OrderedF64Codec,
 };
-use crate::heed_codec::{
-    BEU16StrCodec, FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec,
-};
+use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec};
 use crate::order_by_map::OrderByMap;
 use crate::proximity::ProximityPrecision;
 use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME;
@@ -29,8 +26,8 @@ use crate::vector::{Embedding, EmbeddingConfig};
 use crate::{
     default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
     FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
-    FieldidsWeightsMap, GeoPoint, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec,
-    Search, U8StrStrCodec, Weight, BEU16, BEU32, BEU64,
+    FieldidsWeightsMap, GeoPoint, LocalizedAttributesRule, ObkvCodec, Result, RoaringBitmapCodec,
+    RoaringBitmapLenCodec, Search, U8StrStrCodec, Weight, BEU16, BEU32, BEU64,
 };

 pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
@@ -73,6 +70,7 @@ pub mod main_key {
     pub const PROXIMITY_PRECISION: &str = "proximity-precision";
     pub const EMBEDDING_CONFIGS: &str = "embedding_configs";
     pub const SEARCH_CUTOFF: &str = "search_cutoff";
+    pub const LOCALIZED_ATTRIBUTES_RULES: &str = "localized_attributes_rules";
 }

 pub mod db_name {
@@ -101,7 +99,6 @@ pub mod db_name {
     pub const VECTOR_EMBEDDER_CATEGORY_ID: &str = "vector-embedder-category-id";
     pub const VECTOR_ARROY: &str = "vector-arroy";
     pub const DOCUMENTS: &str = "documents";
-    pub const SCRIPT_LANGUAGE_DOCIDS: &str = "script_language_docids";
 }

 #[derive(Clone)]
 pub struct Index {
@@ -142,9 +139,6 @@ pub struct Index {
     /// Maps the word prefix and a field id with all the docids where the prefix appears inside the field
     pub word_prefix_fid_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,

-    /// Maps the script and language with all the docids that corresponds to it.
-    pub script_language_docids: Database<ScriptLanguageCodec, RoaringBitmapCodec>,
-
     /// Maps the facet field id and the docids for which this field exists
     pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
     /// Maps the facet field id and the docids for which this field is set as null
@@ -198,8 +192,6 @@ impl Index {
             env.create_database(&mut wtxn, Some(EXACT_WORD_PREFIX_DOCIDS))?;
         let word_pair_proximity_docids =
             env.create_database(&mut wtxn, Some(WORD_PAIR_PROXIMITY_DOCIDS))?;
-        let script_language_docids =
-            env.create_database(&mut wtxn, Some(SCRIPT_LANGUAGE_DOCIDS))?;
         let word_position_docids = env.create_database(&mut wtxn, Some(WORD_POSITION_DOCIDS))?;
         let word_fid_docids = env.create_database(&mut wtxn, Some(WORD_FIELD_ID_DOCIDS))?;
         let field_id_word_count_docids =
@@ -243,7 +235,6 @@ impl Index {
             word_prefix_docids,
             exact_word_prefix_docids,
             word_pair_proximity_docids,
-            script_language_docids,
             word_position_docids,
             word_fid_docids,
             word_prefix_position_docids,
@@ -1562,69 +1553,32 @@ impl Index {
         self.main.remap_key_type::<Str>().delete(txn, main_key::PROXIMITY_PRECISION)
     }

-    /* script language docids */
-    /// Retrieve all the documents ids that correspond with (Script, Language) key, `None` if it is any.
-    pub fn script_language_documents_ids(
+    pub fn localized_attributes_rules(
         &self,
         rtxn: &RoTxn<'_>,
-        key: &(Script, Language),
-    ) -> heed::Result<Option<RoaringBitmap>> {
-        self.script_language_docids.get(rtxn, key)
+    ) -> heed::Result<Option<Vec<LocalizedAttributesRule>>> {
+        self.main
+            .remap_types::<Str, SerdeJson<Vec<LocalizedAttributesRule>>>()
+            .get(rtxn, main_key::LOCALIZED_ATTRIBUTES_RULES)
     }

-    pub fn script_language(
+    pub(crate) fn put_localized_attributes_rules(
         &self,
-        rtxn: &RoTxn<'_>,
-    ) -> heed::Result<HashMap<Script, Vec<Language>>> {
-        let mut script_language: HashMap<Script, Vec<Language>> = HashMap::new();
-        let mut script_language_doc_count: Vec<(Script, Language, u64)> = Vec::new();
-        let mut total = 0;
-        for sl in self.script_language_docids.iter(rtxn)? {
-            let ((script, language), docids) = sl?;
-
-            // keep only Languages that contains at least 1 document.
-            let remaining_documents_count = docids.len();
-            total += remaining_documents_count;
-            if remaining_documents_count > 0 {
-                script_language_doc_count.push((script, language, remaining_documents_count));
-            }
-        }
-
-        let threshold = total / 20; // 5% (arbitrary)
-        for (script, language, count) in script_language_doc_count {
-            if count > threshold {
-                if let Some(languages) = script_language.get_mut(&script) {
-                    (*languages).push(language);
-                } else {
-                    script_language.insert(script, vec![language]);
-                }
-            }
-        }
-
-        Ok(script_language)
+        txn: &mut RwTxn<'_>,
+        val: Vec<LocalizedAttributesRule>,
+    ) -> heed::Result<()> {
+        self.main.remap_types::<Str, SerdeJson<Vec<LocalizedAttributesRule>>>().put(
+            txn,
+            main_key::LOCALIZED_ATTRIBUTES_RULES,
+            &val,
+        )
     }

-    pub fn languages(&self, rtxn: &RoTxn<'_>) -> heed::Result<Vec<Language>> {
-        let mut script_language_doc_count: Vec<(Language, u64)> = Vec::new();
-        let mut total = 0;
-        for sl in self.script_language_docids.iter(rtxn)? {
-            let ((_script, language), docids) = sl?;
-
-            // keep only Languages that contains at least 1 document.
-            let remaining_documents_count = docids.len();
-            total += remaining_documents_count;
-            if remaining_documents_count > 0 {
-                script_language_doc_count.push((language, remaining_documents_count));
-            }
-        }
-
-        let threshold = total / 20; // 5% (arbitrary)
-
-        Ok(script_language_doc_count
-            .into_iter()
-            .filter(|(_, count)| *count > threshold)
-            .map(|(language, _)| language)
-            .collect())
+    pub(crate) fn delete_localized_attributes_rules(
+        &self,
+        txn: &mut RwTxn<'_>,
+    ) -> heed::Result<bool> {
+        self.main.remap_key_type::<Str>().delete(txn, main_key::LOCALIZED_ATTRIBUTES_RULES)
     }

     /// Put the embedding configs:
diff --git a/milli/src/lib.rs b/milli/src/lib.rs
index fcb0da19c..461971ddf 100644
--- a/milli/src/lib.rs
+++ b/milli/src/lib.rs
@@ -16,6 +16,7 @@ pub mod facet;
 mod fields_ids_map;
 pub mod heed_codec;
 pub mod index;
+mod localized_attributes_rules;
 pub mod order_by_map;
 pub mod prompt;
 pub mod proximity;
@@ -69,6 +70,9 @@ pub use self::search::{
     Search, SearchResult, SemanticSearch, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
 };

+pub use self::localized_attributes_rules::LocalizedAttributesRule;
+use self::localized_attributes_rules::LocalizedFieldIds;
+
 pub type Result<O = ()> = std::result::Result<O, error::Error>;

 pub type Attribute = u32;
diff --git a/milli/src/localized_attributes_rules.rs b/milli/src/localized_attributes_rules.rs
new file mode 100644
index 000000000..a3b3e820b
--- /dev/null
+++ b/milli/src/localized_attributes_rules.rs
@@ -0,0 +1,114 @@
+use std::collections::HashMap;
+
+use charabia::Language;
+use serde::{Deserialize, Serialize};
+
+use crate::fields_ids_map::FieldsIdsMap;
+use crate::FieldId;
+
+/// A rule that defines which locales are supported for a given attribute.
+///
+/// The rule is a list of attribute patterns and a list of locales.
+/// The attribute patterns are matched against the attribute name.
+/// The pattern `*` matches any attribute name.
+/// The pattern `attribute_name*` matches any attribute name that starts with `attribute_name`.
+/// The pattern `*attribute_name` matches any attribute name that ends with `attribute_name`.
+/// The pattern `*attribute_name*` matches any attribute name that contains `attribute_name`.
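To make the pattern semantics above concrete, here is a small usage sketch (not part of the patch) against the `LocalizedAttributesRule` type defined just below; `Language::Cmn` is the charabia variant already used by the milli test suite:

```rust
use charabia::Language;
use milli::LocalizedAttributesRule;

fn main() {
    // One rule: the attribute `title` and any attribute starting with
    // `title_` are indexed and searched as Chinese only.
    let rule = LocalizedAttributesRule::new(
        vec!["title".to_string(), "title_*".to_string()],
        vec![Language::Cmn],
    );

    assert!(rule.match_str("title")); // exact pattern
    assert!(rule.match_str("title_zh")); // `title_*` prefix pattern
    assert!(!rule.match_str("subtitle")); // no pattern matches
}
```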
+#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
+pub struct LocalizedAttributesRule {
+    pub attribute_patterns: Vec<String>,
+    pub locales: Vec<Language>,
+}
+
+impl LocalizedAttributesRule {
+    pub fn new(attribute_patterns: Vec<String>, locales: Vec<Language>) -> Self {
+        Self { attribute_patterns, locales }
+    }
+
+    pub fn match_str(&self, str: &str) -> bool {
+        self.attribute_patterns.iter().any(|pattern| match_pattern(pattern.as_str(), str))
+    }
+
+    pub fn locales(&self) -> &[Language] {
+        &self.locales
+    }
+}
+
+fn match_pattern(pattern: &str, str: &str) -> bool {
+    let res = if pattern == "*" {
+        true
+    } else if pattern.starts_with('*') && pattern.ends_with('*') {
+        str.contains(&pattern[1..pattern.len() - 1])
+    } else if pattern.ends_with('*') {
+        str.starts_with(&pattern[..pattern.len() - 1])
+    } else if pattern.starts_with('*') {
+        str.ends_with(&pattern[1..])
+    } else {
+        pattern == str
+    };
+
+    res
+}
+
+#[derive(Debug, Clone, PartialEq, Eq)]
+pub struct LocalizedFieldIds {
+    field_id_to_locales: HashMap<FieldId, Vec<Language>>,
+}
+
+impl LocalizedFieldIds {
+    pub fn new<I: Iterator<Item = FieldId>>(
+        rules: &Option<Vec<LocalizedAttributesRule>>,
+        fields_ids_map: &FieldsIdsMap,
+        fields_ids: I,
+    ) -> Self {
+        let mut field_id_to_locales = HashMap::new();
+
+        if let Some(rules) = rules {
+            let fields = fields_ids.filter_map(|field_id| {
+                fields_ids_map.name(field_id).map(|field_name| (field_id, field_name))
+            });
+
+            for (field_id, field_name) in fields {
+                let mut locales = Vec::new();
+                for rule in rules {
+                    if rule.match_str(field_name) {
+                        locales.extend(rule.locales.iter());
+                    }
+                }
+
+                if !locales.is_empty() {
+                    locales.sort();
+                    locales.dedup();
+                    field_id_to_locales.insert(field_id, locales);
+                }
+            }
+        }
+
+        Self { field_id_to_locales }
+    }
+
+    pub fn locales<'a>(&'a self, fields_id: FieldId) -> Option<&'a [Language]> {
+        self.field_id_to_locales.get(&fields_id).map(Vec::as_slice)
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn test_match_pattern() {
+        assert_eq!(match_pattern("*", "test"), true);
+        assert_eq!(match_pattern("test*", "test"), true);
+        assert_eq!(match_pattern("test*", "testa"), true);
+        assert_eq!(match_pattern("*test", "test"), true);
+        assert_eq!(match_pattern("*test", "atest"), true);
+        assert_eq!(match_pattern("*test*", "test"), true);
+        assert_eq!(match_pattern("*test*", "atesta"), true);
+        assert_eq!(match_pattern("*test*", "atest"), true);
+        assert_eq!(match_pattern("*test*", "testa"), true);
+        assert_eq!(match_pattern("test*test", "test"), false);
+        assert_eq!(match_pattern("*test", "testa"), false);
+        assert_eq!(match_pattern("test*", "atest"), false);
+    }
+}
diff --git a/milli/src/update/clear_documents.rs b/milli/src/update/clear_documents.rs
index 9eca378a5..6c4efb859 100644
--- a/milli/src/update/clear_documents.rs
+++ b/milli/src/update/clear_documents.rs
@@ -36,7 +36,6 @@ impl<'t, 'i> ClearDocuments<'t, 'i> {
             field_id_word_count_docids,
             word_prefix_position_docids,
             word_prefix_fid_docids,
-            script_language_docids,
             facet_id_f64_docids,
             facet_id_string_docids,
             facet_id_normalized_string_strings,
@@ -83,7 +82,6 @@ impl<'t, 'i> ClearDocuments<'t, 'i> {
         field_id_word_count_docids.clear(self.wtxn)?;
         word_prefix_position_docids.clear(self.wtxn)?;
         word_prefix_fid_docids.clear(self.wtxn)?;
-        script_language_docids.clear(self.wtxn)?;
         facet_id_f64_docids.clear(self.wtxn)?;
         facet_id_normalized_string_strings.clear(self.wtxn)?;
        facet_id_string_fst.clear(self.wtxn)?;
diff --git a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
index 748a3886a..ba11ceeb3 100644
--- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
+++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
@@ -3,7 +3,7 @@ use std::fs::File;
 use std::io::BufReader;
 use std::{io, mem, str};

-use charabia::{Language, SeparatorKind, Token, TokenKind, Tokenizer, TokenizerBuilder};
+use charabia::{SeparatorKind, Token, TokenKind, Tokenizer, TokenizerBuilder};
 use obkv::{KvReader, KvWriterU16};
 use roaring::RoaringBitmap;
 use serde_json::Value;
@@ -11,7 +11,7 @@ use serde_json::Value;
 use super::helpers::{create_sorter, keep_latest_obkv, sorter_into_reader, GrenadParameters};
 use crate::error::{InternalError, SerializationError};
 use crate::update::del_add::{del_add_from_two_obkvs, DelAdd, KvReaderDelAdd};
-use crate::update::settings::InnerIndexSettingsDiff;
+use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
 use crate::{FieldId, Result, MAX_POSITION_PER_ATTRIBUTE, MAX_WORD_LENGTH};

 /// Extracts the word and positions where this word appear and
@@ -57,13 +57,9 @@ pub fn extract_docid_word_positions(
         .map(|s| s.iter().map(String::as_str).collect());
     let old_dictionary: Option<Vec<_>> =
         settings_diff.old.dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
-    let mut del_builder = tokenizer_builder(
-        old_stop_words,
-        old_separators.as_deref(),
-        old_dictionary.as_deref(),
-        None,
-    );
-    let del_tokenizer = del_builder.build();
+    let del_builder =
+        tokenizer_builder(old_stop_words, old_separators.as_deref(), old_dictionary.as_deref());
+    let del_tokenizer = del_builder.into_tokenizer();

     let new_stop_words = settings_diff.new.stop_words.as_ref();
     let new_separators: Option<Vec<_>> = settings_diff
         .new
         .allowed_separators
         .as_ref()
         .map(|s| s.iter().map(String::as_str).collect());
     let new_dictionary: Option<Vec<_>> =
         settings_diff.new.dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
-    let mut add_builder = tokenizer_builder(
-        new_stop_words,
-        new_separators.as_deref(),
-        new_dictionary.as_deref(),
-        None,
-    );
-    let add_tokenizer = add_builder.build();
+    let add_builder =
+        tokenizer_builder(new_stop_words, new_separators.as_deref(), new_dictionary.as_deref());
+    let add_tokenizer = add_builder.into_tokenizer();

     // iterate over documents.
     let mut cursor = obkv_documents.into_cursor()?;
@@ -107,7 +99,7 @@ pub fn extract_docid_word_positions(
                 // deletions
                 tokens_from_document(
                     &obkv,
-                    &settings_diff.old.searchable_fields_ids,
+                    &settings_diff.old,
                     &del_tokenizer,
                     max_positions_per_attributes,
                     DelAdd::Deletion,
@@ -118,7 +110,7 @@ pub fn extract_docid_word_positions(
                 // additions
                 tokens_from_document(
                     &obkv,
-                    &settings_diff.new.searchable_fields_ids,
+                    &settings_diff.new,
                     &add_tokenizer,
                     max_positions_per_attributes,
                     DelAdd::Addition,
@@ -180,7 +172,6 @@ fn tokenizer_builder<'a>(
     stop_words: Option<&'a fst::Set<Vec<u8>>>,
     allowed_separators: Option<&'a [&str]>,
     dictionary: Option<&'a [&str]>,
-    languages: Option<&'a Vec<Language>>,
 ) -> TokenizerBuilder<'a, Vec<u8>> {
     let mut tokenizer_builder = TokenizerBuilder::new();
     if let Some(stop_words) = stop_words {
@@ -193,17 +184,13 @@ fn tokenizer_builder<'a>(
         tokenizer_builder.separators(separators);
     }

-    if let Some(languages) = languages {
-        tokenizer_builder.allow_list(languages);
-    }
-
     tokenizer_builder
 }
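With the global allow-list removed from `tokenizer_builder`, the locales are now supplied per field at tokenization time instead. A hedged sketch of that flow, assuming the `into_tokenizer` and `tokenize_with_allow_list` APIs from the charabia branch this series pins:

```rust
use charabia::{Language, TokenizerBuilder};

// Tokenize one field value, constraining language detection to the locales
// configured for that field, or falling back to automatic detection on `None`.
fn field_lemmas(text: &str, locales: Option<&[Language]>) -> Vec<String> {
    let tokenizer = TokenizerBuilder::new().into_tokenizer();
    tokenizer
        .tokenize_with_allow_list(text, locales)
        .map(|token| token.lemma().to_string())
        .collect()
}
```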
 /// Extract words mapped with their positions of a document.
 fn tokens_from_document<'a>(
     obkv: &KvReader<'a, FieldId>,
-    searchable_fields: &[FieldId],
+    settings: &InnerIndexSettings,
     tokenizer: &Tokenizer<'_>,
     max_positions_per_attributes: u32,
     del_add: DelAdd,
@@ -213,7 +200,7 @@ fn tokens_from_document<'a>(
     let mut document_writer = KvWriterU16::new(&mut buffers.obkv_buffer);
     for (field_id, field_bytes) in obkv.iter() {
         // if field is searchable.
-        if searchable_fields.as_ref().contains(&field_id) {
+        if settings.searchable_fields_ids.contains(&field_id) {
             // extract deletion or addition only.
             if let Some(field_bytes) = KvReaderDelAdd::new(field_bytes).get(del_add) {
                 // parse json.
@@ -228,7 +215,8 @@ fn tokens_from_document<'a>(
                 buffers.field_buffer.clear();
                 if let Some(field) = json_to_string(&value, &mut buffers.field_buffer) {
                     // create an iterator of token with their positions.
-                    let tokens = process_tokens(tokenizer.tokenize(field))
+                    let locales = settings.localized_searchable_fields_ids.locales(field_id);
+                    let tokens = process_tokens(tokenizer.tokenize_with_allow_list(field, locales))
                         .take_while(|(p, _)| (*p as u32) < max_positions_per_attributes);

                     for (index, token) in tokens {
diff --git a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs
index 3deace127..6452a67a1 100644
--- a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs
@@ -5,6 +5,7 @@ use std::iter::FromIterator;
 use std::{io, str};

 use charabia::normalizer::{Normalize, NormalizerOption};
+use charabia::{Language, StrDetection, Token};
 use heed::types::SerdeJson;
 use heed::BytesEncode;

@@ -26,10 +27,9 @@ use crate::{FieldId, Result, MAX_FACET_VALUE_LENGTH};
 pub fn extract_facet_string_docids(
     docid_fid_facet_string: grenad::Reader<BufReader<File>>,
     indexer: GrenadParameters,
-    _settings_diff: &InnerIndexSettingsDiff,
+    settings_diff: &InnerIndexSettingsDiff,
 ) -> Result<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
     let max_memory = indexer.max_memory_by_thread();
-    let options = NormalizerOption { lossy: true, ..Default::default() };

     let mut facet_string_docids_sorter = create_sorter(
         grenad::SortAlgorithm::Stable,
@@ -54,12 +54,8 @@ pub fn extract_facet_string_docids(
     while let Some((key, deladd_original_value_bytes)) = cursor.move_on_next()? {
         let deladd_reader = KvReaderDelAdd::new(deladd_original_value_bytes);
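The hunks below also move the "delete then re-add" early exit. The reason is that with per-field locales, an unchanged raw value can still end up under a different normalized key when the locale rules differ between the old and the new settings, so the skip is only safe once both normalized forms are known. A sketch of the decision, reusing this file's own `normalize_facet_string` helper (illustrative only, not part of the patch):

```rust
use charabia::Language;

// True when the old and new locale rules normalize the same raw facet value
// to different keys, in which case the deletion must target the old key and
// the addition the new one, as two separate sorter entries.
fn facet_keys_diverge(
    normalized_value: &str,
    old_locales: Option<&[Language]>,
    new_locales: Option<&[Language]>,
) -> bool {
    normalize_facet_string(normalized_value, old_locales)
        != normalize_facet_string(normalized_value, new_locales)
}
```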
-        // nothing to do if we delete and re-add the value.
-        if deladd_reader.get(DelAdd::Deletion).is_some()
-            && deladd_reader.get(DelAdd::Addition).is_some()
-        {
-            continue;
-        }
+        let is_same_value = deladd_reader.get(DelAdd::Deletion).is_some()
+            && deladd_reader.get(DelAdd::Addition).is_some();

         let (field_id_bytes, bytes) = try_split_array_at(key).unwrap();
         let field_id = FieldId::from_be_bytes(field_id_bytes);
@@ -72,29 +68,66 @@ pub fn extract_facet_string_docids(

         // Facet search normalization
         {
-            let mut hyper_normalized_value = normalized_value.normalize(&options);
-            let normalized_truncated_facet: String;
-            if hyper_normalized_value.len() > MAX_FACET_VALUE_LENGTH {
-                normalized_truncated_facet = hyper_normalized_value
-                    .char_indices()
-                    .take_while(|(idx, _)| *idx < MAX_FACET_VALUE_LENGTH)
-                    .map(|(_, c)| c)
-                    .collect();
-                hyper_normalized_value = normalized_truncated_facet.into();
-            }
+            let locales = settings_diff.old.localized_faceted_fields_ids.locales(field_id);
+            let old_hyper_normalized_value = normalize_facet_string(normalized_value, locales);
+            let locales = settings_diff.new.localized_faceted_fields_ids.locales(field_id);
+            let new_hyper_normalized_value = normalize_facet_string(normalized_value, locales);
+
             let set = BTreeSet::from_iter(std::iter::once(normalized_value));

-            buffer.clear();
-            let mut obkv = KvWriterDelAdd::new(&mut buffer);
-            for (deladd_key, _) in deladd_reader.iter() {
-                let val = SerdeJson::bytes_encode(&set).map_err(heed::Error::Encoding)?;
-                obkv.insert(deladd_key, val)?;
-            }
-            obkv.finish()?;
+            // if the facet string is the same, we can put the deletion and addition in the same obkv.
+            if old_hyper_normalized_value == new_hyper_normalized_value {
+                // nothing to do if we delete and re-add the value.
+                if is_same_value {
+                    continue;
+                }

-            let key = (field_id, hyper_normalized_value.as_ref());
-            let key_bytes = BEU16StrCodec::bytes_encode(&key).map_err(heed::Error::Encoding)?;
-            normalized_facet_string_docids_sorter.insert(key_bytes, &buffer)?;
+                buffer.clear();
+                let mut obkv = KvWriterDelAdd::new(&mut buffer);
+                for (deladd_key, _) in deladd_reader.iter() {
+                    let val = SerdeJson::bytes_encode(&set).map_err(heed::Error::Encoding)?;
+                    obkv.insert(deladd_key, val)?;
+                }
+                obkv.finish()?;
+
+                let key: (u16, &str) = (field_id, new_hyper_normalized_value.as_ref());
+                let key_bytes = BEU16StrCodec::bytes_encode(&key).map_err(heed::Error::Encoding)?;
+                normalized_facet_string_docids_sorter.insert(key_bytes, &buffer)?;
+            } else {
+                // if the facet string is different, we need to insert the deletion and addition in different obkv because the related key is different.
+                // deletion
+                if deladd_reader.get(DelAdd::Deletion).is_some() {
+                    // insert old value
+                    let val = SerdeJson::bytes_encode(&set).map_err(heed::Error::Encoding)?;
+                    buffer.clear();
+                    let mut obkv = KvWriterDelAdd::new(&mut buffer);
+                    obkv.insert(DelAdd::Deletion, val)?;
+                    obkv.finish()?;
+                    let key: (u16, &str) = (field_id, old_hyper_normalized_value.as_ref());
+                    let key_bytes =
+                        BEU16StrCodec::bytes_encode(&key).map_err(heed::Error::Encoding)?;
+                    normalized_facet_string_docids_sorter.insert(key_bytes, &buffer)?;
+                }
+
+                // addition
+                if deladd_reader.get(DelAdd::Addition).is_some() {
+                    // insert new value
+                    let val = SerdeJson::bytes_encode(&set).map_err(heed::Error::Encoding)?;
+                    buffer.clear();
+                    let mut obkv = KvWriterDelAdd::new(&mut buffer);
+                    obkv.insert(DelAdd::Addition, val)?;
+                    obkv.finish()?;
+                    let key: (u16, &str) = (field_id, new_hyper_normalized_value.as_ref());
+                    let key_bytes =
+                        BEU16StrCodec::bytes_encode(&key).map_err(heed::Error::Encoding)?;
+                    normalized_facet_string_docids_sorter.insert(key_bytes, &buffer)?;
+                }
+            }
+        }
+
+        // nothing to do if we delete and re-add the value.
+        if is_same_value {
+            continue;
         }

         let key = FacetGroupKey { field_id, level: 0, left_bound: normalized_value };
@@ -112,3 +145,24 @@ pub fn extract_facet_string_docids(
     let normalized = sorter_into_reader(normalized_facet_string_docids_sorter, indexer)?;
     sorter_into_reader(facet_string_docids_sorter, indexer).map(|s| (s, normalized))
 }
+
+/// Normalizes the facet string and truncates it to the max length.
+fn normalize_facet_string(facet_string: &str, locales: Option<&[Language]>) -> String {
+    let options = NormalizerOption { lossy: true, ..Default::default() };
+    let mut detection = StrDetection::new(facet_string, locales);
+    let token = Token {
+        lemma: std::borrow::Cow::Borrowed(facet_string),
+        script: detection.script(),
+        language: detection.language(),
+        ..Default::default()
+    };
+
+    // truncate the facet string to the max length
+    token
+        .normalize(&options)
+        .lemma
+        .char_indices()
+        .take_while(|(idx, _)| *idx < MAX_FACET_VALUE_LENGTH)
+        .map(|(_, c)| c)
+        .collect()
+}
diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs
index 2521b778f..1df31fff2 100644
--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@@ -3388,44 +3388,6 @@ mod tests {
         wtxn.commit().unwrap();
     }

-    #[test]
-    #[cfg(feature = "all-tokenizations")]
-    fn stored_detected_script_and_language_should_not_return_deleted_documents() {
-        use charabia::{Language, Script};
-        let index = TempIndex::new();
-        let mut wtxn = index.write_txn().unwrap();
-        index
-            .add_documents_using_wtxn(
-                &mut wtxn,
-                documents!([
-                    { "id": "0", "title": "The quick (\"brown\") fox can't jump 32.3 feet, right? Brr, it's 29.3°F!" },
-                    { "id": "1", "title": "人人生而自由﹐在尊嚴和權利上一律平等。他們賦有理性和良心﹐並應以兄弟關係的精神互相對待。" },
-                    { "id": "2", "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" },
-                    { "id": "3", "title": "関西国際空港限定トートバッグ すもももももももものうち" },
-                    { "id": "4", "title": "ภาษาไทยง่ายนิดเดียว" },
-                    { "id": "5", "title": "The quick 在尊嚴和權利上一律平等。" },
-                ]))
-            .unwrap();
-
-        let key_cmn = (Script::Cj, Language::Cmn);
-        let cj_cmn_docs =
-            index.script_language_documents_ids(&wtxn, &key_cmn).unwrap().unwrap_or_default();
-        let mut expected_cj_cmn_docids = RoaringBitmap::new();
-        expected_cj_cmn_docids.push(1);
-        expected_cj_cmn_docids.push(5);
-        assert_eq!(cj_cmn_docs, expected_cj_cmn_docids);
-
-        delete_documents(&mut wtxn, &index, &["1"]);
-        wtxn.commit().unwrap();
-
-        let rtxn = index.read_txn().unwrap();
-        let cj_cmn_docs =
-            index.script_language_documents_ids(&rtxn, &key_cmn).unwrap().unwrap_or_default();
-        let mut expected_cj_cmn_docids = RoaringBitmap::new();
-        expected_cj_cmn_docids.push(5);
-        assert_eq!(cj_cmn_docs, expected_cj_cmn_docids);
-    }
-
     #[test]
     fn delete_words_exact_attributes() {
         let index = TempIndex::new();
diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs
index 448c74fd8..2cac2777d 100644
--- a/milli/src/update/settings.rs
+++ b/milli/src/update/settings.rs
@@ -28,7 +28,7 @@ use crate::vector::settings::{
     WriteBackToDocuments,
 };
 use crate::vector::{Embedder, EmbeddingConfig, EmbeddingConfigs};
-use crate::{FieldId, FieldsIdsMap, Index, Result};
+use crate::{FieldId, FieldsIdsMap, Index, LocalizedAttributesRule, LocalizedFieldIds, Result};

 #[derive(Debug, Clone, PartialEq, Eq, Copy)]
 pub enum Setting<T> {
@@ -159,6 +159,7 @@ pub struct Settings<'a, 't, 'i> {
     proximity_precision: Setting<ProximityPrecision>,
     embedder_settings: Setting<BTreeMap<String, Setting<EmbeddingSettings>>>,
     search_cutoff: Setting<u64>,
+    localized_attributes_rules: Setting<Vec<LocalizedAttributesRule>>,
 }

 impl<'a, 't, 'i> Settings<'a, 't, 'i> {
@@ -193,6 +194,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
             proximity_precision: Setting::NotSet,
             embedder_settings: Setting::NotSet,
             search_cutoff: Setting::NotSet,
+            localized_attributes_rules: Setting::NotSet,
             indexer_config,
         }
     }
@@ -391,6 +393,14 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
         self.search_cutoff = Setting::Reset;
     }

+    pub fn set_localized_attributes_rules(&mut self, value: Vec<LocalizedAttributesRule>) {
+        self.localized_attributes_rules = Setting::Set(value);
+    }
+
+    pub fn reset_localized_attributes_rules(&mut self) {
+        self.localized_attributes_rules = Setting::Reset;
+    }
+
     #[tracing::instrument(
         level = "trace",
         skip(self, progress_callback, should_abort, settings_diff),
@@ -1118,6 +1128,24 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
         Ok(changed)
     }

+    fn update_localized_attributes_rules(&mut self) -> Result<bool> {
+        let changed = match &self.localized_attributes_rules {
+            Setting::Set(new) => {
+                let old = self.index.localized_attributes_rules(self.wtxn)?;
+                if old.as_ref() == Some(new) {
+                    false
+                } else {
+                    self.index.put_localized_attributes_rules(self.wtxn, new.clone())?;
+                    true
+                }
+            }
+            Setting::Reset => self.index.delete_localized_attributes_rules(self.wtxn)?,
+            Setting::NotSet => false,
+        };
+
+        Ok(changed)
+    }
+
     pub fn execute<FP, FA>(mut self, progress_callback: FP, should_abort: FA) -> Result<()>
     where
         FP: Fn(UpdateIndexingStep) + Sync,
@@ -1151,6 +1179,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
         self.update_searchable()?;
         self.update_exact_attributes()?;
         self.update_proximity_precision()?;
+        self.update_localized_attributes_rules()?;

         let embedding_config_updates = self.update_embedding_configs()?;

@@ -1229,6 +1258,8 @@ impl InnerIndexSettingsDiff {
                 || old_settings.allowed_separators != new_settings.allowed_separators
                 || old_settings.dictionary != new_settings.dictionary
                 || old_settings.proximity_precision != new_settings.proximity_precision
+                || old_settings.localized_searchable_fields_ids
+                    != new_settings.localized_searchable_fields_ids
         };

         let cache_exact_attributes = old_settings.exact_attributes != new_settings.exact_attributes;
@@ -1304,6 +1335,7 @@ impl InnerIndexSettingsDiff {
         }

         (existing_fields - old_faceted_fields) != (existing_fields - new_faceted_fields)
+            || self.old.localized_faceted_fields_ids != self.new.localized_faceted_fields_ids
     }

     pub fn reindex_vectors(&self) -> bool {
@@ -1341,6 +1373,8 @@ pub(crate) struct InnerIndexSettings {
     pub geo_fields_ids: Option<(FieldId, FieldId)>,
     pub non_searchable_fields_ids: Vec<FieldId>,
     pub non_faceted_fields_ids: Vec<FieldId>,
+    pub localized_searchable_fields_ids: LocalizedFieldIds,
+    pub localized_faceted_fields_ids: LocalizedFieldIds,
 }

 impl InnerIndexSettings {
@@ -1382,6 +1416,17 @@ impl InnerIndexSettings {
             }
             None => None,
         };
+        let localized_attributes_rules = index.localized_attributes_rules(rtxn)?;
+        let localized_searchable_fields_ids = LocalizedFieldIds::new(
+            &localized_attributes_rules,
+            &fields_ids_map,
+            searchable_fields_ids.iter().cloned(),
+        );
+        let localized_faceted_fields_ids = LocalizedFieldIds::new(
+            &localized_attributes_rules,
+            &fields_ids_map,
+            faceted_fields_ids.iter().cloned(),
+        );

         let vectors_fids = fields_ids_map.nested_ids(RESERVED_VECTORS_FIELD_NAME);
         searchable_fields_ids.retain(|id| !vectors_fids.contains(id));
@@ -1403,6 +1448,8 @@ impl InnerIndexSettings {
             geo_fields_ids,
             non_searchable_fields_ids: vectors_fids.clone(),
             non_faceted_fields_ids: vectors_fids.clone(),
+            localized_searchable_fields_ids,
+            localized_faceted_fields_ids,
         })
     }

@@ -1418,6 +1465,12 @@ impl InnerIndexSettings {
         index.put_faceted_fields(wtxn, &new_facets)?;

         self.faceted_fields_ids = index.faceted_fields_ids(wtxn)?;
+        let localized_attributes_rules = index.localized_attributes_rules(wtxn)?;
+        self.localized_faceted_fields_ids = LocalizedFieldIds::new(
+            &localized_attributes_rules,
+            &self.fields_ids_map,
+            self.faceted_fields_ids.iter().cloned(),
+        );
         Ok(())
     }

@@ -1441,8 +1494,13 @@ impl InnerIndexSettings {
                 &self.fields_ids_map,
             )?;
         }
-        let searchable_fields_ids = index.searchable_fields_ids(wtxn)?;
-        self.searchable_fields_ids = searchable_fields_ids;
+        self.searchable_fields_ids = index.searchable_fields_ids(wtxn)?;
+        let localized_attributes_rules = index.localized_attributes_rules(wtxn)?;
+        self.localized_searchable_fields_ids = LocalizedFieldIds::new(
+            &localized_attributes_rules,
+            &self.fields_ids_map,
+            self.searchable_fields_ids.iter().cloned(),
+        );

         Ok(())
     }

@@ -2573,6 +2631,7 @@ mod tests {
             proximity_precision,
             embedder_settings,
             search_cutoff,
+            localized_attributes_rules,
         } = settings;
         assert!(matches!(searchable_fields, Setting::NotSet));
         assert!(matches!(displayed_fields, Setting::NotSet));
         assert!(matches!(proximity_precision, Setting::NotSet));
         assert!(matches!(embedder_settings, Setting::NotSet));
         assert!(matches!(search_cutoff, Setting::NotSet));
+        assert!(matches!(localized_attributes_rules, Setting::NotSet));
     })
     .unwrap();
 }

From e06fbcc607f0dcd97681a51f90cb5103f7307980 Mon Sep 17 00:00:00 2001
From: ManyTheFish
Date: Tue, 23 Jul 2024 14:52:02 +0200
Subject: [PATCH 5/9] Update snapshots

---
 .../lib.rs/import_vectors/Intel to kefir succeeds.snap         | 2 +-
 .../src/snapshots/lib.rs/import_vectors/Intel to kefir.snap    | 2 +-
 .../snapshots/lib.rs/import_vectors/adding Intel succeeds.snap | 2 +-
 .../src/snapshots/lib.rs/import_vectors/after adding Intel.snap | 2 +-
.../import_vectors/after_registering_settings_task_vectors.snap | 2 +- .../import_vectors/settings_update_processed_vectors.snap | 2 +- .../test_settings_update/after_registering_settings_task.snap | 2 +- .../lib.rs/test_settings_update/settings_update_processed.snap | 2 +- 8 files changed, 8 insertions(+), 8 deletions(-) diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap index 6f2da1f17..a28e85204 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir succeeds.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, 
stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} 2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap index 569556a19..344134888 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel to kefir.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), 
searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, 
localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} 2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap index b626d8bc5..fd8096d13 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/adding Intel succeeds.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: 
Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: 
Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap index 65f758f32..24098d658 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/after adding Intel.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: 
Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} 1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap index 9c628461d..c11dfba62 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap +++ 
b/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: 
Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [0,] diff --git a/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap b/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap index eddf6d7e8..bc87d8212 100644 --- a/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap +++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData } }, kind: 
SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the 
{{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [] diff --git a/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap b/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap index 7873fb6cf..e5fab9ad6 100644 --- a/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap +++ b/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: 
"doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [0,] diff --git a/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap b/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap index 8a4838094..b49cee730 100644 --- a/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap +++ b/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap @@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs [] ---------------------------------------------------------------------- ### All Tasks: -0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} +0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, 
non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData }, is_deletion: false, allow_index_creation: true }} ---------------------------------------------------------------------- ### Status: enqueued [] From 4fbe048cbfa72fae7b6913e5cd85ccb682b48148 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 23 Jul 2024 15:11:29 +0200 Subject: [PATCH 6/9] Update Cargo.lock --- Cargo.lock | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 547f9c0e3..b3bc8b534 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -934,7 +934,7 @@ dependencies = [ [[package]] name = "charabia" version = "0.8.12" -source = "git+https://github.com/meilisearch/charabia.git?branch=simplify-lang-detection#a95a9217265cee515708a679a2ed08ced1ac58a3" +source = "git+https://github.com/meilisearch/charabia.git?branch=simplify-lang-detection#2992583137458afcebff5d44cae93fa46d9cf664" dependencies = [ "aho-corasick", "csv", @@ -2649,7 +2649,7 @@ checksum = "28b29a3cd74f0f4598934efe3aeba42bae0eb4680554128851ebbecb02af14e6" [[package]] name = "irg-kvariants" version = "0.1.1" -source = "git+https://github.com/meilisearch/charabia.git?branch=simplify-lang-detection#a95a9217265cee515708a679a2ed08ced1ac58a3" +source = "git+https://github.com/meilisearch/charabia.git?branch=simplify-lang-detection#2992583137458afcebff5d44cae93fa46d9cf664" dependencies = [ "csv", "once_cell", From 70d71581eefbb494b369bff07bcff78f77993815 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Tue, 23 Jul 2024 15:19:07 +0200 Subject: [PATCH 7/9] fix clippy --- meilisearch-types/src/locales.rs | 3 +- .../src/analytics/segment_analytics.rs | 2 +- .../src/routes/indexes/facet_search.rs | 2 +- meilisearch/src/routes/indexes/settings.rs | 4 +- meilisearch/src/search/mod.rs | 1 + milli/src/lib.rs | 5 +-- milli/src/localized_attributes_rules.rs | 40 +++++++++---------- 7 files changed, 27 insertions(+), 30 deletions(-) diff --git a/meilisearch-types/src/locales.rs b/meilisearch-types/src/locales.rs index 6f7fb3a40..8c15fe528 100644 --- a/meilisearch-types/src/locales.rs +++ b/meilisearch-types/src/locales.rs @@ -1,9 +1,8 @@ 
 use deserr::Deserr;
+use milli::LocalizedAttributesRule;
 use serde::{Deserialize, Serialize};
 use serde_json::json;
 
-use milli::LocalizedAttributesRule;
-
 /// Generate a Locale enum and its From and Into implementations for milli::tokenizer::Language.
 ///
 /// this enum implements `Deserr` in order to be used in the API.
diff --git a/meilisearch/src/analytics/segment_analytics.rs b/meilisearch/src/analytics/segment_analytics.rs
index 407b90658..07350d506 100644
--- a/meilisearch/src/analytics/segment_analytics.rs
+++ b/meilisearch/src/analytics/segment_analytics.rs
@@ -780,7 +780,7 @@ impl SearchAggregator {
         ret.matching_strategy.insert(format!("{:?}", matching_strategy), 1);
 
         if let Some(locales) = locales {
-            ret.locales = locales.into_iter().copied().collect();
+            ret.locales = locales.iter().copied().collect();
         }
 
         ret.highlight_pre_tag = *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG();
diff --git a/meilisearch/src/routes/indexes/facet_search.rs b/meilisearch/src/routes/indexes/facet_search.rs
index da575fdc4..a648987ca 100644
--- a/meilisearch/src/routes/indexes/facet_search.rs
+++ b/meilisearch/src/routes/indexes/facet_search.rs
@@ -90,7 +90,7 @@ pub async fn search(
             facet_name,
             search_kind,
             index_scheduler.features(),
-            locales
+            locales,
         )
     })
     .await?;
diff --git a/meilisearch/src/routes/indexes/settings.rs b/meilisearch/src/routes/indexes/settings.rs
index b62690295..6f081f1c7 100644
--- a/meilisearch/src/routes/indexes/settings.rs
+++ b/meilisearch/src/routes/indexes/settings.rs
@@ -489,7 +489,7 @@ make_setting_route!(
         analytics.publish(
             "LocalizedAttributesRules Updated".to_string(),
             json!({
-                "locales": rules.as_ref().map(|rules| rules.iter().map(|rule| rule.locales.iter().cloned()).flatten().collect::<Vec<_>>())
+                "locales": rules.as_ref().map(|rules| rules.iter().flat_map(|rule| rule.locales.iter().cloned()).collect::<Vec<_>>())
             }),
             Some(req),
         );
@@ -808,7 +808,7 @@ pub async fn update_all(
             },
             "embedders": crate::routes::indexes::settings::embedder_analytics(new_settings.embedders.as_ref().set()),
             "search_cutoff_ms": new_settings.search_cutoff_ms.as_ref().set(),
-            "locales": new_settings.localized_attributes.as_ref().set().map(|rules| rules.into_iter().map(|rule| rule.locales.iter().cloned()).flatten().collect::<Vec<_>>()),
+            "locales": new_settings.localized_attributes.as_ref().set().map(|rules| rules.iter().flat_map(|rule| rule.locales.iter().cloned()).collect::<Vec<_>>()),
         }),
         Some(&req),
     );
diff --git a/meilisearch/src/search/mod.rs b/meilisearch/src/search/mod.rs
index 11bf4f84e..dada9159b 100644
--- a/meilisearch/src/search/mod.rs
+++ b/meilisearch/src/search/mod.rs
@@ -1657,6 +1657,7 @@ fn make_document(
     Ok(document)
 }
 
+#[allow(clippy::too_many_arguments)]
 fn format_fields(
     document: &Document,
     field_ids_map: &FieldsIdsMap,
diff --git a/milli/src/lib.rs b/milli/src/lib.rs
index 461971ddf..8008b7bd1 100644
--- a/milli/src/lib.rs
+++ b/milli/src/lib.rs
@@ -63,6 +63,8 @@ pub use self::heed_codec::{
     UncheckedU8StrStrCodec,
 };
 pub use self::index::Index;
+pub use self::localized_attributes_rules::LocalizedAttributesRule;
+use self::localized_attributes_rules::LocalizedFieldIds;
 pub use self::search::facet::{FacetValueHit, SearchForFacetValues};
 pub use self::search::similar::Similar;
 pub use self::search::{
@@ -70,9 +72,6 @@ pub use self::search::{
     Search, SearchResult, SemanticSearch, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
 };
 
-pub use self::localized_attributes_rules::LocalizedAttributesRule;
-use self::localized_attributes_rules::LocalizedFieldIds;
-
 pub type Result<T> = std::result::Result<T, error::Error>;
 
 pub type Attribute = u32;
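The `NotSet` markers that dominate the snapshot churn earlier in this series come from milli's three-state setting type: a settings payload distinguishes a field that carries a value, one that is explicitly cleared, and one that is simply absent. A minimal sketch of that idea (simplified; the real `Setting<T>` in milli also carries serde/deserr attributes, which is an assumption here, not shown in this patch):

    // Three-state setting value. Because the default is NotSet, adding the new
    // `localized_attributes` field makes every pre-existing settings snapshot
    // gain a `localized_attributes: NotSet` entry without changing behavior.
    #[derive(Debug, Clone, Copy, PartialEq, Eq, Default)]
    enum Setting<T> {
        Set(T),  // the payload provided a value
        Reset,   // the payload explicitly cleared the stored value
        #[default]
        NotSet,  // the field was absent, so the stored value is left untouched
    }

    fn main() {
        let s: Setting<u32> = Setting::default();
        assert_eq!(s, Setting::NotSet);
    }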
diff --git a/milli/src/localized_attributes_rules.rs b/milli/src/localized_attributes_rules.rs
index a3b3e820b..aa4eddee1 100644
--- a/milli/src/localized_attributes_rules.rs
+++ b/milli/src/localized_attributes_rules.rs
@@ -35,19 +35,17 @@ impl LocalizedAttributesRule {
     }
 
 fn match_pattern(pattern: &str, str: &str) -> bool {
-    let res = if pattern == "*" {
+    if pattern == "*" {
         true
     } else if pattern.starts_with('*') && pattern.ends_with('*') {
         str.contains(&pattern[1..pattern.len() - 1])
-    } else if pattern.ends_with('*') {
-        str.starts_with(&pattern[..pattern.len() - 1])
-    } else if pattern.starts_with('*') {
-        str.ends_with(&pattern[1..])
+    } else if let Some(pattern) = pattern.strip_prefix('*') {
+        str.ends_with(pattern)
+    } else if let Some(pattern) = pattern.strip_suffix('*') {
+        str.starts_with(pattern)
     } else {
         pattern == str
-    };
-
-    res
+    }
 }
 
 #[derive(Debug, Clone, PartialEq, Eq)]
@@ -87,7 +85,7 @@ impl LocalizedFieldIds {
         Self { field_id_to_locales }
     }
 
-    pub fn locales<'a>(&'a self, fields_id: FieldId) -> Option<&'a [Language]> {
+    pub fn locales(&self, fields_id: FieldId) -> Option<&[Language]> {
         self.field_id_to_locales.get(&fields_id).map(Vec::as_slice)
     }
 }
@@ -98,17 +96,17 @@ mod tests {
 
     #[test]
     fn test_match_pattern() {
-        assert_eq!(match_pattern("*", "test"), true);
-        assert_eq!(match_pattern("test*", "test"), true);
-        assert_eq!(match_pattern("test*", "testa"), true);
-        assert_eq!(match_pattern("*test", "test"), true);
-        assert_eq!(match_pattern("*test", "atest"), true);
-        assert_eq!(match_pattern("*test*", "test"), true);
-        assert_eq!(match_pattern("*test*", "atesta"), true);
-        assert_eq!(match_pattern("*test*", "atest"), true);
-        assert_eq!(match_pattern("*test*", "testa"), true);
-        assert_eq!(match_pattern("test*test", "test"), false);
-        assert_eq!(match_pattern("*test", "testa"), false);
-        assert_eq!(match_pattern("test*", "atest"), false);
+        assert!(match_pattern("*", "test"));
+        assert!(match_pattern("test*", "test"));
+        assert!(match_pattern("test*", "testa"));
+        assert!(match_pattern("*test", "test"));
+        assert!(match_pattern("*test", "atest"));
+        assert!(match_pattern("*test*", "test"));
+        assert!(match_pattern("*test*", "atesta"));
+        assert!(match_pattern("*test*", "atest"));
+        assert!(match_pattern("*test*", "testa"));
+        assert!(!match_pattern("test*test", "test"));
+        assert!(!match_pattern("*test", "testa"));
+        assert!(!match_pattern("test*", "atest"));
     }
 }
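The refactored `match_pattern` above is the core of attribute-pattern matching: `*` alone matches everything, `*foo*` is a substring test, and a single leading or trailing `*` turns the pattern into a suffix or prefix test; anything else must match exactly. A self-contained copy of the function with illustrative assertions (the example field names are assumptions, mirroring the test documents used later in this series):

    fn match_pattern(pattern: &str, str: &str) -> bool {
        if pattern == "*" {
            true
        } else if pattern.starts_with('*') && pattern.ends_with('*') {
            str.contains(&pattern[1..pattern.len() - 1])
        } else if let Some(pattern) = pattern.strip_prefix('*') {
            str.ends_with(pattern)
        } else if let Some(pattern) = pattern.strip_suffix('*') {
            str.starts_with(pattern)
        } else {
            pattern == str
        }
    }

    fn main() {
        // Suffix pattern, as used by `attributePatterns` like "*_zh":
        assert!(match_pattern("*_zh", "name_zh"));
        // Prefix pattern for nested fields, like "document_ja.*":
        assert!(match_pattern("document_ja.*", "document_ja.name"));
        // There is no infix wildcard: "test*test" only matches itself literally.
        assert!(!match_pattern("test*test", "testXtest"));
    }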
make_locale { impl std::fmt::Display for LocaleFormatError { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { let valid_locales = [$(Locale::$language),+].iter().map(|l| format!("`{}`", json!(l).as_str().unwrap())).collect::>().join(", "); - write!(f, "Unknown value `{}`, expected one of {}", self.invalid_locale, valid_locales) - } - } - - impl std::error::Error for LocaleFormatError {} - - impl std::str::FromStr for Locale { - type Err = LocaleFormatError; - - fn from_str(s: &str) -> Result { - milli::tokenizer::Language::from_code(s).map(Self::from).ok_or(LocaleFormatError { - invalid_locale: s.to_string(), - }) + write!(f, "Unsupported locale `{}`, expected one of {}", self.invalid_locale, valid_locales) } } }; @@ -130,6 +118,18 @@ make_locale! { Hye } +impl std::error::Error for LocaleFormatError {} + +impl std::str::FromStr for Locale { + type Err = LocaleFormatError; + + fn from_str(s: &str) -> Result { + milli::tokenizer::Language::from_code(s) + .map(Self::from) + .ok_or(LocaleFormatError { invalid_locale: s.to_string() }) + } +} + #[derive(Debug, Clone, PartialEq, Eq, Deserr, Serialize, Deserialize)] #[deserr(rename_all = camelCase)] #[serde(rename_all = "camelCase")] diff --git a/meilisearch/tests/dumps/mod.rs b/meilisearch/tests/dumps/mod.rs index 92f72fe78..ea98e200f 100644 --- a/meilisearch/tests/dumps/mod.rs +++ b/meilisearch/tests/dumps/mod.rs @@ -78,7 +78,8 @@ async fn import_dump_v1_movie_raw() { "pagination": { "maxTotalHits": 1000 }, - "searchCutoffMs": null + "searchCutoffMs": null, + "localizedAttributes": null } "### ); @@ -240,7 +241,8 @@ async fn import_dump_v1_movie_with_settings() { "pagination": { "maxTotalHits": 1000 }, - "searchCutoffMs": null + "searchCutoffMs": null, + "localizedAttributes": null } "### ); @@ -388,7 +390,8 @@ async fn import_dump_v1_rubygems_with_settings() { "pagination": { "maxTotalHits": 1000 }, - "searchCutoffMs": null + "searchCutoffMs": null, + "localizedAttributes": null } "### ); @@ -522,7 +525,8 @@ async fn import_dump_v2_movie_raw() { "pagination": { "maxTotalHits": 1000 }, - "searchCutoffMs": null + "searchCutoffMs": null, + "localizedAttributes": null } "### ); @@ -668,7 +672,8 @@ async fn import_dump_v2_movie_with_settings() { "pagination": { "maxTotalHits": 1000 }, - "searchCutoffMs": null + "searchCutoffMs": null, + "localizedAttributes": null } "### ); @@ -813,7 +818,8 @@ async fn import_dump_v2_rubygems_with_settings() { "pagination": { "maxTotalHits": 1000 }, - "searchCutoffMs": null + "searchCutoffMs": null, + "localizedAttributes": null } "### ); @@ -947,7 +953,8 @@ async fn import_dump_v3_movie_raw() { "pagination": { "maxTotalHits": 1000 }, - "searchCutoffMs": null + "searchCutoffMs": null, + "localizedAttributes": null } "### ); @@ -1093,7 +1100,8 @@ async fn import_dump_v3_movie_with_settings() { "pagination": { "maxTotalHits": 1000 }, - "searchCutoffMs": null + "searchCutoffMs": null, + "localizedAttributes": null } "### ); @@ -1238,7 +1246,8 @@ async fn import_dump_v3_rubygems_with_settings() { "pagination": { "maxTotalHits": 1000 }, - "searchCutoffMs": null + "searchCutoffMs": null, + "localizedAttributes": null } "### ); @@ -1372,7 +1381,8 @@ async fn import_dump_v4_movie_raw() { "pagination": { "maxTotalHits": 1000 }, - "searchCutoffMs": null + "searchCutoffMs": null, + "localizedAttributes": null } "### ); @@ -1518,7 +1528,8 @@ async fn import_dump_v4_movie_with_settings() { "pagination": { "maxTotalHits": 1000 }, - "searchCutoffMs": null + "searchCutoffMs": null, + "localizedAttributes": null 
} "### ); @@ -1663,7 +1674,8 @@ async fn import_dump_v4_rubygems_with_settings() { "pagination": { "maxTotalHits": 1000 }, - "searchCutoffMs": null + "searchCutoffMs": null, + "localizedAttributes": null } "### ); @@ -1909,7 +1921,8 @@ async fn import_dump_v6_containing_experimental_features() { "pagination": { "maxTotalHits": 1000 }, - "searchCutoffMs": null + "searchCutoffMs": null, + "localizedAttributes": null } "###); @@ -2087,7 +2100,8 @@ async fn generate_and_import_dump_containing_vectors() { "documentTemplate": "{{doc.doggo}}" } }, - "searchCutoffMs": null + "searchCutoffMs": null, + "localizedAttributes": null } "###); diff --git a/meilisearch/tests/search/locales.rs b/meilisearch/tests/search/locales.rs index 722694ba3..9f1c22b75 100644 --- a/meilisearch/tests/search/locales.rs +++ b/meilisearch/tests/search/locales.rs @@ -103,12 +103,41 @@ async fn simple_search() { // english index - .search(json!({"q": "Atta", "attributesToRetrieve": ["id"]}), |response, code| { + .search(json!({"q": "Atta", "attributesToHighlight": ["*"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "id": 852 + "name_en": "Attack on Titan", + "name_ja": "進撃の巨人", + "author_en": "Hajime Isayama", + "author_ja": "諫山 創", + "description_en": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", + "description_ja": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", + "id": 852, + "_vectors": { + "manual": [ + 1.0, + 2.0, + 3.0 + ] + }, + "_formatted": { + "name_en": "Attack on Titan", + "name_ja": "進撃の巨人", + "author_en": "Hajime Isayama", + "author_ja": "諫山 創", + "description_en": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", + "description_ja": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", + "id": "852", + "_vectors": { + "manual": [ + "1.0", + "2.0", + "3.0" + ] + } + } } ], "query": "Atta", @@ -124,12 +153,35 @@ async fn simple_search() { // japanese index - .search(json!({"q": "進撃", "attributesToRetrieve": ["id"]}), |response, code| { + .search(json!({"q": "進撃", "attributesToHighlight": ["*"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "id": 853 + "name_zh": "进击的巨人", + "author_zh": "諫山創", + "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", + "id": 853, + "_vectors": { + "manual": [ + 1.0, + 2.0, + 3.0 + ] + }, + "_formatted": { + "name_zh": "进击的巨人", + "author_zh": "諫山創", + "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", + "id": "853", + "_vectors": { + "manual": [ + "1.0", + "2.0", + "3.0" + ] + } + } } ], "query": "進撃", @@ -145,25 +197,77 @@ async fn simple_search() { index .search( - json!({"q": "進撃", "attributesToRetrieve": ["id"], "locales": ["jpn"]}), + json!({"q": "進撃", "locales": ["jpn"], "attributesToHighlight": ["*"]}), |response, code| { snapshot!(response, @r###" - { - "hits": [ { - "id": 852 - }, - { - "id": 853 + "hits": [ + { + "name_en": "Attack on Titan", + "name_ja": "進撃の巨人", + "author_en": "Hajime Isayama", + "author_ja": "諫山 創", + "description_en": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", + "description_ja": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", + "id": 852, + "_vectors": { + "manual": [ + 1.0, + 2.0, + 3.0 + ] + }, + "_formatted": { + "name_en": "Attack on Titan", + "name_ja": "進撃の巨人", + "author_en": "Hajime Isayama", + "author_ja": "諫山 創", + "description_en": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", + "description_ja": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", + "id": "852", + "_vectors": { + "manual": [ + 
"1.0", + "2.0", + "3.0" + ] + } + } + }, + { + "name_zh": "进击的巨人", + "author_zh": "諫山創", + "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", + "id": 853, + "_vectors": { + "manual": [ + 1.0, + 2.0, + 3.0 + ] + }, + "_formatted": { + "name_zh": "进击的巨人", + "author_zh": "諫山創", + "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", + "id": "853", + "_vectors": { + "manual": [ + "1.0", + "2.0", + "3.0" + ] + } + } + } + ], + "query": "進撃", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 2 } - ], - "query": "進撃", - "processingTimeMs": "[duration]", - "limit": 20, - "offset": 0, - "estimatedTotalHits": 2 - } - "###); + "###); snapshot!(code, @"200 OK"); }, ) @@ -171,15 +275,67 @@ async fn simple_search() { // chinese index - .search(json!({"q": "进击", "attributesToRetrieve": ["id"]}), |response, code| { + .search(json!({"q": "进击", "attributesToHighlight": ["*"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "id": 853 + "name_zh": "进击的巨人", + "author_zh": "諫山創", + "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", + "id": 853, + "_vectors": { + "manual": [ + 1.0, + 2.0, + 3.0 + ] + }, + "_formatted": { + "name_zh": "进击的巨人", + "author_zh": "諫山創", + "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", + "id": "853", + "_vectors": { + "manual": [ + "1.0", + "2.0", + "3.0" + ] + } + } }, { - "id": 852 + "name_en": "Attack on Titan", + "name_ja": "進撃の巨人", + "author_en": "Hajime Isayama", + "author_ja": "諫山 創", + "description_en": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", + "description_ja": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", + "id": 852, + "_vectors": { + "manual": [ + 1.0, + 2.0, + 3.0 + ] + }, + "_formatted": { + "name_en": "Attack on Titan", + "name_ja": "進撃の巨人", + "author_en": "Hajime Isayama", + "author_ja": "諫山 創", + "description_en": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", + "description_ja": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", + "id": "852", + "_vectors": { + "manual": [ + "1.0", + "2.0", + "3.0" + ] + } + } } ], "query": "进击", @@ -226,7 +382,7 @@ async fn force_locales() { // chinese detection index .search( - json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"]}), + json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}), |response, code| { snapshot!(response, @r###" { @@ -246,13 +402,36 @@ async fn force_locales() { // force japanese index .search( - json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"], "locales": ["jpn"]}), + json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToHighlight": ["*"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "id": 853 + "name_zh": "进击的巨人", + "author_zh": "諫山創", + "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", + "id": 853, + "_vectors": { + "manual": [ + 1.0, + 2.0, + 3.0 + ] + }, + "_formatted": { + "name_zh": "巨人", + "author_zh": "諫山創", + "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", + "id": "853", + "_vectors": { + "manual": [ + "1.0", + "2.0", + "3.0" + ] + } + } } ], "query": "\"进击的巨人\"", @@ -300,7 +479,7 @@ async fn force_locales_with_pattern() { // chinese detection index .search( - json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"]}), + json!({"q": "\"进击的巨人\"", "attributesToHighlight": ["*"]}), |response, code| { snapshot!(response, @r###" { @@ -320,13 +499,36 @@ async fn force_locales_with_pattern() { // force japanese index .search( - json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"], "locales": ["jpn"]}), + json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToHighlight": ["*"]}), 
|response, code| { snapshot!(response, @r###" { "hits": [ { - "id": 853 + "name_zh": "进击的巨人", + "author_zh": "諫山創", + "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", + "id": 853, + "_vectors": { + "manual": [ + 1.0, + 2.0, + 3.0 + ] + }, + "_formatted": { + "name_zh": "巨人", + "author_zh": "諫山創", + "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", + "id": "853", + "_vectors": { + "manual": [ + "1.0", + "2.0", + "3.0" + ] + } + } } ], "query": "\"进击的巨人\"", @@ -372,7 +574,7 @@ async fn force_locales_with_pattern_nested() { // chinese index .search( - json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"], "locales": ["cmn"]}), + json!({"q": "\"进击的巨人\"", "locales": ["cmn"], "attributesToHighlight": ["*"]}), |response, code| { snapshot!(response, @r###" { @@ -392,13 +594,60 @@ async fn force_locales_with_pattern_nested() { // force japanese index .search( - json!({"q": "\"进击的巨人\"", "attributesToRetrieve": ["id"], "locales": ["jpn"]}), + json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToHighlight": ["*"]}), |response, code| { snapshot!(response, @r###" { "hits": [ { - "id": 852 + "document_en": { + "name": "Attack on Titan", + "description": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", + "author": "Hajime Isayama" + }, + "document_ja": { + "name": "進撃の巨人", + "description": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", + "author": "諫山 創" + }, + "document_zh": { + "name": "进击的巨人", + "description": "进击的巨人是日本的漫画系列,由諫山 創作画。", + "author": "諫山創" + }, + "id": 852, + "_vectors": { + "manual": [ + 1.0, + 2.0, + 3.0 + ] + }, + "_formatted": { + "document_en": { + "name": "Attack on Titan", + "description": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", + "author": "Hajime Isayama" + }, + "document_ja": { + "name": "進撃の巨人", + "description": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", + "author": "諫山 創" + }, + "document_zh": { + "name": "巨人", + "description": "巨人是日本的漫画系列,由諫山 創作画。", + "author": "諫山創" + }, + "id": "852", + "_vectors": { + "manual": [ + "1.0", + "2.0", + "3.0" + ] + } + } } ], "query": "\"进击的巨人\"", @@ -413,6 +662,357 @@ async fn force_locales_with_pattern_nested() { ) .await; } +#[actix_rt::test] +async fn force_different_locales_with_pattern() { + let server = Server::new().await; + + let index = server.index("test"); + let documents = DOCUMENTS.clone(); + let (response, _) = index + .update_settings( + json!({ + "searchableAttributes": ["name_en", "name_ja", "name_zh", "author_en", "author_ja", "author_zh", "description_en", "description_ja", "description_zh"], + "localizedAttributes": [ + // force japanese + {"attributePatterns": ["*_zh"], "locales": ["jpn"]}, + // force chinese + {"attributePatterns": ["*_ja"], "locales": ["cmn"]} + ] + }), + ) + .await; + snapshot!(response, @r###" + { + "taskUid": 0, + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[date]" + } + "###); + index.add_documents(documents, None).await; + index.wait_task(1).await; + + // force chinese + index + .search( + json!({"q": "\"进击的巨人\"", "locales": ["cmn"], "attributesToHighlight": ["*"]}), + |response, code| { + snapshot!(response, @r###" + { + "hits": [], + "query": "\"进击的巨人\"", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 0 + } + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; + + // force japanese + index + .search( + json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToHighlight": ["*"]}), + |response, code| { + snapshot!(response, @r###" + { + 
"hits": [ + { + "name_zh": "进击的巨人", + "author_zh": "諫山創", + "description_zh": "进击的巨人是日本的漫画系列,由諫山 創作画。", + "id": 853, + "_vectors": { + "manual": [ + 1.0, + 2.0, + 3.0 + ] + }, + "_formatted": { + "name_zh": "巨人", + "author_zh": "諫山創", + "description_zh": "巨人是日本的漫画系列,由諫山 創作画。", + "id": "853", + "_vectors": { + "manual": [ + "1.0", + "2.0", + "3.0" + ] + } + } + } + ], + "query": "\"进击的巨人\"", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1 + } + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; +} + +#[actix_rt::test] +async fn force_different_locales_with_pattern_nested() { + let server = Server::new().await; + + let index = server.index("test"); + let documents = NESTED_DOCUMENTS.clone(); + let (response, _) = index + .update_settings(json!({ + "searchableAttributes": ["document_en", "document_ja", "document_zh"], + "localizedAttributes": [ + // force japanese + {"attributePatterns": ["*_zh.*"], "locales": ["jpn"]}, + // force chinese + {"attributePatterns": ["document_ja.*", "document_zh.*"], "locales": ["cmn"]} + ] + })) + .await; + snapshot!(response, @r###" + { + "taskUid": 0, + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[date]" + } + "###); + index.add_documents(documents, None).await; + index.wait_task(1).await; + + // chinese + index + .search( + json!({"q": "\"进击的巨人\"", "locales": ["cmn"], "attributesToHighlight": ["*"]}), + |response, code| { + snapshot!(response, @r###" + { + "hits": [], + "query": "\"进击的巨人\"", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 0 + } + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; + + // force japanese + index + .search( + json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToHighlight": ["*"]}), + |response, code| { + snapshot!(response, @r###" + { + "hits": [ + { + "document_en": { + "name": "Attack on Titan", + "description": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", + "author": "Hajime Isayama" + }, + "document_ja": { + "name": "進撃の巨人", + "description": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", + "author": "諫山 創" + }, + "document_zh": { + "name": "进击的巨人", + "description": "进击的巨人是日本的漫画系列,由諫山 創作画。", + "author": "諫山創" + }, + "id": 852, + "_vectors": { + "manual": [ + 1.0, + 2.0, + 3.0 + ] + }, + "_formatted": { + "document_en": { + "name": "Attack on Titan", + "description": "Attack on Titan is a Japanese manga series written and illustrated by Hajime Isayama", + "author": "Hajime Isayama" + }, + "document_ja": { + "name": "進撃の巨人", + "description": "進撃の巨人は、日本の漫画シリーズであり、諫山 創によって作画されている。", + "author": "諫山 創" + }, + "document_zh": { + "name": "巨人", + "description": "巨人是日本的漫画系列,由諫山 創作画。", + "author": "諫山創" + }, + "id": "852", + "_vectors": { + "manual": [ + "1.0", + "2.0", + "3.0" + ] + } + } + } + ], + "query": "\"进击的巨人\"", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 1 + } + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; +} + +#[actix_rt::test] +async fn settings_change() { + let server = Server::new().await; + + let index = server.index("test"); + let documents = NESTED_DOCUMENTS.clone(); + index.add_documents(documents, None).await; + index.wait_task(0).await; + let (response, _) = index + .update_settings(json!({ + "searchableAttributes": ["document_en", "document_ja", "document_zh"], + "localizedAttributes": [ + // force japanese + {"attributePatterns": ["document_ja.*", "*_zh.*"], "locales": ["jpn"]} 
+ ] + })) + .await; + snapshot!(response, @r###" + { + "taskUid": 1, + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[date]" + } + "###); + index.wait_task(1).await; + + // chinese + index + .search( + json!({"q": "\"进击的巨人\"", "locales": ["cmn"], "attributesToHighlight": ["*"]}), + |response, code| { + snapshot!(response, @r###" + { + "hits": [], + "query": "\"进击的巨人\"", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 0 + } + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; + + // force japanese + index + .search( + json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToHighlight": ["*"]}), + |response, code| { + snapshot!(response, @r###" + { + "hits": [], + "query": "\"进击的巨人\"", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 0 + } + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; + + // change settings + let (response, _) = index + .update_settings(json!({ + "searchableAttributes": ["document_en", "document_ja", "document_zh"], + "localizedAttributes": [ + // force japanese + {"attributePatterns": ["*_zh.*"], "locales": ["jpn"]}, + // force chinese + {"attributePatterns": ["document_ja.*"], "locales": ["cmn"]} + ] + })) + .await; + snapshot!(response, @r###" + { + "taskUid": 2, + "indexUid": "test", + "status": "enqueued", + "type": "settingsUpdate", + "enqueuedAt": "[date]" + } + "###); + index.wait_task(2).await; + + // chinese + index + .search( + json!({"q": "\"进击的巨人\"", "locales": ["cmn"], "attributesToHighlight": ["*"]}), + |response, code| { + snapshot!(response, @r###" + { + "hits": [], + "query": "\"进击的巨人\"", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 0 + } + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; + + // force japanese + index + .search( + json!({"q": "\"进击的巨人\"", "locales": ["jpn"], "attributesToHighlight": ["*"]}), + |response, code| { + snapshot!(response, @r###" + { + "hits": [], + "query": "\"进击的巨人\"", + "processingTimeMs": "[duration]", + "limit": 20, + "offset": 0, + "estimatedTotalHits": 0 + } + "###); + snapshot!(code, @"200 OK"); + }, + ) + .await; +} #[actix_rt::test] async fn invalid_locales() { @@ -428,9 +1028,7 @@ async fn invalid_locales() { index.add_documents(documents, None).await; index.wait_task(1).await; - let (response, code) = index - .search_post(json!({"q": "Atta", "attributesToRetrieve": ["id"], "locales": ["invalid"]})) - .await; + let (response, code) = index.search_post(json!({"q": "Atta", "locales": ["invalid"]})).await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { @@ -442,17 +1040,12 @@ async fn invalid_locales() { "###); let (response, code) = index - .search_get( - &yaup::to_string( - &json!({"q": "Atta", "attributesToRetrieve": ["id"], "locales": ["invalid"]}), - ) - .unwrap(), - ) + .search_get(&yaup::to_string(&json!({"q": "Atta", "locales": ["invalid"]})).unwrap()) .await; snapshot!(code, @"400 Bad Request"); snapshot!(json_string!(response), @r###" { - "message": "Invalid value in parameter `locales`: Unknown value `invalid`, expected one of `epo`, `eng`, `rus`, `cmn`, `spa`, `por`, `ita`, `ben`, `fra`, `deu`, `ukr`, `kat`, `ara`, `hin`, `jpn`, `heb`, `yid`, `pol`, `amh`, `jav`, `kor`, `nob`, `dan`, `swe`, `fin`, `tur`, `nld`, `hun`, `ces`, `ell`, `bul`, `bel`, `mar`, `kan`, `ron`, `slv`, `hrv`, `srp`, `mkd`, `lit`, `lav`, `est`, `tam`, `vie`, `urd`, `tha`, `guj`, `uzb`, `pan`, `aze`, `ind`, `tel`, 
`pes`, `mal`, `ori`, `mya`, `nep`, `sin`, `khm`, `tuk`, `aka`, `zul`, `sna`, `afr`, `lat`, `slk`, `cat`, `tgl`, `hye`",
+      "message": "Invalid value in parameter `locales`: Unsupported locale `invalid`, expected one of `epo`, `eng`, `rus`, `cmn`, `spa`, `por`, `ita`, `ben`, `fra`, `deu`, `ukr`, `kat`, `ara`, `hin`, `jpn`, `heb`, `yid`, `pol`, `amh`, `jav`, `kor`, `nob`, `dan`, `swe`, `fin`, `tur`, `nld`, `hun`, `ces`, `ell`, `bul`, `bel`, `mar`, `kan`, `ron`, `slv`, `hrv`, `srp`, `mkd`, `lit`, `lav`, `est`, `tam`, `vie`, `urd`, `tha`, `guj`, `uzb`, `pan`, `aze`, `ind`, `tel`, `pes`, `mal`, `ori`, `mya`, `nep`, `sin`, `khm`, `tuk`, `aka`, `zul`, `sna`, `afr`, `lat`, `slk`, `cat`, `tgl`, `hye`",
       "code": "invalid_search_locales",
       "type": "invalid_request",
       "link": "https://docs.meilisearch.com/errors#invalid_search_locales"
diff --git a/milli/src/index.rs b/milli/src/index.rs
index f5342f2c0..3a2f3169c 100644
--- a/milli/src/index.rs
+++ b/milli/src/index.rs
@@ -1558,7 +1558,7 @@ impl Index {
         rtxn: &RoTxn<'_>,
     ) -> heed::Result<Option<Vec<LocalizedAttributesRule>>> {
         self.main
-            .remap_types::<Str, SerdeBincode<Vec<LocalizedAttributesRule>>>()
+            .remap_types::<Str, SerdeJson<Vec<LocalizedAttributesRule>>>()
             .get(rtxn, main_key::LOCALIZED_ATTRIBUTES_RULES)
     }

@@ -1567,7 +1567,7 @@
         txn: &mut RwTxn<'_>,
         val: Vec<LocalizedAttributesRule>,
     ) -> heed::Result<()> {
-        self.main.remap_types::<Str, SerdeBincode<Vec<LocalizedAttributesRule>>>().put(
+        self.main.remap_types::<Str, SerdeJson<Vec<LocalizedAttributesRule>>>().put(
             txn,
             main_key::LOCALIZED_ATTRIBUTES_RULES,
             &val,
diff --git a/milli/src/localized_attributes_rules.rs b/milli/src/localized_attributes_rules.rs
index aa4eddee1..739d03043 100644
--- a/milli/src/localized_attributes_rules.rs
+++ b/milli/src/localized_attributes_rules.rs
@@ -71,6 +71,8 @@ impl LocalizedFieldIds {
         for rule in rules {
             if rule.match_str(field_name) {
                 locales.extend(rule.locales.iter());
+                // Take the first rule that matches
+                break;
             }
         }
diff --git a/milli/src/search/facet/search.rs b/milli/src/search/facet/search.rs
index 6ef62e39a..39fb7374a 100644
--- a/milli/src/search/facet/search.rs
+++ b/milli/src/search/facet/search.rs
@@ -346,5 +346,5 @@ fn normalize_facet_string(facet_string: &str, locales: Option<&[Language]>) -> S
         ..Default::default()
     };

-    token.normalize(&options).lemma.to_string()
+    token.normalize(&options).lemma.into_owned()
 }
diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs
index 2cac2777d..e423852f1 100644
--- a/milli/src/update/settings.rs
+++ b/milli/src/update/settings.rs
@@ -1128,22 +1128,21 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
         Ok(changed)
     }

-    fn update_localized_attributes_rules(&mut self) -> Result<bool> {
-        let changed = match &self.localized_attributes_rules {
+    fn update_localized_attributes_rules(&mut self) -> Result<()> {
+        match &self.localized_attributes_rules {
             Setting::Set(new) => {
                 let old = self.index.localized_attributes_rules(self.wtxn)?;
-                if old.as_ref() == Some(new) {
-                    false
-                } else {
+                if old.as_ref() != Some(new) {
                     self.index.put_localized_attributes_rules(self.wtxn, new.clone())?;
-                    true
                 }
             }
-            Setting::Reset => self.index.delete_localized_attributes_rules(self.wtxn)?,
-            Setting::NotSet => false,
-        };
+            Setting::Reset => {
+                self.index.delete_localized_attributes_rules(self.wtxn)?;
+            }
+            Setting::NotSet => (),
+        }

-        Ok(changed)
+        Ok(())
     }

     pub fn execute<FP, FA>(mut self, progress_callback: FP, should_abort: FA) -> Result<()>

From 59115fd058332fd6dbbfb503846fa389b210780d Mon Sep 17 00:00:00 2001
From: Louis Dureuil
Date: Thu, 25 Jul 2024 10:50:45 +0200
Subject: [PATCH 9/9] Fix tests

---
 meilisearch/tests/settings/get_settings.rs | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git
a/meilisearch/tests/settings/get_settings.rs b/meilisearch/tests/settings/get_settings.rs index 239151197..2f51dfb44 100644 --- a/meilisearch/tests/settings/get_settings.rs +++ b/meilisearch/tests/settings/get_settings.rs @@ -55,7 +55,7 @@ async fn get_settings() { let (response, code) = index.settings().await; assert_eq!(code, 200); let settings = response.as_object().unwrap(); - assert_eq!(settings.keys().len(), 16); + assert_eq!(settings.keys().len(), 17); assert_eq!(settings["displayedAttributes"], json!(["*"])); assert_eq!(settings["searchableAttributes"], json!(["*"])); assert_eq!(settings["filterableAttributes"], json!([])); @@ -195,7 +195,8 @@ async fn secrets_are_hidden_in_settings() { "response": "{{embedding}}" } }, - "searchCutoffMs": null + "searchCutoffMs": null, + "localizedAttributes": null } "###);
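
The `break` added to `LocalizedFieldIds` in the milli diff above makes locale resolution first-match-wins: a field takes its locales only from the first `localizedAttributes` rule whose pattern matches it, which is what the `force_different_locales_with_pattern` tests exercise when `*_zh` is forced to `jpn` before any later rule can apply. Below is a minimal, self-contained Rust sketch of that resolution order; the types and the simplified single-`*` glob matcher are hypothetical stand-ins for illustration, not milli's actual API.

/// Hypothetical stand-in for milli's rule type; only the parts needed to
/// illustrate first-match-wins resolution are modeled here.
struct LocalizedAttributesRule {
    attribute_patterns: Vec<String>,
    locales: Vec<String>,
}

impl LocalizedAttributesRule {
    /// Simplified glob match supporting a single leading or trailing `*`
    /// (e.g. `*_zh` or `document_ja.*`); an assumption, not milli's matcher.
    fn match_str(&self, field_name: &str) -> bool {
        self.attribute_patterns.iter().any(|pattern| {
            if let Some(suffix) = pattern.strip_prefix('*') {
                field_name.ends_with(suffix)
            } else if let Some(prefix) = pattern.strip_suffix('*') {
                field_name.starts_with(prefix)
            } else {
                pattern.as_str() == field_name
            }
        })
    }
}

/// Returns the locales of the first matching rule only, mirroring the
/// `break` added in `LocalizedFieldIds::new` above.
fn locales_for<'r>(rules: &'r [LocalizedAttributesRule], field: &str) -> Option<&'r [String]> {
    rules.iter().find(|rule| rule.match_str(field)).map(|rule| rule.locales.as_slice())
}

fn main() {
    let rules = vec![
        LocalizedAttributesRule {
            attribute_patterns: vec!["*_zh".to_string()],
            locales: vec!["jpn".to_string()],
        },
        LocalizedAttributesRule {
            attribute_patterns: vec!["*_ja".to_string(), "*_zh".to_string()],
            locales: vec!["cmn".to_string()],
        },
    ];

    // `name_zh` matches both rules, but only the first one contributes locales.
    assert_eq!(locales_for(&rules, "name_zh"), Some(&["jpn".to_string()][..]));
    assert_eq!(locales_for(&rules, "name_ja"), Some(&["cmn".to_string()][..]));
    // No rule matches: the field is left to automatic language detection.
    assert_eq!(locales_for(&rules, "name_en"), None);
}

Under these assumptions, `name_zh` resolves to `jpn` even though a later rule also matches it, and a field matched by no rule falls back to automatic detection, which is the behavior the locale test snapshots above rely on.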