Merge #4819

4819: Language settings r=dureuill a=ManyTheFish # Pull Request ## Related issue Fixes #4749 ## What does this PR do? - [Implement localized search](c0c6955c0d) - [Implement localized attributes settings](bde827b055) ## Related PRD - [PRD](https://www.notion.so/meilisearch/Define-language-settings-to-impact-relevancy-bee62e18b7584c4f87d18a7654855329) - [Public usage](https://www.notion.so/meilisearch/v1-10-Language-settings-usage-26c5d98b553349d9abacbe7aff698e4e) Co-authored-by: ManyTheFish <many@meilisearch.com> Co-authored-by: Louis Dureuil <louis@meilisearch.com>
2025-05-25 09:03:59 +02:00 · 2024-07-25 09:00:33 +00:00 · 2024-07-25 09:00:33 +00:00 · 76bc2c18e8
commit 76bc2c18e8
parent c26bd68de5 59115fd058
45 changed files with 2004 additions and 498 deletions
--- a/Cargo.lock
+++ b/Cargo.lock
@ -934,19 +934,15 @@ dependencies = [
 [[package]]
 name = "charabia"
 version = "0.8.12"
-source = "registry+https://github.com/rust-lang/crates.io-index"
+source = "git+https://github.com/meilisearch/charabia.git?branch=simplify-lang-detection#2992583137458afcebff5d44cae93fa46d9cf664"
 checksum = "9868a22f10dee80498a8a2b6c641d80bf28ea4495fcf71c2dc4836c2dd23958c"
 dependencies = [
 "aho-corasick",
 "cow-utils",
 "csv",
 "deunicode",
 "either",
 "fst",
 "irg-kvariants",
 "jieba-rs",
 "lindera",
 "litemap",
 "once_cell",
 "pinyin",
 "serde",
@ -954,7 +950,6 @@ dependencies = [
 "unicode-normalization",
 "wana_kana",
 "whatlang",
 "zerovec",
 ]
 [[package]]
@ -1145,12 +1140,6 @@ version = "0.8.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "e496a50fda8aacccc86d7529e2c1e0892dbd0f898a6b5645b5561b89c3210efa"
 [[package]]
 name = "cow-utils"
 version = "0.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "79bb3adfaf5f75d24b01aee375f7555907840fa2800e5ec8fa3b9e2031830173"
 [[package]]
 name = "cpufeatures"
 version = "0.2.12"
@ -1551,12 +1540,6 @@ dependencies = [
 "syn 2.0.60",
 ]
 [[package]]
 name = "deunicode"
 version = "1.6.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "339544cc9e2c4dc3fc7149fd630c5f22263a4fdf18a98afd0075784968b5cf00"
 [[package]]
 name = "digest"
 version = "0.10.7"
@ -2666,8 +2649,7 @@ checksum = "28b29a3cd74f0f4598934efe3aeba42bae0eb4680554128851ebbecb02af14e6"
 [[package]]
 name = "irg-kvariants"
 version = "0.1.1"
-source = "registry+https://github.com/rust-lang/crates.io-index"
+source = "git+https://github.com/meilisearch/charabia.git?branch=simplify-lang-detection#2992583137458afcebff5d44cae93fa46d9cf664"
 checksum = "ef2af7c331f2536964a32b78a7d2e0963d78b42f4a76323b16cc7d94b1ddce26"
 dependencies = [
 "csv",
 "once_cell",
@ -3278,12 +3260,6 @@ dependencies = [
 "unicode-segmentation",
 ]
 [[package]]
 name = "litemap"
 version = "0.7.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "643cb0b8d4fcc284004d5fd0d67ccf61dfffadb7f75e1e71bc420f4688a3a704"
 [[package]]
 name = "lmdb-master-sys"
 version = "0.2.2"
@ -6506,15 +6482,6 @@ dependencies = [
 "syn 2.0.60",
 ]
 [[package]]
 name = "zerovec"
 version = "0.10.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "aa2b893d79df23bfb12d5461018d408ea19dfafe76c2c7ef6d4eba614f8ff079"
 dependencies = [
 "zerofrom",
 ]
 [[package]]
 name = "zip"
 version = "1.1.4"
--- a/dump/src/lib.rs
+++ b/dump/src/lib.rs
@ -286,6 +286,7 @@ pub(crate) mod test {
            pagination: Setting::NotSet,
            embedders: Setting::NotSet,
            search_cutoff_ms: Setting::NotSet,
            localized_attributes: Setting::NotSet,
            _kind: std::marker::PhantomData,
        };
        settings.check()
--- a/dump/src/reader/compat/v5_to_v6.rs
+++ b/dump/src/reader/compat/v5_to_v6.rs
@ -379,6 +379,7 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
                v5::Setting::NotSet => v6::Setting::NotSet,
            },
            embedders: v6::Setting::NotSet,
            localized_attributes: v6::Setting::NotSet,
            search_cutoff_ms: v6::Setting::NotSet,
            _kind: std::marker::PhantomData,
        }
--- a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel
+++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: 
Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: 
Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
 2 {uid: 2, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
 ----------------------------------------------------------------------
--- a/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel
+++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/Intel
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: 
Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: 
Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
 2 {uid: 2, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: None, method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000001, documents_count: 1, allow_index_creation: true }}
 ----------------------------------------------------------------------
--- a/index-scheduler/src/snapshots/lib.rs/import_vectors/adding
+++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/adding
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: 
Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: 
Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 1 {uid: 1, status: succeeded, details: { received_documents: 1, indexed_documents: Some(1) }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
 ----------------------------------------------------------------------
 ### Status:
--- a/index-scheduler/src/snapshots/lib.rs/import_vectors/after
+++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/after
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: 
Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: 
Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 1 {uid: 1, status: enqueued, details: { received_documents: 1, indexed_documents: None }, kind: DocumentAdditionOrUpdate { index_uid: "doggos", primary_key: Some("id"), method: UpdateDocuments, content_file: 00000000-0000-0000-0000-000000000000, documents_count: 1, allow_index_creation: true }}
 ----------------------------------------------------------------------
 ### Status:
--- a/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap
+++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/after_registering_settings_task_vectors.snap
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: 
Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: 
Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued [0,]
--- a/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap
+++ b/index-scheduler/src/snapshots/lib.rs/import_vectors/settings_update_processed_vectors.snap
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: 
Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"A_fakerest": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(384), document_template: NotSet, url: Set("http://localhost:7777"), request: 
Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet }), "B_small_hf": Set(EmbeddingSettings { source: Set(HuggingFace), model: Set("sentence-transformers/all-MiniLM-L6-v2"), revision: Set("e4ce9877abf3edfe10b0d82785e83bdcb973e22e"), api_key: NotSet, dimensions: NotSet, document_template: Set("{{doc.doggo}} the {{doc.breed}} best doggo"), url: NotSet, request: NotSet, response: NotSet, distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued []
--- a/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap
+++ b/index-scheduler/src/snapshots/lib.rs/test_settings_update/after_registering_settings_task.snap
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, status: enqueued, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued [0,]
--- a/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap
+++ b/index-scheduler/src/snapshots/lib.rs/test_settings_update/settings_update_processed.snap
@ -6,7 +6,7 @@ source: index-scheduler/src/lib.rs
 []
 ----------------------------------------------------------------------
 ### All Tasks:
-0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet })}), search_cutoff_ms: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
+0 {uid: 0, status: succeeded, details: { settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> } }, kind: SettingsUpdate { index_uid: "doggos", new_settings: Settings { displayed_attributes: WildcardSetting(NotSet), searchable_attributes: WildcardSetting(NotSet), filterable_attributes: NotSet, sortable_attributes: NotSet, ranking_rules: NotSet, stop_words: NotSet, non_separator_tokens: NotSet, separator_tokens: NotSet, dictionary: NotSet, synonyms: NotSet, distinct_attribute: NotSet, proximity_precision: NotSet, typo_tolerance: NotSet, faceting: NotSet, pagination: NotSet, embedders: Set({"default": Set(EmbeddingSettings { source: Set(Rest), model: NotSet, revision: NotSet, api_key: Set("My super secret"), dimensions: Set(4), document_template: NotSet, url: Set("http://localhost:7777"), request: Set(String("{{text}}")), response: Set(String("{{embedding}}")), distribution: NotSet })}), search_cutoff_ms: NotSet, localized_attributes: NotSet, _kind: PhantomData<meilisearch_types::settings::Unchecked> }, is_deletion: false, allow_index_creation: true }}
 ----------------------------------------------------------------------
 ### Status:
 enqueued []
--- a/meilisearch-types/src/error.rs
+++ b/meilisearch-types/src/error.rs
@ -256,6 +256,7 @@ InvalidSearchCropLength               , InvalidRequest       , BAD_REQUEST ;
 InvalidSearchCropMarker               , InvalidRequest       , BAD_REQUEST ;
 InvalidSearchFacets                   , InvalidRequest       , BAD_REQUEST ;
 InvalidSearchSemanticRatio            , InvalidRequest       , BAD_REQUEST ;
 InvalidSearchLocales                  , InvalidRequest       , BAD_REQUEST ;
 InvalidFacetSearchFacetName           , InvalidRequest       , BAD_REQUEST ;
 InvalidSimilarId                      , InvalidRequest       , BAD_REQUEST ;
 InvalidSearchFilter                   , InvalidRequest       , BAD_REQUEST ;
@ -297,6 +298,7 @@ InvalidSettingsSeparatorTokens        , InvalidRequest       , BAD_REQUEST ;
 InvalidSettingsDictionary             , InvalidRequest       , BAD_REQUEST ;
 InvalidSettingsSynonyms               , InvalidRequest       , BAD_REQUEST ;
 InvalidSettingsTypoTolerance          , InvalidRequest       , BAD_REQUEST ;
 InvalidSettingsLocalizedAttributes    , InvalidRequest       , BAD_REQUEST ;
 InvalidState                          , Internal             , INTERNAL_SERVER_ERROR ;
 InvalidStoreFile                      , Internal             , INTERNAL_SERVER_ERROR ;
 InvalidSwapDuplicateIndexFound        , InvalidRequest       , BAD_REQUEST ;
--- a/meilisearch-types/src/lib.rs
+++ b/meilisearch-types/src/lib.rs
@ -7,6 +7,7 @@ pub mod features;
 pub mod index_uid;
 pub mod index_uid_pattern;
 pub mod keys;
 pub mod locales;
 pub mod settings;
 pub mod star_or;
 pub mod task_view;
--- a/meilisearch-types/src/locales.rs
+++ b/meilisearch-types/src/locales.rs
@ -0,0 +1,157 @@
 use deserr::Deserr;
 use milli::LocalizedAttributesRule;
 use serde::{Deserialize, Serialize};
 use serde_json::json;
 /// Declares the `Locale` enum from a comma-separated list of language identifiers, together
 /// with one-to-one conversions to and from `milli::tokenizer::Language` and the
 /// `LocaleFormatError` type, whose message enumerates every declared locale.
 ///
 /// `Locale` derives `Deserr` so that it can be used in the API.
 macro_rules! make_locale {
    ($($language:tt), +) => {
        #[derive(Debug, Copy, Clone, PartialEq, Eq, Deserr, Serialize, Deserialize, Ord, PartialOrd)]
        #[deserr(rename_all = camelCase)]
        #[serde(rename_all = "camelCase")]
        pub enum Locale {
            $($language),+,
        }
        impl From<milli::tokenizer::Language> for Locale {
            fn from(language: milli::tokenizer::Language) -> Locale {
                match language {
                    $(milli::tokenizer::Language::$language => Locale::$language), +
                }
            }
        }
        impl From<Locale> for milli::tokenizer::Language {
            fn from(locale: Locale) -> milli::tokenizer::Language {
                match locale {
                    $(Locale::$language => milli::tokenizer::Language::$language), +,
                }
            }
        }
        /// Error describing an unsupported locale string.
        #[derive(Debug)]
        pub struct LocaleFormatError {
            pub invalid_locale: String,
        }
        impl std::fmt::Display for LocaleFormatError {
            fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
                // Each locale is rendered through its serde representation, so the names listed
                // in the message are the serialized (camelCase) ones.
                let quoted: Vec<_> = [$(Locale::$language),+]
                    .iter()
                    .map(|locale| format!("`{}`", json!(locale).as_str().unwrap()))
                    .collect();
                let valid_locales = quoted.join(", ");
                write!(f, "Unsupported locale `{}`, expected one of {}", self.invalid_locale, valid_locales)
            }
        }
    };
 }
 // Locales exposed by the API. The generated `From` impls match on same-named variants of
 // `milli::tokenizer::Language`, so every identifier listed here must also exist on that enum.
 make_locale! {
    Epo,
    Eng,
    Rus,
    Cmn,
    Spa,
    Por,
    Ita,
    Ben,
    Fra,
    Deu,
    Ukr,
    Kat,
    Ara,
    Hin,
    Jpn,
    Heb,
    Yid,
    Pol,
    Amh,
    Jav,
    Kor,
    Nob,
    Dan,
    Swe,
    Fin,
    Tur,
    Nld,
    Hun,
    Ces,
    Ell,
    Bul,
    Bel,
    Mar,
    Kan,
    Ron,
    Slv,
    Hrv,
    Srp,
    Mkd,
    Lit,
    Lav,
    Est,
    Tam,
    Vie,
    Urd,
    Tha,
    Guj,
    Uzb,
    Pan,
    Aze,
    Ind,
    Tel,
    Pes,
    Mal,
    Ori,
    Mya,
    Nep,
    Sin,
    Khm,
    Tuk,
    Aka,
    Zul,
    Sna,
    Afr,
    Lat,
    Slk,
    Cat,
    Tgl,
    Hye
 }
 // `LocaleFormatError` gets `Debug` (derived) and `Display` from `make_locale!`; this empty impl
 // makes it usable as a `std::error::Error`.
 impl std::error::Error for LocaleFormatError {}
 /// Parses a `Locale` from a language code.
 ///
 /// The lookup is delegated to `milli::tokenizer::Language::from_code`; when it finds no
 /// language for `s`, a `LocaleFormatError` carrying the rejected input is returned.
 impl std::str::FromStr for Locale {
    type Err = LocaleFormatError;
    fn from_str(s: &str) -> Result<Self, Self::Err> {
        milli::tokenizer::Language::from_code(s)
            .map(Self::from)
            // Build the error lazily so that a successful parse does not allocate a `String`.
            .ok_or_else(|| LocaleFormatError { invalid_locale: s.to_string() })
    }
 }
 /// API-facing counterpart of `milli`'s `LocalizedAttributesRule`; the two types convert into
 /// each other through `From`.
 ///
 /// Field names are renamed to camelCase for both `deserr` and `serde`.
 #[derive(Debug, Clone, PartialEq, Eq, Deserr, Serialize, Deserialize)]
 #[deserr(rename_all = camelCase)]
 #[serde(rename_all = "camelCase")]
 pub struct LocalizedAttributesRuleView {
    /// Attribute patterns of the rule, passed through unchanged to and from the `milli` rule.
    // NOTE(review): the pattern syntax is defined by `milli` and is not visible here.
    pub attribute_patterns: Vec<String>,
    /// Locales of the rule; each one converts to and from a `milli::tokenizer::Language`.
    pub locales: Vec<Locale>,
 }
 /// Builds the API view of a `milli` rule: the attribute patterns are moved over as-is and each
 /// locale is converted with its `Into` impl.
 impl From<LocalizedAttributesRule> for LocalizedAttributesRuleView {
    fn from(rule: LocalizedAttributesRule) -> Self {
        let attribute_patterns = rule.attribute_patterns;
        let locales = rule.locales.into_iter().map(Into::into).collect();
        Self { attribute_patterns, locales }
    }
 }
 /// Turns an API view back into a `milli` rule: the attribute patterns are moved over as-is and
 /// each locale is converted with its `Into` impl.
 impl From<LocalizedAttributesRuleView> for LocalizedAttributesRule {
    fn from(view: LocalizedAttributesRuleView) -> Self {
        let attribute_patterns = view.attribute_patterns;
        let locales = view.locales.into_iter().map(Into::into).collect();
        Self { attribute_patterns, locales }
    }
 }
--- a/meilisearch-types/src/settings.rs
+++ b/meilisearch-types/src/settings.rs
@ -17,6 +17,7 @@ use serde::{Deserialize, Serialize, Serializer};
 use crate::deserr::DeserrJsonError;
 use crate::error::deserr_codes::*;
 use crate::facet_values_sort::FacetValuesSort;
 use crate::locales::LocalizedAttributesRuleView;
 /// The maximum number of results that the engine
 /// will be able to return in one search call.
@ -198,6 +199,9 @@ pub struct Settings<T> {
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[deserr(default, error = DeserrJsonError<InvalidSettingsSearchCutoffMs>)]
    pub search_cutoff_ms: Setting<u64>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[deserr(default, error = DeserrJsonError<InvalidSettingsLocalizedAttributes>)]
    pub localized_attributes: Setting<Vec<LocalizedAttributesRuleView>>,
    #[serde(skip)]
    #[deserr(skip)]
@ -261,6 +265,7 @@ impl Settings<Checked> {
            pagination: Setting::Reset,
            embedders: Setting::Reset,
            search_cutoff_ms: Setting::Reset,
            localized_attributes: Setting::Reset,
            _kind: PhantomData,
        }
    }
@ -284,7 +289,8 @@ impl Settings<Checked> {
            pagination,
            embedders,
            search_cutoff_ms,
-            ..
+            localized_attributes: localized_attributes_rules,
            _kind,
        } = self;
        Settings {
@ -305,6 +311,7 @@ impl Settings<Checked> {
            pagination,
            embedders,
            search_cutoff_ms,
            localized_attributes: localized_attributes_rules,
            _kind: PhantomData,
        }
    }
@ -352,6 +359,7 @@ impl Settings<Unchecked> {
            pagination: self.pagination,
            embedders: self.embedders,
            search_cutoff_ms: self.search_cutoff_ms,
            localized_attributes: self.localized_attributes,
            _kind: PhantomData,
        }
    }
@ -402,6 +410,7 @@ pub fn apply_settings_to_builder(
        pagination,
        embedders,
        search_cutoff_ms,
        localized_attributes: localized_attributes_rules,
        _kind,
    } = settings;
@ -485,6 +494,13 @@ pub fn apply_settings_to_builder(
        Setting::NotSet => (),
    }
    match localized_attributes_rules {
        Setting::Set(ref rules) => builder
            .set_localized_attributes_rules(rules.iter().cloned().map(|r| r.into()).collect()),
        Setting::Reset => builder.reset_localized_attributes_rules(),
        Setting::NotSet => (),
    }
    match typo_tolerance {
        Setting::Set(ref value) => {
            match value.enabled {
@ -679,6 +695,8 @@ pub fn settings(
    let search_cutoff_ms = index.search_cutoff(rtxn)?;
    let localized_attributes_rules = index.localized_attributes_rules(rtxn)?;
    let mut settings = Settings {
        displayed_attributes: match displayed_attributes {
            Some(attrs) => Setting::Set(attrs),
@ -711,6 +729,10 @@ pub fn settings(
            Some(cutoff) => Setting::Set(cutoff),
            None => Setting::Reset,
        },
        localized_attributes: match localized_attributes_rules {
            Some(rules) => Setting::Set(rules.into_iter().map(|r| r.into()).collect()),
            None => Setting::Reset,
        },
        _kind: PhantomData,
    };
@ -902,6 +924,7 @@ pub(crate) mod test {
            faceting: Setting::NotSet,
            pagination: Setting::NotSet,
            embedders: Setting::NotSet,
            localized_attributes: Setting::NotSet,
            search_cutoff_ms: Setting::NotSet,
            _kind: PhantomData::<Unchecked>,
        };
@ -930,6 +953,7 @@ pub(crate) mod test {
            faceting: Setting::NotSet,
            pagination: Setting::NotSet,
            embedders: Setting::NotSet,
            localized_attributes: Setting::NotSet,
            search_cutoff_ms: Setting::NotSet,
            _kind: PhantomData::<Unchecked>,
        };
--- a/meilisearch/src/analytics/segment_analytics.rs
+++ b/meilisearch/src/analytics/segment_analytics.rs
@ -1,4 +1,4 @@
-use std::collections::{BinaryHeap, HashMap, HashSet};
+use std::collections::{BTreeSet, BinaryHeap, HashMap, HashSet};
 use std::fs;
 use std::mem::take;
 use std::path::{Path, PathBuf};
@ -10,6 +10,7 @@ use actix_web::HttpRequest;
 use byte_unit::Byte;
 use index_scheduler::IndexScheduler;
 use meilisearch_auth::{AuthController, AuthFilter};
 use meilisearch_types::locales::Locale;
 use meilisearch_types::InstanceUid;
 use once_cell::sync::Lazy;
 use regex::Regex;
@ -653,6 +654,9 @@ pub struct SearchAggregator {
    // every time a search is done, we increment the counter linked to the used settings
    matching_strategy: HashMap<String, usize>,
    // List of the unique Locales passed as parameter
    locales: BTreeSet<Locale>,
    // pagination
    max_limit: usize,
    max_offset: usize,
@ -707,6 +711,7 @@ impl SearchAggregator {
            attributes_to_search_on,
            hybrid,
            ranking_score_threshold,
            locales,
        } = query;
        let mut ret = Self::default();
@ -774,6 +779,10 @@ impl SearchAggregator {
        ret.matching_strategy.insert(format!("{:?}", matching_strategy), 1);
        if let Some(locales) = locales {
            ret.locales = locales.iter().copied().collect();
        }
        ret.highlight_pre_tag = *highlight_pre_tag != DEFAULT_HIGHLIGHT_PRE_TAG();
        ret.highlight_post_tag = *highlight_post_tag != DEFAULT_HIGHLIGHT_POST_TAG();
        ret.crop_marker = *crop_marker != DEFAULT_CROP_MARKER();
@ -859,6 +868,7 @@ impl SearchAggregator {
            total_degraded,
            total_used_negative_operator,
            ranking_score_threshold,
            ref mut locales,
        } = other;
        if self.timestamp.is_none() {
@ -947,6 +957,9 @@ impl SearchAggregator {
        self.show_ranking_score |= show_ranking_score;
        self.show_ranking_score_details |= show_ranking_score_details;
        self.ranking_score_threshold |= ranking_score_threshold;
        // locales
        self.locales.append(locales);
    }
    pub fn into_event(self, user: &User, event_name: &str) -> Option<Track> {
@ -991,6 +1004,7 @@ impl SearchAggregator {
            total_degraded,
            total_used_negative_operator,
            ranking_score_threshold,
            locales,
        } = self;
        if total_received == 0 {
@ -1060,6 +1074,7 @@ impl SearchAggregator {
                "matching_strategy": {
                    "most_used_strategy": matching_strategy.iter().max_by_key(|(_, v)| *v).map(|(k, _)| json!(k)).unwrap_or_else(|| json!(null)),
                },
                "locales": locales,
                "scoring": {
                    "show_ranking_score": show_ranking_score,
                    "show_ranking_score_details": show_ranking_score_details,
@ -1150,6 +1165,7 @@ impl MultiSearchAggregator {
                    attributes_to_search_on: _,
                    hybrid: _,
                    ranking_score_threshold: _,
                    locales: _,
                } = query;
                index_uid.as_str()
@ -1307,6 +1323,7 @@ impl FacetSearchAggregator {
            attributes_to_search_on,
            hybrid,
            ranking_score_threshold,
            locales,
        } = query;
        let mut ret = Self::default();
@ -1322,7 +1339,8 @@ impl FacetSearchAggregator {
            || *matching_strategy != MatchingStrategy::default()
            || attributes_to_search_on.is_some()
            || hybrid.is_some()
-            || ranking_score_threshold.is_some();
+            || ranking_score_threshold.is_some()
            || locales.is_some();
        ret
    }
--- a/meilisearch/src/routes/indexes/facet_search.rs
+++ b/meilisearch/src/routes/indexes/facet_search.rs
@ -6,6 +6,7 @@ use meilisearch_types::deserr::DeserrJsonError;
 use meilisearch_types::error::deserr_codes::*;
 use meilisearch_types::error::ResponseError;
 use meilisearch_types::index_uid::IndexUid;
 use meilisearch_types::locales::Locale;
 use serde_json::Value;
 use tracing::debug;
@ -48,6 +49,8 @@ pub struct FacetSearchQuery {
    pub attributes_to_search_on: Option<Vec<String>>,
    #[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
    pub ranking_score_threshold: Option<RankingScoreThreshold>,
    #[deserr(default, error = DeserrJsonError<InvalidSearchLocales>, default)]
    pub locales: Option<Vec<Locale>>,
 }
 pub async fn search(
@ -67,6 +70,7 @@ pub async fn search(
    let facet_query = query.facet_query.clone();
    let facet_name = query.facet_name.clone();
    let locales = query.locales.clone().map(|l| l.into_iter().map(Into::into).collect());
    let mut search_query = SearchQuery::from(query);
    // Tenant token search_rules.
@ -86,6 +90,7 @@ pub async fn search(
            facet_name,
            search_kind,
            index_scheduler.features(),
            locales,
        )
    })
    .await?;
@ -113,6 +118,7 @@ impl From<FacetSearchQuery> for SearchQuery {
            attributes_to_search_on,
            hybrid,
            ranking_score_threshold,
            locales,
        } = value;
        SearchQuery {
@ -141,6 +147,7 @@ impl From<FacetSearchQuery> for SearchQuery {
            attributes_to_search_on,
            hybrid,
            ranking_score_threshold,
            locales,
        }
    }
 }
--- a/meilisearch/src/routes/indexes/search.rs
+++ b/meilisearch/src/routes/indexes/search.rs
@ -7,6 +7,7 @@ use meilisearch_types::deserr::{DeserrJsonError, DeserrQueryParamError};
 use meilisearch_types::error::deserr_codes::*;
 use meilisearch_types::error::ResponseError;
 use meilisearch_types::index_uid::IndexUid;
 use meilisearch_types::locales::Locale;
 use meilisearch_types::milli;
 use meilisearch_types::serde_cs::vec::CS;
 use serde_json::Value;
@ -89,6 +90,8 @@ pub struct SearchQueryGet {
    pub hybrid_semantic_ratio: Option<SemanticRatioGet>,
    #[deserr(default, error = DeserrQueryParamError<InvalidSearchRankingScoreThreshold>)]
    pub ranking_score_threshold: Option<RankingScoreThresholdGet>,
    #[deserr(default, error = DeserrQueryParamError<InvalidSearchLocales>)]
    pub locales: Option<CS<Locale>>,
 }
 #[derive(Debug, Clone, Copy, PartialEq, deserr::Deserr)]
@ -175,6 +178,7 @@ impl From<SearchQueryGet> for SearchQuery {
            attributes_to_search_on: other.attributes_to_search_on.map(|o| o.into_iter().collect()),
            hybrid,
            ranking_score_threshold: other.ranking_score_threshold.map(|o| o.0),
            locales: other.locales.map(|o| o.into_iter().collect()),
        }
    }
 }
--- a/meilisearch/src/routes/indexes/settings.rs
+++ b/meilisearch/src/routes/indexes/settings.rs
@ -474,6 +474,28 @@ make_setting_route!(
    }
 );
 // Settings sub-route for the localized attributes rules, declared through
 // `make_setting_route!`. NOTE(review): the macro definition is outside this hunk; the meaning
 // of each positional argument should be checked against it.
 make_setting_route!(
    "/localized-attributes",
    put,
    Vec<meilisearch_types::locales::LocalizedAttributesRuleView>,
    meilisearch_types::deserr::DeserrJsonError<
        meilisearch_types::error::deserr_codes::InvalidSettingsLocalizedAttributes,
    >,
    localized_attributes,
    "localizedAttributes",
    analytics,
    |rules: &Option<Vec<meilisearch_types::locales::LocalizedAttributesRuleView>>, req: &HttpRequest| {
        use serde_json::json;
        // Publish only the locales in use: they are gathered from every rule into a `BTreeSet`,
        // so the reported list is deduplicated and ordered. When `rules` is `None` the
        // "locales" value is serialized as `null`.
        analytics.publish(
            "LocalizedAttributesRules Updated".to_string(),
            json!({
                "locales": rules.as_ref().map(|rules| rules.iter().flat_map(|rule| rule.locales.iter().cloned()).collect::<std::collections::BTreeSet<_>>())
            }),
            Some(req),
        );
    }
 );
 make_setting_route!(
    "/ranking-rules",
    put,
@ -786,6 +808,7 @@ pub async fn update_all(
            },
            "embedders": crate::routes::indexes::settings::embedder_analytics(new_settings.embedders.as_ref().set()),
            "search_cutoff_ms": new_settings.search_cutoff_ms.as_ref().set(),
            "locales": new_settings.localized_attributes.as_ref().set().map(|rules| rules.iter().flat_map(|rule| rule.locales.iter().cloned()).collect::<std::collections::BTreeSet<_>>()),
        }),
        Some(&req),
    );
--- a/meilisearch/src/search/federated.rs
+++ b/meilisearch/src/search/federated.rs
@ -380,9 +380,6 @@ pub fn perform_federated_search(
        let criteria = index.criteria(&rtxn)?;
        // stuff we need for the hitmaker
        let script_lang_map = index.script_language(&rtxn)?;
        let dictionary = index.dictionary(&rtxn)?;
        let dictionary: Option<Vec<_>> =
            dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
@ -494,6 +491,7 @@ pub fn perform_federated_search(
                    sort: query.sort,
                    show_ranking_score: query.show_ranking_score,
                    show_ranking_score_details: query.show_ranking_score_details,
                    locales: query.locales.map(|l| l.iter().copied().map(Into::into).collect()),
                };
                let milli::SearchResult {
@ -509,11 +507,7 @@ pub fn perform_federated_search(
                degraded |= query_degraded;
                used_negative_operator |= query_used_negative_operator;
-                let tokenizer = HitMaker::tokenizer(
+                let tokenizer = HitMaker::tokenizer(dictionary.as_deref(), separators.as_deref());
                    &script_lang_map,
                    dictionary.as_deref(),
                    separators.as_deref(),
                );
                let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer);
--- a/meilisearch/src/search/mod.rs
+++ b/meilisearch/src/search/mod.rs
@ -1,6 +1,6 @@
 use core::fmt;
 use std::cmp::min;
-use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
+use std::collections::{BTreeMap, BTreeSet, HashSet};
 use std::str::FromStr;
 use std::sync::Arc;
 use std::time::{Duration, Instant};
@ -15,16 +15,17 @@ use meilisearch_types::error::deserr_codes::*;
 use meilisearch_types::error::{Code, ResponseError};
 use meilisearch_types::heed::RoTxn;
 use meilisearch_types::index_uid::IndexUid;
 use meilisearch_types::locales::Locale;
 use meilisearch_types::milli::score_details::{ScoreDetails, ScoringStrategy};
 use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
 use meilisearch_types::milli::vector::Embedder;
 use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues, TimeBudget};
 use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
 use meilisearch_types::{milli, Document};
-use milli::tokenizer::TokenizerBuilder;
+use milli::tokenizer::{Language, TokenizerBuilder};
 use milli::{
-    AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, Index, MatchBounds, MatcherBuilder,
+    AscDesc, FieldId, FieldsIdsMap, Filter, FormatOptions, Index, LocalizedAttributesRule,
-    SortError, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
+    MatchBounds, MatcherBuilder, SortError, TermsMatchingStrategy, DEFAULT_VALUES_PER_FACET,
 };
 use regex::Regex;
 use serde::Serialize;
@ -100,6 +101,8 @@ pub struct SearchQuery {
    pub attributes_to_search_on: Option<Vec<String>>,
    #[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
    pub ranking_score_threshold: Option<RankingScoreThreshold>,
    #[deserr(default, error = DeserrJsonError<InvalidSearchLocales>, default)]
    pub locales: Option<Vec<Locale>>,
 }
 #[derive(Debug, Clone, Copy, PartialEq, Deserr)]
@ -169,6 +172,7 @@ impl fmt::Debug for SearchQuery {
            matching_strategy,
            attributes_to_search_on,
            ranking_score_threshold,
            locales,
        } = self;
        let mut debug = f.debug_struct("SearchQuery");
@ -250,6 +254,10 @@ impl fmt::Debug for SearchQuery {
            debug.field("ranking_score_threshold", &ranking_score_threshold);
        }
        if let Some(locales) = locales {
            debug.field("locales", &locales);
        }
        debug.finish()
    }
 }
@ -425,6 +433,8 @@ pub struct SearchQueryWithIndex {
    pub attributes_to_search_on: Option<Vec<String>>,
    #[deserr(default, error = DeserrJsonError<InvalidSearchRankingScoreThreshold>, default)]
    pub ranking_score_threshold: Option<RankingScoreThreshold>,
    #[deserr(default, error = DeserrJsonError<InvalidSearchLocales>, default)]
    pub locales: Option<Vec<Locale>>,
    #[deserr(default)]
    pub federation_options: Option<FederationOptions>,
@ -477,6 +487,7 @@ impl SearchQueryWithIndex {
            attributes_to_search_on,
            hybrid,
            ranking_score_threshold,
            locales,
        } = self;
        (
            index_uid,
@ -506,6 +517,7 @@ impl SearchQueryWithIndex {
                attributes_to_search_on,
                hybrid,
                ranking_score_threshold,
                locales,
                // do not use ..Default::default() here,
                // rather add any missing field from `SearchQuery` to `SearchQueryWithIndex`
            },
@ -866,6 +878,10 @@ fn prepare_search<'t>(
        search.sort_criteria(sort);
    }
    if let Some(ref locales) = query.locales {
        search.locales(locales.iter().copied().map(Into::into).collect());
    }
    Ok((search, is_finite_pagination, max_total_hits, offset))
 }
@ -917,6 +933,7 @@ pub fn perform_search(
        highlight_pre_tag,
        highlight_post_tag,
        crop_marker,
        locales,
        // already used in prepare_search
        vector: _,
        hybrid: _,
@ -941,6 +958,7 @@ pub fn perform_search(
        sort,
        show_ranking_score,
        show_ranking_score_details,
        locales: locales.map(|l| l.iter().copied().map(Into::into).collect()),
    };
    let documents = make_hits(
@ -1046,6 +1064,7 @@ struct AttributesFormat {
    sort: Option<Vec<String>>,
    show_ranking_score: bool,
    show_ranking_score_details: bool,
    locales: Option<Vec<Language>>,
 }
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
@ -1093,19 +1112,16 @@ struct HitMaker<'a> {
    show_ranking_score_details: bool,
    sort: Option<Vec<String>>,
    show_matches_position: bool,
    locales: Option<Vec<Language>>,
 }
 impl<'a> HitMaker<'a> {
    pub fn tokenizer<'b>(
        script_lang_map: &'b HashMap<milli::tokenizer::Script, Vec<milli::tokenizer::Language>>,
        dictionary: Option<&'b [&'b str]>,
        separators: Option<&'b [&'b str]>,
    ) -> milli::tokenizer::Tokenizer<'b> {
        let mut tokenizer_builder = TokenizerBuilder::default();
        tokenizer_builder.create_char_map(true);
        if !script_lang_map.is_empty() {
            tokenizer_builder.allow_list(script_lang_map);
        }
        if let Some(separators) = separators {
            tokenizer_builder.separators(separators);
@ -1218,6 +1234,7 @@ impl<'a> HitMaker<'a> {
            show_ranking_score_details: format.show_ranking_score_details,
            show_matches_position: format.show_matches_position,
            sort: format.sort,
            locales: format.locales,
        })
    }
@ -1273,6 +1290,9 @@ impl<'a> HitMaker<'a> {
            document.insert("_vectors".into(), vectors.into());
        }
        let localized_attributes =
            self.index.localized_attributes_rules(self.rtxn)?.unwrap_or_default();
        let (matches_position, formatted) = format_fields(
            &displayed_document,
            &self.fields_ids_map,
@ -1280,6 +1300,8 @@ impl<'a> HitMaker<'a> {
            &self.formatted_options,
            self.show_matches_position,
            &self.displayed_ids,
            self.locales.as_deref(),
            &localized_attributes,
        )?;
        if let Some(sort) = self.sort.as_ref() {
@ -1312,8 +1334,6 @@ fn make_hits<'a>(
 ) -> Result<Vec<SearchHit>, MeilisearchHttpError> {
    let mut documents = Vec::new();
    let script_lang_map = index.script_language(rtxn)?;
    let dictionary = index.dictionary(rtxn)?;
    let dictionary: Option<Vec<_>> =
        dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
@ -1321,8 +1341,7 @@ fn make_hits<'a>(
    let separators: Option<Vec<_>> =
        separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
-    let tokenizer =
+    let tokenizer = HitMaker::tokenizer(dictionary.as_deref(), separators.as_deref());
        HitMaker::tokenizer(&script_lang_map, dictionary.as_deref(), separators.as_deref());
    let formatter_builder = HitMaker::formatter_builder(matching_words, tokenizer);
@ -1341,6 +1360,7 @@ pub fn perform_facet_search(
    facet_name: String,
    search_kind: SearchKind,
    features: RoFeatures,
    locales: Option<Vec<Language>>,
 ) -> Result<FacetSearchResult, ResponseError> {
    let before_search = Instant::now();
    let rtxn = index.read_txn()?;
@ -1349,6 +1369,14 @@ pub fn perform_facet_search(
        None => TimeBudget::default(),
    };
    let localized_attributes = index.localized_attributes_rules(&rtxn)?.unwrap_or_default();
    let locales = locales.or_else(|| {
        localized_attributes
            .into_iter()
            .find(|attr| attr.match_str(&facet_name))
            .map(|attr| attr.locales)
    });
    let (search, _, _, _) =
        prepare_search(index, &rtxn, &search_query, &search_kind, time_budget, features)?;
    let mut facet_search = SearchForFacetValues::new(
@ -1363,6 +1391,10 @@ pub fn perform_facet_search(
        facet_search.max_values(max_facets as usize);
    }
    if let Some(locales) = locales {
        facet_search.locales(locales);
    }
    Ok(FacetSearchResult {
        facet_hits: facet_search.execute()?,
        facet_query,
@ -1443,6 +1475,7 @@ pub fn perform_similar(
        sort: None,
        show_ranking_score,
        show_ranking_score_details,
        locales: None,
    };
    let hits = make_hits(
@ -1624,6 +1657,7 @@ fn make_document(
    Ok(document)
 }
 #[allow(clippy::too_many_arguments)]
 fn format_fields(
    document: &Document,
    field_ids_map: &FieldsIdsMap,
@ -1631,6 +1665,8 @@ fn format_fields(
    formatted_options: &BTreeMap<FieldId, FormatOptions>,
    compute_matches: bool,
    displayable_ids: &BTreeSet<FieldId>,
    locales: Option<&[Language]>,
    localized_attributes: &[LocalizedAttributesRule],
 ) -> Result<(Option<MatchesPosition>, Document), MeilisearchHttpError> {
    let mut matches_position = compute_matches.then(BTreeMap::new);
    let mut document = document.clone();
@ -1663,7 +1699,22 @@ fn format_fields(
            .reduce(|acc, option| acc.merge(option));
        let mut infos = Vec::new();
-        *value = format_value(std::mem::take(value), builder, format, &mut infos, compute_matches);
+        // if no locales have been provided, we try to find the locales in the localized_attributes.
        let locales = locales.or_else(|| {
            localized_attributes
                .iter()
                .find(|rule| rule.match_str(key))
                .map(LocalizedAttributesRule::locales)
        });
        *value = format_value(
            std::mem::take(value),
            builder,
            format,
            &mut infos,
            compute_matches,
            locales,
        );
        if let Some(matches) = matches_position.as_mut() {
            if !infos.is_empty() {
@ -1688,10 +1739,11 @@ fn format_value(
    format_options: Option<FormatOptions>,
    infos: &mut Vec<MatchBounds>,
    compute_matches: bool,
    locales: Option<&[Language]>,
 ) -> Value {
    match value {
        Value::String(old_string) => {
-            let mut matcher = builder.build(&old_string);
+            let mut matcher = builder.build(&old_string, locales);
            if compute_matches {
                let matches = matcher.matches();
                infos.extend_from_slice(&matches[..]);
@ -1718,6 +1770,7 @@ fn format_value(
                        }),
                        infos,
                        compute_matches,
                        locales,
                    )
                })
                .collect(),
@ -1737,6 +1790,7 @@ fn format_value(
                            }),
                            infos,
                            compute_matches,
                            locales,
                        ),
                    )
                })
@ -1745,7 +1799,7 @@ fn format_value(
        Value::Number(number) => {
            let s = number.to_string();
-            let mut matcher = builder.build(&s);
+            let mut matcher = builder.build(&s, locales);
            if compute_matches {
                let matches = matcher.matches();
                infos.extend_from_slice(&matches[..]);
--- a/meilisearch/tests/dumps/mod.rs
+++ b/meilisearch/tests/dumps/mod.rs
@ -78,7 +78,8 @@ async fn import_dump_v1_movie_raw() {
      "pagination": {
        "maxTotalHits": 1000
      },
-      "searchCutoffMs": null
+      "searchCutoffMs": null,
      "localizedAttributes": null
    }
    "###
    );
@ -240,7 +241,8 @@ async fn import_dump_v1_movie_with_settings() {
      "pagination": {
        "maxTotalHits": 1000
      },
-      "searchCutoffMs": null
+      "searchCutoffMs": null,
      "localizedAttributes": null
    }
    "###
    );
@ -388,7 +390,8 @@ async fn import_dump_v1_rubygems_with_settings() {
      "pagination": {
        "maxTotalHits": 1000
      },
-      "searchCutoffMs": null
+      "searchCutoffMs": null,
      "localizedAttributes": null
    }
    "###
    );
@ -522,7 +525,8 @@ async fn import_dump_v2_movie_raw() {
      "pagination": {
        "maxTotalHits": 1000
      },
-      "searchCutoffMs": null
+      "searchCutoffMs": null,
      "localizedAttributes": null
    }
    "###
    );
@ -668,7 +672,8 @@ async fn import_dump_v2_movie_with_settings() {
      "pagination": {
        "maxTotalHits": 1000
      },
-      "searchCutoffMs": null
+      "searchCutoffMs": null,
      "localizedAttributes": null
    }
    "###
    );
@ -813,7 +818,8 @@ async fn import_dump_v2_rubygems_with_settings() {
      "pagination": {
        "maxTotalHits": 1000
      },
-      "searchCutoffMs": null
+      "searchCutoffMs": null,
      "localizedAttributes": null
    }
    "###
    );
@ -947,7 +953,8 @@ async fn import_dump_v3_movie_raw() {
      "pagination": {
        "maxTotalHits": 1000
      },
-      "searchCutoffMs": null
+      "searchCutoffMs": null,
      "localizedAttributes": null
    }
    "###
    );
@ -1093,7 +1100,8 @@ async fn import_dump_v3_movie_with_settings() {
      "pagination": {
        "maxTotalHits": 1000
      },
-      "searchCutoffMs": null
+      "searchCutoffMs": null,
      "localizedAttributes": null
    }
    "###
    );
@ -1238,7 +1246,8 @@ async fn import_dump_v3_rubygems_with_settings() {
      "pagination": {
        "maxTotalHits": 1000
      },
-      "searchCutoffMs": null
+      "searchCutoffMs": null,
      "localizedAttributes": null
    }
    "###
    );
@ -1372,7 +1381,8 @@ async fn import_dump_v4_movie_raw() {
      "pagination": {
        "maxTotalHits": 1000
      },
-      "searchCutoffMs": null
+      "searchCutoffMs": null,
      "localizedAttributes": null
    }
    "###
    );
@ -1518,7 +1528,8 @@ async fn import_dump_v4_movie_with_settings() {
      "pagination": {
        "maxTotalHits": 1000
      },
-      "searchCutoffMs": null
+      "searchCutoffMs": null,
      "localizedAttributes": null
    }
    "###
    );
@ -1663,7 +1674,8 @@ async fn import_dump_v4_rubygems_with_settings() {
      "pagination": {
        "maxTotalHits": 1000
      },
-      "searchCutoffMs": null
+      "searchCutoffMs": null,
      "localizedAttributes": null
    }
    "###
    );
@ -1909,7 +1921,8 @@ async fn import_dump_v6_containing_experimental_features() {
      "pagination": {
        "maxTotalHits": 1000
      },
-      "searchCutoffMs": null
+      "searchCutoffMs": null,
      "localizedAttributes": null
    }
    "###);
@ -2087,7 +2100,8 @@ async fn generate_and_import_dump_containing_vectors() {
          "documentTemplate": "{{doc.doggo}}"
        }
      },
-      "searchCutoffMs": null
+      "searchCutoffMs": null,
      "localizedAttributes": null
    }
    "###);
--- a/meilisearch/tests/search/locales.rs
+++ b/meilisearch/tests/search/locales.rs
--- a/meilisearch/tests/search/mod.rs
+++ b/meilisearch/tests/search/mod.rs
@ -7,6 +7,7 @@ mod facet_search;
 mod formatted;
 mod geo;
 mod hybrid;
 mod locales;
 mod matching_strategy;
 mod multi;
 mod pagination;
--- a/meilisearch/tests/settings/get_settings.rs
+++ b/meilisearch/tests/settings/get_settings.rs
@ -55,7 +55,7 @@ async fn get_settings() {
    let (response, code) = index.settings().await;
    assert_eq!(code, 200);
    let settings = response.as_object().unwrap();
-    assert_eq!(settings.keys().len(), 16);
+    assert_eq!(settings.keys().len(), 17);
    assert_eq!(settings["displayedAttributes"], json!(["*"]));
    assert_eq!(settings["searchableAttributes"], json!(["*"]));
    assert_eq!(settings["filterableAttributes"], json!([]));
@ -195,7 +195,8 @@ async fn secrets_are_hidden_in_settings() {
          "response": "{{embedding}}"
        }
      },
-      "searchCutoffMs": null
+      "searchCutoffMs": null,
      "localizedAttributes": null
    }
    "###);
--- a/milli/Cargo.toml
+++ b/milli/Cargo.toml
@ -17,7 +17,7 @@ bincode = "1.3.3"
 bstr = "1.9.1"
 bytemuck = { version = "1.16.1", features = ["extern_crate_alloc"] }
 byteorder = "1.5.0"
-charabia = { version = "0.8.12", default-features = false }
+charabia = { git = "https://github.com/meilisearch/charabia.git", branch = "simplify-lang-detection", default-features = false }
 concat-arrays = "0.1.2"
 crossbeam-channel = "0.5.13"
 deserr = "0.6.2"
--- a/milli/examples/search.rs
+++ b/milli/examples/search.rs
@ -68,6 +68,7 @@ fn main() -> Result<(), Box<dyn Error>> {
                logger,
                TimeBudget::max(),
                None,
                None,
            )?;
            if let Some((logger, dir)) = detailed_logger {
                logger.finish(&mut ctx, Path::new(dir))?;
--- a/milli/src/heed_codec/mod.rs
+++ b/milli/src/heed_codec/mod.rs
@ -7,7 +7,6 @@ mod fst_set_codec;
 mod obkv_codec;
 mod roaring_bitmap;
 mod roaring_bitmap_length;
 mod script_language_codec;
 mod str_beu32_codec;
 mod str_ref;
 mod str_str_u8_codec;
@ -26,7 +25,6 @@ pub use self::roaring_bitmap::{BoRoaringBitmapCodec, CboRoaringBitmapCodec, Roar
 pub use self::roaring_bitmap_length::{
    BoRoaringBitmapLenCodec, CboRoaringBitmapLenCodec, RoaringBitmapLenCodec,
 };
 pub use self::script_language_codec::ScriptLanguageCodec;
 pub use self::str_beu32_codec::{StrBEU16Codec, StrBEU32Codec};
 pub use self::str_str_u8_codec::{U8StrStrCodec, UncheckedU8StrStrCodec};
--- a/milli/src/heed_codec/script_language_codec.rs
+++ b/milli/src/heed_codec/script_language_codec.rs
@ -1,39 +0,0 @@
 use std::borrow::Cow;
 use std::ffi::CStr;
 use std::str;
 use charabia::{Language, Script};
 use heed::BoxedError;
 /// Codec for `(Script, Language)` keys.
 ///
 /// The encoded form is the script name, a single `\0` byte, then the language name.
 pub struct ScriptLanguageCodec;
 impl<'a> heed::BytesDecode<'a> for ScriptLanguageCodec {
    type DItem = (Script, Language);
    /// Splits `bytes` on the first `\0` and resolves both halves by name.
    fn bytes_decode(bytes: &'a [u8]) -> Result<Self::DItem, BoxedError> {
        // Everything up to the first `\0` is the script name.
        let script = CStr::from_bytes_until_nul(bytes)?.to_str()?;
        let script_name = Script::from_name(script);
        // The language name starts right after the `\0` separator.
        let language_start = script.len() + 1;
        let lan = str::from_utf8(&bytes[language_start..])?;
        Ok((script_name, Language::from_name(lan)))
    }
 }
 impl<'a> heed::BytesEncode<'a> for ScriptLanguageCodec {
    type EItem = (Script, Language);
    /// Writes the script name, a `\0` separator and the language name into one owned buffer.
    fn bytes_encode((script, lan): &Self::EItem) -> Result<Cow<'a, [u8]>, BoxedError> {
        let separator: &[u8] = &[0];
        let bytes = [script.name().as_bytes(), separator, lan.name().as_bytes()].concat();
        Ok(Cow::Owned(bytes))
    }
 }
--- a/milli/src/index.rs
+++ b/milli/src/index.rs
@ -4,7 +4,6 @@ use std::convert::TryInto;
 use std::fs::File;
 use std::path::Path;
 use charabia::{Language, Script};
 use heed::types::*;
 use heed::{CompactionOption, Database, RoTxn, RwTxn, Unspecified};
 use roaring::RoaringBitmap;
@ -19,9 +18,7 @@ use crate::heed_codec::facet::{
    FacetGroupKeyCodec, FacetGroupValueCodec, FieldDocIdFacetF64Codec, FieldDocIdFacetStringCodec,
    FieldIdCodec, OrderedF64Codec,
 };
-use crate::heed_codec::{
+use crate::heed_codec::{BEU16StrCodec, FstSetCodec, StrBEU16Codec, StrRefCodec};
    BEU16StrCodec, FstSetCodec, ScriptLanguageCodec, StrBEU16Codec, StrRefCodec,
 };
 use crate::order_by_map::OrderByMap;
 use crate::proximity::ProximityPrecision;
 use crate::vector::parsed_vectors::RESERVED_VECTORS_FIELD_NAME;
@ -29,8 +26,8 @@ use crate::vector::{Embedding, EmbeddingConfig};
 use crate::{
    default_criteria, CboRoaringBitmapCodec, Criterion, DocumentId, ExternalDocumentsIds,
    FacetDistribution, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldIdWordCountCodec,
-    FieldidsWeightsMap, GeoPoint, ObkvCodec, Result, RoaringBitmapCodec, RoaringBitmapLenCodec,
+    FieldidsWeightsMap, GeoPoint, LocalizedAttributesRule, ObkvCodec, Result, RoaringBitmapCodec,
-    Search, U8StrStrCodec, Weight, BEU16, BEU32, BEU64,
+    RoaringBitmapLenCodec, Search, U8StrStrCodec, Weight, BEU16, BEU32, BEU64,
 };
 pub const DEFAULT_MIN_WORD_LEN_ONE_TYPO: u8 = 5;
@ -73,6 +70,7 @@ pub mod main_key {
    pub const PROXIMITY_PRECISION: &str = "proximity-precision";
    pub const EMBEDDING_CONFIGS: &str = "embedding_configs";
    pub const SEARCH_CUTOFF: &str = "search_cutoff";
    pub const LOCALIZED_ATTRIBUTES_RULES: &str = "localized_attributes_rules";
 }
 pub mod db_name {
@ -101,7 +99,6 @@ pub mod db_name {
    pub const VECTOR_EMBEDDER_CATEGORY_ID: &str = "vector-embedder-category-id";
    pub const VECTOR_ARROY: &str = "vector-arroy";
    pub const DOCUMENTS: &str = "documents";
    pub const SCRIPT_LANGUAGE_DOCIDS: &str = "script_language_docids";
 }
 #[derive(Clone)]
@ -142,9 +139,6 @@ pub struct Index {
    /// Maps the word prefix and a field id with all the docids where the prefix appears inside the field
    pub word_prefix_fid_docids: Database<StrBEU16Codec, CboRoaringBitmapCodec>,
    /// Maps the script and language with all the docids that corresponds to it.
    pub script_language_docids: Database<ScriptLanguageCodec, RoaringBitmapCodec>,
    /// Maps the facet field id and the docids for which this field exists
    pub facet_id_exists_docids: Database<FieldIdCodec, CboRoaringBitmapCodec>,
    /// Maps the facet field id and the docids for which this field is set as null
@ -198,8 +192,6 @@ impl Index {
            env.create_database(&mut wtxn, Some(EXACT_WORD_PREFIX_DOCIDS))?;
        let word_pair_proximity_docids =
            env.create_database(&mut wtxn, Some(WORD_PAIR_PROXIMITY_DOCIDS))?;
        let script_language_docids =
            env.create_database(&mut wtxn, Some(SCRIPT_LANGUAGE_DOCIDS))?;
        let word_position_docids = env.create_database(&mut wtxn, Some(WORD_POSITION_DOCIDS))?;
        let word_fid_docids = env.create_database(&mut wtxn, Some(WORD_FIELD_ID_DOCIDS))?;
        let field_id_word_count_docids =
@ -243,7 +235,6 @@ impl Index {
            word_prefix_docids,
            exact_word_prefix_docids,
            word_pair_proximity_docids,
            script_language_docids,
            word_position_docids,
            word_fid_docids,
            word_prefix_position_docids,
@ -1562,46 +1553,32 @@ impl Index {
        self.main.remap_key_type::<Str>().delete(txn, main_key::PROXIMITY_PRECISION)
    }
-    /* script  language docids */
+    pub fn localized_attributes_rules(
    /// Retrieve all the documents ids that correspond with (Script, Language) key, `None` if it is any.
    pub fn script_language_documents_ids(
        &self,
        rtxn: &RoTxn<'_>,
-        key: &(Script, Language),
+    ) -> heed::Result<Option<Vec<LocalizedAttributesRule>>> {
-    ) -> heed::Result<Option<RoaringBitmap>> {
+        self.main
-        self.script_language_docids.get(rtxn, key)
+            .remap_types::<Str, SerdeJson<Vec<LocalizedAttributesRule>>>()
            .get(rtxn, main_key::LOCALIZED_ATTRIBUTES_RULES)
    }
-    pub fn script_language(
+    pub(crate) fn put_localized_attributes_rules(
        &self,
-        rtxn: &RoTxn<'_>,
+        txn: &mut RwTxn<'_>,
-    ) -> heed::Result<HashMap<Script, Vec<Language>>> {
+        val: Vec<LocalizedAttributesRule>,
-        let mut script_language: HashMap<Script, Vec<Language>> = HashMap::new();
+    ) -> heed::Result<()> {
-        let mut script_language_doc_count: Vec<(Script, Language, u64)> = Vec::new();
+        self.main.remap_types::<Str, SerdeJson<Vec<LocalizedAttributesRule>>>().put(
-        let mut total = 0;
+            txn,
-        for sl in self.script_language_docids.iter(rtxn)? {
+            main_key::LOCALIZED_ATTRIBUTES_RULES,
-            let ((script, language), docids) = sl?;
+            &val,
-
+        )
            // keep only Languages that contains at least 1 document.
            let remaining_documents_count = docids.len();
            total += remaining_documents_count;
            if remaining_documents_count > 0 {
                script_language_doc_count.push((script, language, remaining_documents_count));
            }
    }
-        let threshold = total / 20; // 5% (arbitrary)
+    pub(crate) fn delete_localized_attributes_rules(
-        for (script, language, count) in script_language_doc_count {
+        &self,
-            if count > threshold {
+        txn: &mut RwTxn<'_>,
-                if let Some(languages) = script_language.get_mut(&script) {
+    ) -> heed::Result<bool> {
-                    (*languages).push(language);
+        self.main.remap_key_type::<Str>().delete(txn, main_key::LOCALIZED_ATTRIBUTES_RULES)
                } else {
                    script_language.insert(script, vec![language]);
                }
            }
        }
        Ok(script_language)
    }
    /// Put the embedding configs:
--- a/milli/src/lib.rs
+++ b/milli/src/lib.rs
@ -16,6 +16,7 @@ pub mod facet;
 mod fields_ids_map;
 pub mod heed_codec;
 pub mod index;
 mod localized_attributes_rules;
 pub mod order_by_map;
 pub mod prompt;
 pub mod proximity;
@ -62,6 +63,8 @@ pub use self::heed_codec::{
    UncheckedU8StrStrCodec,
 };
 pub use self::index::Index;
 pub use self::localized_attributes_rules::LocalizedAttributesRule;
 use self::localized_attributes_rules::LocalizedFieldIds;
 pub use self::search::facet::{FacetValueHit, SearchForFacetValues};
 pub use self::search::similar::Similar;
 pub use self::search::{
--- a/milli/src/localized_attributes_rules.rs
+++ b/milli/src/localized_attributes_rules.rs
@ -0,0 +1,114 @@
 use std::collections::HashMap;
 use charabia::Language;
 use serde::{Deserialize, Serialize};
 use crate::fields_ids_map::FieldsIdsMap;
 use crate::FieldId;
 /// Declares the locales supported by the attributes matching a set of patterns.
 ///
 /// Each pattern is compared against the attribute name:
 /// - `*` matches any attribute name.
 /// - `attribute_name*` matches any attribute name starting with `attribute_name`.
 /// - `*attribute_name` matches any attribute name ending with `attribute_name`.
 /// - `*attribute_name*` matches any attribute name containing `attribute_name`.
 /// - any other pattern must be equal to the attribute name.
 #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
 pub struct LocalizedAttributesRule {
    /// Patterns compared against attribute names.
    pub attribute_patterns: Vec<String>,
    /// Locales attached to the attributes matched by one of the patterns.
    pub locales: Vec<Language>,
 }
 impl LocalizedAttributesRule {
    /// Builds a rule from its attribute patterns and its locales.
    pub fn new(attribute_patterns: Vec<String>, locales: Vec<Language>) -> Self {
        LocalizedAttributesRule { attribute_patterns, locales }
    }
    /// Returns `true` as soon as one of the rule's patterns matches `str`.
    pub fn match_str(&self, str: &str) -> bool {
        for pattern in &self.attribute_patterns {
            if match_pattern(pattern, str) {
                return true;
            }
        }
        false
    }
    /// Borrows the locales declared by this rule.
    pub fn locales(&self) -> &[Language] {
        self.locales.as_slice()
    }
 }
 /// Checks whether `str` is matched by `pattern`.
 ///
 /// A `*` is only meaningful as the first and/or last character of the pattern:
 /// `*` alone matches everything, `*x*` means "contains `x`", `*x` means "ends with `x`"
 /// and `x*` means "starts with `x`". Any other pattern must be equal to `str`.
 fn match_pattern(pattern: &str, str: &str) -> bool {
    // The lone wildcard is handled first so the slicing below never sees a 1-byte pattern.
    if pattern == "*" {
        return true;
    }
    match (pattern.strip_prefix('*'), pattern.strip_suffix('*')) {
        // Wildcard on both sides: drop the two `*` and look for the remainder anywhere.
        (Some(_), Some(_)) => str.contains(&pattern[1..pattern.len() - 1]),
        (Some(suffix), None) => str.ends_with(suffix),
        (None, Some(prefix)) => str.starts_with(prefix),
        (None, None) => pattern == str,
    }
 }
 /// Lookup table from a field id to the locales declared for that field.
 #[derive(Debug, Clone, PartialEq, Eq)]
 pub struct LocalizedFieldIds {
    field_id_to_locales: HashMap<FieldId, Vec<Language>>,
 }
 impl LocalizedFieldIds {
    /// Builds the table for the given `fields_ids`.
    ///
    /// Field ids without a name in `fields_ids_map` are ignored. For the others, only the
    /// first rule matching the field name is used, and its locales are stored sorted and
    /// deduplicated. Fields whose matching rule has no locales, or that match no rule, get
    /// no entry. When `rules` is `None` the table is empty.
    pub fn new<I: Iterator<Item = FieldId>>(
        rules: &Option<Vec<LocalizedAttributesRule>>,
        fields_ids_map: &FieldsIdsMap,
        fields_ids: I,
    ) -> Self {
        let field_id_to_locales = match rules {
            None => HashMap::new(),
            Some(rules) => fields_ids
                .filter_map(|field_id| {
                    let field_name = fields_ids_map.name(field_id)?;
                    // Take the first rule that matches
                    let rule = rules.iter().find(|rule| rule.match_str(field_name))?;
                    if rule.locales.is_empty() {
                        return None;
                    }
                    let mut locales = rule.locales.clone();
                    locales.sort();
                    locales.dedup();
                    Some((field_id, locales))
                })
                .collect(),
        };
        Self { field_id_to_locales }
    }
    /// Returns the locales registered for `fields_id`, if any.
    pub fn locales(&self, fields_id: FieldId) -> Option<&[Language]> {
        self.field_id_to_locales.get(&fields_id).map(|locales| locales.as_slice())
    }
 }
 #[cfg(test)]
 mod tests {
    use super::*;
    /// Exercises every wildcard position supported by `match_pattern`.
    #[test]
    fn test_match_pattern() {
        // (pattern, attribute name) pairs that must match.
        let accepted = [
            ("*", "test"),
            ("test*", "test"),
            ("test*", "testa"),
            ("*test", "test"),
            ("*test", "atest"),
            ("*test*", "test"),
            ("*test*", "atesta"),
            ("*test*", "atest"),
            ("*test*", "testa"),
        ];
        // (pattern, attribute name) pairs that must not match.
        let rejected = [("test*test", "test"), ("*test", "testa"), ("test*", "atest")];
        for &(pattern, name) in &accepted {
            assert!(match_pattern(pattern, name), "{pattern:?} should match {name:?}");
        }
        for &(pattern, name) in &rejected {
            assert!(!match_pattern(pattern, name), "{pattern:?} should not match {name:?}");
        }
    }
 }
--- a/milli/src/search/facet/search.rs
+++ b/milli/src/search/facet/search.rs
@ -3,7 +3,7 @@ use std::collections::BinaryHeap;
 use std::ops::ControlFlow;
 use charabia::normalizer::NormalizerOption;
-use charabia::Normalize;
+use charabia::{Language, Normalize, StrDetection, Token};
 use fst::automaton::{Automaton, Str};
 use fst::{IntoStreamer, Streamer};
 use roaring::RoaringBitmap;
@ -23,6 +23,7 @@ pub struct SearchForFacetValues<'a> {
    search_query: Search<'a>,
    max_values: usize,
    is_hybrid: bool,
    locales: Option<Vec<Language>>,
 }
 impl<'a> SearchForFacetValues<'a> {
@ -37,6 +38,7 @@ impl<'a> SearchForFacetValues<'a> {
            search_query,
            max_values: DEFAULT_MAX_NUMBER_OF_VALUES_PER_FACET,
            is_hybrid,
            locales: None,
        }
    }
@ -50,6 +52,11 @@ impl<'a> SearchForFacetValues<'a> {
        self
    }
    pub fn locales(&mut self, locales: Vec<Language>) -> &mut Self {
        self.locales = Some(locales);
        self
    }
    fn one_original_value_of(
        &self,
        field_id: FieldId,
@ -109,8 +116,7 @@ impl<'a> SearchForFacetValues<'a> {
        match self.query.as_ref() {
            Some(query) => {
-                let options = NormalizerOption { lossy: true, ..Default::default() };
+                let query = normalize_facet_string(query, self.locales.as_deref());
                let query = query.normalize(&options);
                let query = query.as_ref();
                let authorize_typos = self.search_query.index.authorize_typos(rtxn)?;
@ -330,3 +336,15 @@ impl ValuesCollection {
        }
    }
 }
 /// Normalizes `facet_string` with the lossy normalizer option.
 ///
 /// The string is wrapped in a single `Token` whose script and language come from a
 /// `StrDetection` built with the provided `locales`; the normalized lemma is returned
 /// as an owned `String`.
 fn normalize_facet_string(facet_string: &str, locales: Option<&[Language]>) -> String {
    let options = NormalizerOption { lossy: true, ..Default::default() };
    // Script is queried before language, in that order.
    let mut detection = StrDetection::new(facet_string, locales);
    let script = detection.script();
    let language = detection.language();
    let token = Token {
        lemma: std::borrow::Cow::Borrowed(facet_string),
        script,
        language,
        ..Default::default()
    };
    let normalized = token.normalize(&options);
    normalized.lemma.into_owned()
 }
--- a/milli/src/search/hybrid.rs
+++ b/milli/src/search/hybrid.rs
@ -174,6 +174,7 @@ impl<'a> Search<'a> {
            semantic: self.semantic.clone(),
            time_budget: self.time_budget.clone(),
            ranking_score_threshold: self.ranking_score_threshold,
            locales: self.locales.clone(),
        };
        let semantic = search.semantic.take();
--- a/milli/src/search/mod.rs
+++ b/milli/src/search/mod.rs
@ -1,6 +1,7 @@
 use std::fmt;
 use std::sync::Arc;
 use charabia::Language;
 use levenshtein_automata::{LevenshteinAutomatonBuilder as LevBuilder, DFA};
 use once_cell::sync::Lazy;
 use roaring::bitmap::RoaringBitmap;
@ -52,6 +53,7 @@ pub struct Search<'a> {
    semantic: Option<SemanticSearch>,
    time_budget: TimeBudget,
    ranking_score_threshold: Option<f64>,
    locales: Option<Vec<Language>>,
 }
 impl<'a> Search<'a> {
@ -72,6 +74,7 @@ impl<'a> Search<'a> {
            rtxn,
            index,
            semantic: None,
            locales: None,
            time_budget: TimeBudget::max(),
            ranking_score_threshold: None,
        }
@ -160,6 +163,11 @@ impl<'a> Search<'a> {
        self
    }
    /// Sets the list of languages to use for this search.
    ///
    /// The list is stored as `Some(locales)`, replacing any previously stored value.
    /// Returns `self` so that calls can be chained.
    pub fn locales(&mut self, locales: Vec<Language>) -> &mut Search<'a> {
        self.locales = Some(locales);
        self
    }
    pub fn execute_for_candidates(&self, has_vector_search: bool) -> Result<RoaringBitmap> {
        if has_vector_search {
            let ctx = SearchContext::new(self.index, self.rtxn)?;
@ -232,6 +240,7 @@ impl<'a> Search<'a> {
                &mut DefaultSearchLogger,
                self.time_budget.clone(),
                self.ranking_score_threshold,
                self.locales.as_ref(),
            )?,
        };
@ -272,6 +281,7 @@ impl fmt::Debug for Search<'_> {
            semantic,
            time_budget,
            ranking_score_threshold,
            locales,
        } = self;
        f.debug_struct("Search")
            .field("query", query)
@ -292,6 +302,7 @@ impl fmt::Debug for Search<'_> {
            )
            .field("time_budget", time_budget)
            .field("ranking_score_threshold", ranking_score_threshold)
            .field("locales", locales)
            .finish()
    }
 }
--- a/milli/src/search/new/matches/mod.rs
+++ b/milli/src/search/new/matches/mod.rs
@ -1,6 +1,6 @@
 use std::borrow::Cow;
-use charabia::{SeparatorKind, Token, Tokenizer};
+use charabia::{Language, SeparatorKind, Token, Tokenizer};
 pub use matching_words::MatchingWords;
 use matching_words::{MatchType, PartialMatch, WordId};
 use serde::Serialize;
@ -46,7 +46,11 @@ impl<'m> MatcherBuilder<'m> {
        self
    }
-    pub fn build<'t>(&self, text: &'t str) -> Matcher<'t, 'm, '_> {
+    pub fn build<'t, 'lang>(
        &self,
        text: &'t str,
        locales: Option<&'lang [Language]>,
    ) -> Matcher<'t, 'm, '_, 'lang> {
        let crop_marker = match &self.crop_marker {
            Some(marker) => marker.as_str(),
            None => DEFAULT_CROP_MARKER,
@ -68,6 +72,7 @@ impl<'m> MatcherBuilder<'m> {
            highlight_prefix,
            highlight_suffix,
            matches: None,
            locales,
        }
    }
 }
@ -107,17 +112,18 @@ pub struct MatchBounds {
 /// Structure used to analyze a string, compute words that match,
 /// and format the source string, returning a highlighted and cropped sub-string.
-pub struct Matcher<'t, 'tokenizer, 'b> {
+pub struct Matcher<'t, 'tokenizer, 'b, 'lang> {
    /// Source text that is tokenized to look for matches.
    text: &'t str,
    matching_words: &'b MatchingWords,
    /// Tokenizer used to split `text` into tokens.
    tokenizer: &'b Tokenizer<'tokenizer>,
    /// Optional list of languages forwarded to `tokenize_with_allow_list`
    /// when `text` is tokenized.
    locales: Option<&'lang [Language]>,
    crop_marker: &'b str,
    highlight_prefix: &'b str,
    highlight_suffix: &'b str,
    /// Tokens of `text` and the matches found among them;
    /// set to `None` when the matcher is built.
    matches: Option<(Vec<Token<'t>>, Vec<Match>)>,
 }
-impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_> {
+impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_, '_> {
    /// Iterates over tokens and save any of them that matches the query.
    fn compute_matches(&mut self) -> &mut Self {
        /// some words are counted as matches only if they are close together and in the good order,
@ -173,7 +179,8 @@ impl<'t, 'tokenizer> Matcher<'t, 'tokenizer, '_> {
            false
        }
-        let tokens: Vec<_> = self.tokenizer.tokenize(self.text).collect();
+        let tokens: Vec<_> =
            self.tokenizer.tokenize_with_allow_list(self.text, self.locales).collect();
        let mut matches = Vec::new();
        let mut words_positions = tokens
@ -530,6 +537,7 @@ mod tests {
                &mut crate::DefaultSearchLogger,
                TimeBudget::max(),
                None,
                None,
            )
            .unwrap();
@ -553,19 +561,19 @@ mod tests {
        // Text without any match.
        let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
        // no crop and no highlight should return complete text.
        assert_eq!(&matcher.format(format_options), &text);
        // Text containing all matches.
        let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
        // no crop and no highlight should return complete text.
        assert_eq!(&matcher.format(format_options), &text);
        // Text containing some matches.
        let text = "Natalie risk her future to build a world with the boy she loves.";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
        // no crop and no highlight should return complete text.
        assert_eq!(&matcher.format(format_options), &text);
    }
@ -580,23 +588,23 @@ mod tests {
        // empty text.
        let text = "";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
        assert_eq!(&matcher.format(format_options), "");
        // text containing only separators.
        let text = ":-)";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
        assert_eq!(&matcher.format(format_options), ":-)");
        // Text without any match.
        let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
        // no crop should return complete text, because there is no matches.
        assert_eq!(&matcher.format(format_options), &text);
        // Text containing all matches.
        let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
        // no crop should return complete text with highlighted matches.
        insta::assert_snapshot!(
            matcher.format(format_options),
@ -605,7 +613,7 @@ mod tests {
        // Text containing some matches.
        let text = "Natalie risk her future to build a world with the boy she loves.";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
        // no crop should return complete text with highlighted matches.
        insta::assert_snapshot!(
            matcher.format(format_options),
@ -622,7 +630,7 @@ mod tests {
        // Text containing prefix match.
        let text = "Ŵôřlḑôle";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
        // no crop should return complete text with highlighted matches.
        insta::assert_snapshot!(
            matcher.format(format_options),
@ -631,7 +639,7 @@ mod tests {
        // Text containing unicode match.
        let text = "Ŵôřlḑ";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
        // no crop should return complete text with highlighted matches.
        insta::assert_snapshot!(
            matcher.format(format_options),
@ -643,7 +651,7 @@ mod tests {
        // Text containing unicode match.
        let text = "Westfália";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
        // no crop should return complete text with highlighted matches.
        insta::assert_snapshot!(
            matcher.format(format_options),
@ -661,7 +669,7 @@ mod tests {
        // empty text.
        let text = "";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
        insta::assert_snapshot!(
            matcher.format(format_options),
            @""
@ -669,7 +677,7 @@ mod tests {
        // text containing only separators.
        let text = ":-)";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
        insta::assert_snapshot!(
            matcher.format(format_options),
            @":-)"
@ -677,7 +685,7 @@ mod tests {
        // Text without any match.
        let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
        // no highlight should return 10 first words with a marker at the end.
        insta::assert_snapshot!(
            matcher.format(format_options),
@ -686,7 +694,7 @@ mod tests {
        // Text without any match starting by a separator.
        let text = "(A quick brown fox can not jump 32 feet, right? Brr, it is cold!)";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
        // no highlight should return 10 first words with a marker at the end.
        insta::assert_snapshot!(
            matcher.format(format_options),
@ -695,7 +703,7 @@ mod tests {
        // Test phrase propagation
        let text = "Natalie risk her future. Split The World is a book written by Emily Henry. I never read it.";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
        // should crop the phrase instead of cropping around the match.
        insta::assert_snapshot!(
            matcher.format(format_options),
@ -704,7 +712,7 @@ mod tests {
        // Text containing some matches.
        let text = "Natalie risk her future to build a world with the boy she loves.";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
        // no highlight should return 10 last words with a marker at the start.
        insta::assert_snapshot!(
            matcher.format(format_options),
@ -713,7 +721,7 @@ mod tests {
        // Text containing all matches.
        let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
        // no highlight should return 10 last words with a marker at the start.
        insta::assert_snapshot!(
            matcher.format(format_options),
@ -722,7 +730,7 @@ mod tests {
        // Text containing a match unordered and a match ordered.
        let text = "The world split void void void void void void void void void split the world void void";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
        // crop should return 10 last words with a marker at the start.
        insta::assert_snapshot!(
            matcher.format(format_options),
@ -731,7 +739,7 @@ mod tests {
        // Text containing matches with different density.
        let text = "split void the void void world void void void void void void void void void void split the world void void";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
        // crop should return 10 last words with a marker at the start.
        insta::assert_snapshot!(
            matcher.format(format_options),
@ -740,7 +748,7 @@ mod tests {
        // Text containing matches with same word.
        let text = "split split split split split split void void void void void void void void void void split the world void void";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
        // crop should return 10 last words with a marker at the start.
        insta::assert_snapshot!(
            matcher.format(format_options),
@ -758,7 +766,7 @@ mod tests {
        // empty text.
        let text = "";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
        insta::assert_snapshot!(
            matcher.format(format_options),
            @""
@ -766,7 +774,7 @@ mod tests {
        // text containing only separators.
        let text = ":-)";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
        insta::assert_snapshot!(
            matcher.format(format_options),
            @":-)"
@ -774,7 +782,7 @@ mod tests {
        // Text without any match.
        let text = "A quick brown fox can not jump 32 feet, right? Brr, it is cold!";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
        // both should return 10 first words with a marker at the end.
        insta::assert_snapshot!(
            matcher.format(format_options),
@ -783,7 +791,7 @@ mod tests {
        // Text containing some matches.
        let text = "Natalie risk her future to build a world with the boy she loves.";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
        // both should return 10 last words with a marker at the start and highlighted matches.
        insta::assert_snapshot!(
            matcher.format(format_options),
@ -792,7 +800,7 @@ mod tests {
        // Text containing all matches.
        let text = "Natalie risk her future to build a world with the boy she loves. Emily Henry: The Love That Split The World.";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
        // both should return 10 last words with a marker at the start and highlighted matches.
        insta::assert_snapshot!(
            matcher.format(format_options),
@ -801,7 +809,7 @@ mod tests {
        // Text containing a match unordered and a match ordered.
        let text = "The world split void void void void void void void void void split the world void void";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
        // crop should return 10 last words with a marker at the start.
        insta::assert_snapshot!(
            matcher.format(format_options),
@ -824,7 +832,7 @@ mod tests {
        let text = "The groundbreaking invention had the power to split the world between those who embraced progress and those who resisted change!";
        let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "\"the world\"");
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
        // should return 10 words with a marker at the start as well the end, and the highlighted matches.
        insta::assert_snapshot!(
            matcher.format(format_options),
@ -832,7 +840,7 @@ mod tests {
        );
        let builder = MatcherBuilder::new_test(&rtxn, &temp_index, "those \"and those\"");
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
        // should highlight "those" and the phrase "and those".
        insta::assert_snapshot!(
            matcher.format(format_options),
@ -851,7 +859,7 @@ mod tests {
        // set a smaller crop size
        let format_options = FormatOptions { highlight: false, crop: Some(2) };
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
        // because crop size < query size, partially format matches.
        insta::assert_snapshot!(
            matcher.format(format_options),
@ -860,7 +868,7 @@ mod tests {
        // set a smaller crop size
        let format_options = FormatOptions { highlight: false, crop: Some(1) };
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
        // because crop size < query size, partially format matches.
        insta::assert_snapshot!(
            matcher.format(format_options),
@ -869,7 +877,7 @@ mod tests {
        // set  crop size to 0
        let format_options = FormatOptions { highlight: false, crop: Some(0) };
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
        // because crop size is 0, crop is ignored.
        insta::assert_snapshot!(
            matcher.format(format_options),
@ -889,7 +897,7 @@ mod tests {
        let format_options = FormatOptions { highlight: true, crop: None };
        let text = "the do or die can't be he do and or isn't he";
-        let mut matcher = builder.build(text);
+        let mut matcher = builder.build(text, None);
        insta::assert_snapshot!(
            matcher.format(format_options),
            @"_the_ _do_ _or_ die can't be he do and or isn'_t_ _he_"
--- a/milli/src/search/new/mod.rs
+++ b/milli/src/search/new/mod.rs
@ -24,7 +24,7 @@ mod tests;
 use std::collections::HashSet;
 use bucket_sort::{bucket_sort, BucketSortOutput};
-use charabia::TokenizerBuilder;
+use charabia::{Language, TokenizerBuilder};
 use db_cache::DatabaseCache;
 use exact_attribute::ExactAttribute;
 use graph_based_ranking_rule::{Exactness, Fid, Position, Proximity, Typo};
@ -639,6 +639,7 @@ pub fn execute_search(
    query_graph_logger: &mut dyn SearchLogger<QueryGraph>,
    time_budget: TimeBudget,
    ranking_score_threshold: Option<f64>,
    locales: Option<&Vec<Language>>,
 ) -> Result<PartialSearchResult> {
    check_sort_criteria(ctx, sort_criteria.as_ref())?;
@ -670,9 +671,8 @@ pub fn execute_search(
            tokbuilder.words_dict(dictionary);
        }
-        let script_lang_map = ctx.index.script_language(ctx.txn)?;
+        if let Some(locales) = locales {
-        if !script_lang_map.is_empty() {
+            tokbuilder.allow_list(locales);
            tokbuilder.allow_list(&script_lang_map);
        }
        let tokenizer = tokbuilder.build();
--- a/milli/src/search/new/query_term/parse_query.rs
+++ b/milli/src/search/new/query_term/parse_query.rs
@ -24,7 +24,7 @@ pub struct ExtractedTokens {
 #[tracing::instrument(level = "trace", skip_all, target = "search::query")]
 pub fn located_query_terms_from_tokens(
    ctx: &mut SearchContext<'_>,
-    query: NormalizedTokenIter<'_, '_>,
+    query: NormalizedTokenIter<'_, '_, '_, '_>,
    words_limit: Option<usize>,
 ) -> Result<ExtractedTokens> {
    let nbr_typos = number_of_typos_allowed(ctx)?;
--- a/milli/src/update/clear_documents.rs
+++ b/milli/src/update/clear_documents.rs
@ -36,7 +36,6 @@ impl<'t, 'i> ClearDocuments<'t, 'i> {
            field_id_word_count_docids,
            word_prefix_position_docids,
            word_prefix_fid_docids,
            script_language_docids,
            facet_id_f64_docids,
            facet_id_string_docids,
            facet_id_normalized_string_strings,
@ -83,7 +82,6 @@ impl<'t, 'i> ClearDocuments<'t, 'i> {
        field_id_word_count_docids.clear(self.wtxn)?;
        word_prefix_position_docids.clear(self.wtxn)?;
        word_prefix_fid_docids.clear(self.wtxn)?;
        script_language_docids.clear(self.wtxn)?;
        facet_id_f64_docids.clear(self.wtxn)?;
        facet_id_normalized_string_strings.clear(self.wtxn)?;
        facet_id_string_fst.clear(self.wtxn)?;
--- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
+++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
@ -1,10 +1,9 @@
 use std::collections::HashMap;
 use std::convert::TryInto;
 use std::fs::File;
 use std::io::BufReader;
 use std::{io, mem, str};
-use charabia::{Language, Script, SeparatorKind, Token, TokenKind, Tokenizer, TokenizerBuilder};
+use charabia::{SeparatorKind, Token, TokenKind, Tokenizer, TokenizerBuilder};
 use obkv::{KvReader, KvWriterU16};
 use roaring::RoaringBitmap;
 use serde_json::Value;
@ -15,8 +14,6 @@ use crate::update::del_add::{del_add_from_two_obkvs, DelAdd, KvReaderDelAdd};
 use crate::update::settings::{InnerIndexSettings, InnerIndexSettingsDiff};
 use crate::{FieldId, Result, MAX_POSITION_PER_ATTRIBUTE, MAX_WORD_LENGTH};
 /// Maps each detected `(Script, Language)` pair to two bitmaps of document ids:
 /// the first one collects the documents seen in deletions, the second one the documents seen in additions.
 pub type ScriptLanguageDocidsMap = HashMap<(Script, Language), (RoaringBitmap, RoaringBitmap)>;
 /// Extracts the word and positions where this word appear and
 /// prefixes it by the document id.
 ///
@ -28,7 +25,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
    indexer: GrenadParameters,
    settings_diff: &InnerIndexSettingsDiff,
    max_positions_per_attributes: Option<u32>,
-) -> Result<(grenad::Reader<BufReader<File>>, ScriptLanguageDocidsMap)> {
+) -> Result<grenad::Reader<BufReader<File>>> {
    let max_positions_per_attributes = max_positions_per_attributes
        .map_or(MAX_POSITION_PER_ATTRIBUTE, |max| max.min(MAX_POSITION_PER_ATTRIBUTE));
    let max_memory = indexer.max_memory_by_thread();
@ -36,7 +33,6 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
    // initialize destination values.
    let mut documents_ids = RoaringBitmap::new();
    let mut script_language_docids = HashMap::new();
    let mut docid_word_positions_sorter = create_sorter(
        grenad::SortAlgorithm::Stable,
        keep_latest_obkv,
@ -61,13 +57,9 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
        .map(|s| s.iter().map(String::as_str).collect());
    let old_dictionary: Option<Vec<_>> =
        settings_diff.old.dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
-    let mut del_builder = tokenizer_builder(
+    let del_builder =
-        old_stop_words,
+        tokenizer_builder(old_stop_words, old_separators.as_deref(), old_dictionary.as_deref());
-        old_separators.as_deref(),
+    let del_tokenizer = del_builder.into_tokenizer();
        old_dictionary.as_deref(),
        None,
    );
    let del_tokenizer = del_builder.build();
    let new_stop_words = settings_diff.new.stop_words.as_ref();
    let new_separators: Option<Vec<_>> = settings_diff
@ -77,13 +69,9 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
        .map(|s| s.iter().map(String::as_str).collect());
    let new_dictionary: Option<Vec<_>> =
        settings_diff.new.dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
-    let mut add_builder = tokenizer_builder(
+    let add_builder =
-        new_stop_words,
+        tokenizer_builder(new_stop_words, new_separators.as_deref(), new_dictionary.as_deref());
-        new_separators.as_deref(),
+    let add_tokenizer = add_builder.into_tokenizer();
        new_dictionary.as_deref(),
        None,
    );
    let add_tokenizer = add_builder.build();
    // iterate over documents.
    let mut cursor = obkv_documents.into_cursor()?;
@ -109,7 +97,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
        let (del, add): (Result<_>, Result<_>) = rayon::join(
            || {
                // deletions
-                lang_safe_tokens_from_document(
+                tokens_from_document(
                    &obkv,
                    &settings_diff.old,
                    &del_tokenizer,
@ -120,7 +108,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
            },
            || {
                // additions
-                lang_safe_tokens_from_document(
+                tokens_from_document(
                    &obkv,
                    &settings_diff.new,
                    &add_tokenizer,
@ -131,8 +119,8 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
            },
        );
-        let (del_obkv, del_script_language_word_count) = del?;
+        let del_obkv = del?;
-        let (add_obkv, add_script_language_word_count) = add?;
+        let add_obkv = add?;
        // merge deletions and additions.
        // transforming two KV<FieldId, KV<u16, String>> into one KV<FieldId, KV<DelAdd, KV<u16, String>>>
@ -150,31 +138,10 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
            key_buffer.extend_from_slice(&field_id.to_be_bytes());
            docid_word_positions_sorter.insert(&key_buffer, value)?;
        }
        // update script_language_docids deletions.
        for (script, languages_frequency) in del_script_language_word_count {
            for (language, _) in languages_frequency {
                let entry = script_language_docids
                    .entry((script, language))
                    .or_insert_with(|| (RoaringBitmap::new(), RoaringBitmap::new()));
                entry.0.push(document_id);
            }
        }
        // update script_language_docids additions.
        for (script, languages_frequency) in add_script_language_word_count {
            for (language, _) in languages_frequency {
                let entry = script_language_docids
                    .entry((script, language))
                    .or_insert_with(|| (RoaringBitmap::new(), RoaringBitmap::new()));
                entry.1.push(document_id);
            }
        }
    }
    // the returned sorter is serialized as: key: (DocId, FieldId), value: KV<DelAdd, KV<u16, String>>.
    sorter_into_reader(docid_word_positions_sorter, indexer)
        .map(|reader| (reader, script_language_docids))
 }
 /// Check if any searchable fields of a document changed.
@ -205,7 +172,6 @@ fn tokenizer_builder<'a>(
    stop_words: Option<&'a fst::Set<Vec<u8>>>,
    allowed_separators: Option<&'a [&str]>,
    dictionary: Option<&'a [&str]>,
    script_language: Option<&'a HashMap<Script, Vec<Language>>>,
 ) -> TokenizerBuilder<'a, Vec<u8>> {
    let mut tokenizer_builder = TokenizerBuilder::new();
    if let Some(stop_words) = stop_words {
@ -218,96 +184,23 @@ fn tokenizer_builder<'a>(
        tokenizer_builder.separators(separators);
    }
    if let Some(script_language) = script_language {
        tokenizer_builder.allow_list(script_language);
    }
    tokenizer_builder
 }
 /// Extracts the words of a document mapped with their positions,
 /// ensuring no language detection mistake was made.
 ///
 /// A first extraction is run with the given `tokenizer`. If the per-script language counts
 /// look suspicious (see `potential_language_detection_error`), the extraction is run a second
 /// time with a temporary tokenizer whose allow list only contains the most frequently
 /// detected languages of the document.
 ///
 /// Returns the serialized `KV<FieldId, KV<u16, String>>` stored in `buffers.obkv_buffer`
 /// along with the number of words counted for each detected language, grouped by script.
 /// Errors of the underlying extraction are propagated.
 fn lang_safe_tokens_from_document<'a>(
    obkv: &KvReader<'_, FieldId>,
    settings: &InnerIndexSettings,
    tokenizer: &Tokenizer<'_>,
    max_positions_per_attributes: u32,
    del_add: DelAdd,
    buffers: &'a mut Buffers,
 ) -> Result<(&'a [u8], HashMap<Script, Vec<(Language, usize)>>)> {
    let mut script_language_word_count = HashMap::new();
    tokens_from_document(
        obkv,
        &settings.searchable_fields_ids,
        tokenizer,
        max_positions_per_attributes,
        del_add,
        buffers,
        &mut script_language_word_count,
    )?;
    // if we detect a potential mistake in the language detection,
    // we rerun the extraction forcing the tokenizer to detect the most frequently detected Languages.
    // context: https://github.com/meilisearch/meilisearch/issues/3565
    if script_language_word_count
        .values()
        .map(Vec::as_slice)
        .any(potential_language_detection_error)
    {
        // build an allow list with the most frequent detected languages in the document.
        let script_language: HashMap<_, _> =
            script_language_word_count.iter().filter_map(most_frequent_languages).collect();
        // if the allow list is empty, meaning that no Language is considered frequent,
        // then we don't rerun the extraction.
        if !script_language.is_empty() {
            // build a new temporary tokenizer including the allow list.
            let stop_words = settings.stop_words.as_ref();
            let separators: Option<Vec<_>> = settings
                .allowed_separators
                .as_ref()
                .map(|s| s.iter().map(String::as_str).collect());
            let dictionary: Option<Vec<_>> =
                settings.dictionary.as_ref().map(|s| s.iter().map(String::as_str).collect());
            // the allow list must be given to the builder: without it the second
            // extraction would run with the same configuration as the first one.
            let mut builder = tokenizer_builder(
                stop_words,
                separators.as_deref(),
                dictionary.as_deref(),
                Some(&script_language),
            );
            let tokenizer = builder.build();
            // drop the counters of the first pass so they only reflect the rerun.
            script_language_word_count.clear();
            // rerun the extraction.
            tokens_from_document(
                obkv,
                &settings.searchable_fields_ids,
                &tokenizer,
                max_positions_per_attributes,
                del_add,
                buffers,
                &mut script_language_word_count,
            )?;
        }
    }
    // returns a (KV<FieldId, KV<u16, String>>, HashMap<Script, Vec<(Language, usize)>>)
    Ok((&buffers.obkv_buffer, script_language_word_count))
 }
 /// Extract words mapped with their positions of a document.
 fn tokens_from_document<'a>(
    obkv: &KvReader<'a, FieldId>,
-    searchable_fields: &[FieldId],
+    settings: &InnerIndexSettings,
    tokenizer: &Tokenizer<'_>,
    max_positions_per_attributes: u32,
    del_add: DelAdd,
    buffers: &'a mut Buffers,
    script_language_word_count: &mut HashMap<Script, Vec<(Language, usize)>>,
 ) -> Result<&'a [u8]> {
    buffers.obkv_buffer.clear();
    let mut document_writer = KvWriterU16::new(&mut buffers.obkv_buffer);
    for (field_id, field_bytes) in obkv.iter() {
        // if field is searchable.
-        if searchable_fields.as_ref().contains(&field_id) {
+        if settings.searchable_fields_ids.contains(&field_id) {
            // extract deletion or addition only.
            if let Some(field_bytes) = KvReaderDelAdd::new(field_bytes).get(del_add) {
                // parse json.
@ -322,20 +215,11 @@ fn tokens_from_document<'a>(
                buffers.field_buffer.clear();
                if let Some(field) = json_to_string(&value, &mut buffers.field_buffer) {
                    // create an iterator of token with their positions.
-                    let tokens = process_tokens(tokenizer.tokenize(field))
+                    let locales = settings.localized_searchable_fields_ids.locales(field_id);
                    let tokens = process_tokens(tokenizer.tokenize_with_allow_list(field, locales))
                        .take_while(|(p, _)| (*p as u32) < max_positions_per_attributes);
                    for (index, token) in tokens {
                        // if a language has been detected for the token, we update the counter.
                        if let Some(language) = token.language {
                            let script = token.script;
                            let entry = script_language_word_count.entry(script).or_default();
                            match entry.iter_mut().find(|(l, _)| *l == language) {
                                Some((_, n)) => *n += 1,
                                None => entry.push((language, 1)),
                            }
                        }
                        // keep a word only if it is not empty and fit in a LMDB key.
                        let token = token.lemma().trim();
                        if !token.is_empty() && token.len() <= MAX_WORD_LENGTH {
@ -423,39 +307,6 @@ fn process_tokens<'a>(
        .filter(|(_, t)| t.is_word())
 }
 /// Tells whether the language counts of a script look like a detection mistake.
 ///
 /// Returns `true` when more than one language was detected and at least one of them
 /// has a count lower than or equal to the frequency threshold, `false` otherwise.
 fn potential_language_detection_error(languages_frequency: &[(Language, usize)]) -> bool {
    // with zero or one detected language there is nothing to compare against.
    if languages_frequency.len() <= 1 {
        return false;
    }

    let threshold = compute_language_frequency_threshold(languages_frequency);
    languages_frequency.iter().any(|&(_, count)| count <= threshold)
 }
 /// Keeps, for one script, the languages whose count is strictly above the frequency threshold.
 ///
 /// Returns `None` when at most one language was counted for the script,
 /// or when no language is above the threshold.
 fn most_frequent_languages(
    (script, languages_frequency): (&Script, &Vec<(Language, usize)>),
 ) -> Option<(Script, Vec<Language>)> {
    if languages_frequency.len() <= 1 {
        return None;
    }

    let threshold = compute_language_frequency_threshold(languages_frequency);
    let frequent: Vec<Language> = languages_frequency
        .iter()
        .filter_map(|&(language, count)| (count > threshold).then_some(language))
        .collect();

    (!frequent.is_empty()).then(|| (*script, frequent))
 }
 /// Computes the count under which a detected language is considered infrequent:
 /// a tenth (integer division) of the sum of all the counts.
 fn compute_language_frequency_threshold(languages_frequency: &[(Language, usize)]) -> usize {
    let word_count = languages_frequency.iter().map(|&(_, count)| count).sum::<usize>();
    // 10% is a completely arbitrary value.
    word_count / 10
 }
 #[derive(Default)]
 struct Buffers {
    // the field buffer used for each field's deserialization; it must be cleared between each field.
--- a/milli/src/update/index_documents/extract/extract_facet_string_docids.rs
+++ b/milli/src/update/index_documents/extract/extract_facet_string_docids.rs
@ -5,6 +5,7 @@ use std::iter::FromIterator;
 use std::{io, str};
 use charabia::normalizer::{Normalize, NormalizerOption};
 use charabia::{Language, StrDetection, Token};
 use heed::types::SerdeJson;
 use heed::BytesEncode;
@ -26,10 +27,9 @@ use crate::{FieldId, Result, MAX_FACET_VALUE_LENGTH};
 pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
    docid_fid_facet_string: grenad::Reader<R>,
    indexer: GrenadParameters,
-    _settings_diff: &InnerIndexSettingsDiff,
+    settings_diff: &InnerIndexSettingsDiff,
 ) -> Result<(grenad::Reader<BufReader<File>>, grenad::Reader<BufReader<File>>)> {
    let max_memory = indexer.max_memory_by_thread();
    let options = NormalizerOption { lossy: true, ..Default::default() };
    let mut facet_string_docids_sorter = create_sorter(
        grenad::SortAlgorithm::Stable,
@ -54,12 +54,8 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
    while let Some((key, deladd_original_value_bytes)) = cursor.move_on_next()? {
        let deladd_reader = KvReaderDelAdd::new(deladd_original_value_bytes);
-        // nothing to do if we delete and re-add the value.
+        let is_same_value = deladd_reader.get(DelAdd::Deletion).is_some()
-        if deladd_reader.get(DelAdd::Deletion).is_some()
+            && deladd_reader.get(DelAdd::Addition).is_some();
            && deladd_reader.get(DelAdd::Addition).is_some()
        {
            continue;
        }
        let (field_id_bytes, bytes) = try_split_array_at(key).unwrap();
        let field_id = FieldId::from_be_bytes(field_id_bytes);
@ -72,18 +68,20 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
        // Facet search normalization
        {
-            let mut hyper_normalized_value = normalized_value.normalize(&options);
+            let locales = settings_diff.old.localized_faceted_fields_ids.locales(field_id);
-            let normalized_truncated_facet: String;
+            let old_hyper_normalized_value = normalize_facet_string(normalized_value, locales);
-            if hyper_normalized_value.len() > MAX_FACET_VALUE_LENGTH {
+            let locales = settings_diff.new.localized_faceted_fields_ids.locales(field_id);
-                normalized_truncated_facet = hyper_normalized_value
+            let new_hyper_normalized_value = normalize_facet_string(normalized_value, locales);
-                    .char_indices()
+
                    .take_while(|(idx, _)| *idx < MAX_FACET_VALUE_LENGTH)
                    .map(|(_, c)| c)
                    .collect();
                hyper_normalized_value = normalized_truncated_facet.into();
            }
            let set = BTreeSet::from_iter(std::iter::once(normalized_value));
            // if the facet string is the same, we can put the deletion and addition in the same obkv.
            if old_hyper_normalized_value == new_hyper_normalized_value {
                // nothing to do if we delete and re-add the value.
                if is_same_value {
                    continue;
                }
                buffer.clear();
                let mut obkv = KvWriterDelAdd::new(&mut buffer);
                for (deladd_key, _) in deladd_reader.iter() {
@ -92,9 +90,44 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
                }
                obkv.finish()?;
-            let key = (field_id, hyper_normalized_value.as_ref());
+                let key: (u16, &str) = (field_id, new_hyper_normalized_value.as_ref());
                let key_bytes = BEU16StrCodec::bytes_encode(&key).map_err(heed::Error::Encoding)?;
                normalized_facet_string_docids_sorter.insert(key_bytes, &buffer)?;
            } else {
                // if the facet string is different, we need to insert the deletion and addition in different obkv because the related key is different.
                // deletion
                if deladd_reader.get(DelAdd::Deletion).is_some() {
                    // insert old value
                    let val = SerdeJson::bytes_encode(&set).map_err(heed::Error::Encoding)?;
                    buffer.clear();
                    let mut obkv = KvWriterDelAdd::new(&mut buffer);
                    obkv.insert(DelAdd::Deletion, val)?;
                    obkv.finish()?;
                    let key: (u16, &str) = (field_id, old_hyper_normalized_value.as_ref());
                    let key_bytes =
                        BEU16StrCodec::bytes_encode(&key).map_err(heed::Error::Encoding)?;
                    normalized_facet_string_docids_sorter.insert(key_bytes, &buffer)?;
                }
                // addition
                if deladd_reader.get(DelAdd::Addition).is_some() {
                    // insert new value
                    let val = SerdeJson::bytes_encode(&set).map_err(heed::Error::Encoding)?;
                    buffer.clear();
                    let mut obkv = KvWriterDelAdd::new(&mut buffer);
                    obkv.insert(DelAdd::Addition, val)?;
                    obkv.finish()?;
                    let key: (u16, &str) = (field_id, new_hyper_normalized_value.as_ref());
                    let key_bytes =
                        BEU16StrCodec::bytes_encode(&key).map_err(heed::Error::Encoding)?;
                    normalized_facet_string_docids_sorter.insert(key_bytes, &buffer)?;
                }
            }
        }
        // nothing to do if we delete and re-add the value.
        if is_same_value {
            continue;
        }
        let key = FacetGroupKey { field_id, level: 0, left_bound: normalized_value };
@ -112,3 +145,24 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
    let normalized = sorter_into_reader(normalized_facet_string_docids_sorter, indexer)?;
    sorter_into_reader(facet_string_docids_sorter, indexer).map(|s| (s, normalized))
 }
 /// Normalizes a facet string and truncates the result.
 ///
 /// A `Token` borrowing `facet_string` is built with the script and language reported
 /// by a `StrDetection` created from the string and the optional `locales`, then
 /// normalized with the `lossy` option enabled. Only the characters whose starting byte
 /// offset is below `MAX_FACET_VALUE_LENGTH` are kept in the returned `String`.
 fn normalize_facet_string(facet_string: &str, locales: Option<&[Language]>) -> String {
    let mut detection = StrDetection::new(facet_string, locales);
    // The script is queried before the language, as in the token's field order.
    let script = detection.script();
    let language = detection.language();
    let token = Token {
        lemma: std::borrow::Cow::Borrowed(facet_string),
        script,
        language,
        ..Default::default()
    };

    let options = NormalizerOption { lossy: true, ..Default::default() };
    let normalized = token.normalize(&options);
    let lemma: &str = &normalized.lemma;

    // Cut at the first character starting at or past the limit, which is always a
    // character boundary; when there is none the whole lemma is kept.
    let cut = lemma
        .char_indices()
        .map(|(idx, _)| idx)
        .find(|idx| *idx >= MAX_FACET_VALUE_LENGTH)
        .unwrap_or(lemma.len());
    lemma[..cut].to_string()
 }
--- a/milli/src/update/index_documents/extract/mod.rs
+++ b/milli/src/update/index_documents/extract/mod.rs
@ -345,8 +345,7 @@ fn send_and_extract_flattened_documents_data(
    let (docid_word_positions_chunk, fid_docid_facet_values_chunks): (Result<_>, Result<_>) =
        rayon::join(
            || {
-                let (docid_word_positions_chunk, script_language_pair) =
+                let docid_word_positions_chunk = extract_docid_word_positions(
                    extract_docid_word_positions(
                    flattened_documents_chunk.clone(),
                    indexer,
                    &settings_diff,
@ -357,9 +356,6 @@ fn send_and_extract_flattened_documents_data(
                let docid_word_positions_chunk =
                    unsafe { as_cloneable_grenad(&docid_word_positions_chunk)? };
                let _ =
                    lmdb_writer_sx.send(Ok(TypedChunk::ScriptLanguageDocids(script_language_pair)));
                Ok(docid_word_positions_chunk)
            },
            || {
--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@ -3388,44 +3388,6 @@ mod tests {
        wtxn.commit().unwrap();
    }
    /// Checks that the docids stored for the `(Script::Cj, Language::Cmn)` pair contain
    /// documents 1 and 5 after indexing, and only document 5 once document "1" is deleted.
    #[test]
    #[cfg(feature = "all-tokenizations")]
    fn stored_detected_script_and_language_should_not_return_deleted_documents() {
        use charabia::{Language, Script};

        let index = TempIndex::new();
        let mut wtxn = index.write_txn().unwrap();
        index
            .add_documents_using_wtxn(
                &mut wtxn,
                documents!([
                { "id": "0", "title": "The quick (\"brown\") fox can't jump 32.3 feet, right? Brr, it's 29.3°F!" },
                { "id": "1", "title": "人人生而自由﹐在尊嚴和權利上一律平等。他們賦有理性和良心﹐並應以兄弟關係的精神互相對待。" },
                { "id": "2", "title": "הַשּׁוּעָל הַמָּהִיר (״הַחוּם״) לֹא יָכוֹל לִקְפֹּץ 9.94 מֶטְרִים, נָכוֹן? ברר, 1.5°C- בַּחוּץ!" },
                { "id": "3", "title": "関西国際空港限定トートバッグ すもももももももものうち" },
                { "id": "4", "title": "ภาษาไทยง่ายนิดเดียว" },
                { "id": "5", "title": "The quick 在尊嚴和權利上一律平等。" },
            ]))
            .unwrap();

        let key_cmn = (Script::Cj, Language::Cmn);

        // Before the deletion: both documents containing Chinese text are listed.
        let docids_before =
            index.script_language_documents_ids(&wtxn, &key_cmn).unwrap().unwrap_or_default();
        let expected_before = {
            let mut bitmap = RoaringBitmap::new();
            bitmap.push(1);
            bitmap.push(5);
            bitmap
        };
        assert_eq!(docids_before, expected_before);

        delete_documents(&mut wtxn, &index, &["1"]);
        wtxn.commit().unwrap();

        // After the deletion: only document 5 remains, read through a fresh transaction.
        let rtxn = index.read_txn().unwrap();
        let docids_after =
            index.script_language_documents_ids(&rtxn, &key_cmn).unwrap().unwrap_or_default();
        let expected_after = {
            let mut bitmap = RoaringBitmap::new();
            bitmap.push(5);
            bitmap
        };
        assert_eq!(docids_after, expected_after);
    }
    #[test]
    fn delete_words_exact_attributes() {
        let index = TempIndex::new();
--- a/milli/src/update/index_documents/typed_chunk.rs
+++ b/milli/src/update/index_documents/typed_chunk.rs
@ -1,10 +1,9 @@
-use std::collections::{BTreeSet, HashMap};
+use std::collections::BTreeSet;
 use std::convert::TryInto;
 use std::fs::File;
 use std::io::{self, BufReader};
 use bytemuck::allocation::pod_collect_to_vec;
 use charabia::{Language, Script};
 use grenad::{Merger, MergerBuilder};
 use heed::types::Bytes;
 use heed::{BytesDecode, RwTxn};
@ -94,7 +93,6 @@ pub(crate) enum TypedChunk {
        add_to_user_provided: RoaringBitmap,
        remove_from_user_provided: RoaringBitmap,
    },
    ScriptLanguageDocids(HashMap<(Script, Language), (RoaringBitmap, RoaringBitmap)>),
 }
 impl TypedChunk {
@ -113,8 +111,7 @@ impl TypedChunk {
            | (FieldIdFacetExistsDocids(_), FieldIdFacetExistsDocids(_))
            | (FieldIdFacetIsNullDocids(_), FieldIdFacetIsNullDocids(_))
            | (FieldIdFacetIsEmptyDocids(_), FieldIdFacetIsEmptyDocids(_))
-            | (GeoPoints(_), GeoPoints(_))
+            | (GeoPoints(_), GeoPoints(_)) => true,
            | (ScriptLanguageDocids(_), ScriptLanguageDocids(_)) => true,
            (
                VectorPoints { embedder_name: left, expected_dimension: left_dim, .. },
                VectorPoints { embedder_name: right, expected_dimension: right_dim, .. },
@ -775,33 +772,6 @@ pub(crate) fn write_typed_chunk_into_index(
            tracing::debug!("Finished vector chunk for {}", embedder_name);
        }
        TypedChunk::ScriptLanguageDocids(_) => {
            let span = tracing::trace_span!(target: "indexing::write_db", "script_language_docids");
            let _entered = span.enter();
            for typed_chunk in typed_chunks {
                let TypedChunk::ScriptLanguageDocids(sl_map) = typed_chunk else { unreachable!() };
                for (key, (deletion, addition)) in sl_map {
                    let mut db_key_exists = false;
                    let final_value = match index.script_language_docids.get(wtxn, &key)? {
                        Some(db_values) => {
                            db_key_exists = true;
                            (db_values - deletion) | addition
                        }
                        None => addition,
                    };
                    if final_value.is_empty() {
                        // If the database entry exists, delete it.
                        if db_key_exists {
                            index.script_language_docids.delete(wtxn, &key)?;
                        }
                    } else {
                        index.script_language_docids.put(wtxn, &key, &final_value)?;
                    }
                }
            }
        }
    }
    Ok((RoaringBitmap::new(), is_merged_database))
--- a/milli/src/update/settings.rs
+++ b/milli/src/update/settings.rs
@ -28,7 +28,7 @@ use crate::vector::settings::{
    WriteBackToDocuments,
 };
 use crate::vector::{Embedder, EmbeddingConfig, EmbeddingConfigs};
-use crate::{FieldId, FieldsIdsMap, Index, Result};
+use crate::{FieldId, FieldsIdsMap, Index, LocalizedAttributesRule, LocalizedFieldIds, Result};
 #[derive(Debug, Clone, PartialEq, Eq, Copy)]
 pub enum Setting<T> {
@ -159,6 +159,7 @@ pub struct Settings<'a, 't, 'i> {
    proximity_precision: Setting<ProximityPrecision>,
    embedder_settings: Setting<BTreeMap<String, Setting<EmbeddingSettings>>>,
    search_cutoff: Setting<u64>,
    localized_attributes_rules: Setting<Vec<LocalizedAttributesRule>>,
 }
 impl<'a, 't, 'i> Settings<'a, 't, 'i> {
@ -193,6 +194,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
            proximity_precision: Setting::NotSet,
            embedder_settings: Setting::NotSet,
            search_cutoff: Setting::NotSet,
            localized_attributes_rules: Setting::NotSet,
            indexer_config,
        }
    }
@ -391,6 +393,14 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
        self.search_cutoff = Setting::Reset;
    }
    /// Records `value` as the localized attributes rules to apply, by storing
    /// `Setting::Set(value)` in this builder; nothing is written to the index here.
    pub fn set_localized_attributes_rules(&mut self, value: Vec<LocalizedAttributesRule>) {
        self.localized_attributes_rules = Setting::Set(value);
    }
    /// Marks the localized attributes rules for reset, by storing `Setting::Reset`
    /// in this builder; nothing is written to the index here.
    pub fn reset_localized_attributes_rules(&mut self) {
        self.localized_attributes_rules = Setting::Reset;
    }
    #[tracing::instrument(
        level = "trace"
        skip(self, progress_callback, should_abort, settings_diff),
@ -1118,6 +1128,23 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
        Ok(changed)
    }
    /// Applies the pending `localized_attributes_rules` setting to the index.
    ///
    /// - `Setting::Set`: the rules are written only when they differ from the ones
    ///   currently stored in the index.
    /// - `Setting::Reset`: the stored rules are deleted.
    /// - `Setting::NotSet`: the index is left untouched.
    fn update_localized_attributes_rules(&mut self) -> Result<()> {
        match &self.localized_attributes_rules {
            Setting::NotSet => Ok(()),
            Setting::Reset => {
                self.index.delete_localized_attributes_rules(self.wtxn)?;
                Ok(())
            }
            Setting::Set(rules) => {
                let stored = self.index.localized_attributes_rules(self.wtxn)?;
                let unchanged = stored.as_ref() == Some(rules);
                if !unchanged {
                    self.index.put_localized_attributes_rules(self.wtxn, rules.clone())?;
                }
                Ok(())
            }
        }
    }
    pub fn execute<FP, FA>(mut self, progress_callback: FP, should_abort: FA) -> Result<()>
    where
        FP: Fn(UpdateIndexingStep) + Sync,
@ -1151,6 +1178,7 @@ impl<'a, 't, 'i> Settings<'a, 't, 'i> {
        self.update_searchable()?;
        self.update_exact_attributes()?;
        self.update_proximity_precision()?;
        self.update_localized_attributes_rules()?;
        let embedding_config_updates = self.update_embedding_configs()?;
@ -1229,6 +1257,8 @@ impl InnerIndexSettingsDiff {
                || old_settings.allowed_separators != new_settings.allowed_separators
                || old_settings.dictionary != new_settings.dictionary
                || old_settings.proximity_precision != new_settings.proximity_precision
                || old_settings.localized_searchable_fields_ids
                    != new_settings.localized_searchable_fields_ids
        };
        let cache_exact_attributes = old_settings.exact_attributes != new_settings.exact_attributes;
@ -1304,6 +1334,7 @@ impl InnerIndexSettingsDiff {
        }
        (existing_fields - old_faceted_fields) != (existing_fields - new_faceted_fields)
            || self.old.localized_faceted_fields_ids != self.new.localized_faceted_fields_ids
    }
    pub fn reindex_vectors(&self) -> bool {
@ -1341,6 +1372,8 @@ pub(crate) struct InnerIndexSettings {
    pub geo_fields_ids: Option<(FieldId, FieldId)>,
    pub non_searchable_fields_ids: Vec<FieldId>,
    pub non_faceted_fields_ids: Vec<FieldId>,
    pub localized_searchable_fields_ids: LocalizedFieldIds,
    pub localized_faceted_fields_ids: LocalizedFieldIds,
 }
 impl InnerIndexSettings {
@ -1382,6 +1415,17 @@ impl InnerIndexSettings {
            }
            None => None,
        };
        let localized_attributes_rules = index.localized_attributes_rules(rtxn)?;
        let localized_searchable_fields_ids = LocalizedFieldIds::new(
            &localized_attributes_rules,
            &fields_ids_map,
            searchable_fields_ids.iter().cloned(),
        );
        let localized_faceted_fields_ids = LocalizedFieldIds::new(
            &localized_attributes_rules,
            &fields_ids_map,
            faceted_fields_ids.iter().cloned(),
        );
        let vectors_fids = fields_ids_map.nested_ids(RESERVED_VECTORS_FIELD_NAME);
        searchable_fields_ids.retain(|id| !vectors_fids.contains(id));
@ -1403,6 +1447,8 @@ impl InnerIndexSettings {
            geo_fields_ids,
            non_searchable_fields_ids: vectors_fids.clone(),
            non_faceted_fields_ids: vectors_fids.clone(),
            localized_searchable_fields_ids,
            localized_faceted_fields_ids,
        })
    }
@ -1418,6 +1464,12 @@ impl InnerIndexSettings {
        index.put_faceted_fields(wtxn, &new_facets)?;
        self.faceted_fields_ids = index.faceted_fields_ids(wtxn)?;
        let localized_attributes_rules = index.localized_attributes_rules(wtxn)?;
        self.localized_faceted_fields_ids = LocalizedFieldIds::new(
            &localized_attributes_rules,
            &self.fields_ids_map,
            self.faceted_fields_ids.iter().cloned(),
        );
        Ok(())
    }
@ -1441,8 +1493,13 @@ impl InnerIndexSettings {
                &self.fields_ids_map,
            )?;
        }
-        let searchable_fields_ids = index.searchable_fields_ids(wtxn)?;
+        self.searchable_fields_ids = index.searchable_fields_ids(wtxn)?;
-        self.searchable_fields_ids = searchable_fields_ids;
+        let localized_attributes_rules = index.localized_attributes_rules(wtxn)?;
        self.localized_searchable_fields_ids = LocalizedFieldIds::new(
            &localized_attributes_rules,
            &self.fields_ids_map,
            self.searchable_fields_ids.iter().cloned(),
        );
        Ok(())
    }
@ -2573,6 +2630,7 @@ mod tests {
                    proximity_precision,
                    embedder_settings,
                    search_cutoff,
                    localized_attributes_rules,
                } = settings;
                assert!(matches!(searchable_fields, Setting::NotSet));
                assert!(matches!(displayed_fields, Setting::NotSet));
@ -2597,6 +2655,7 @@ mod tests {
                assert!(matches!(proximity_precision, Setting::NotSet));
                assert!(matches!(embedder_settings, Setting::NotSet));
                assert!(matches!(search_cutoff, Setting::NotSet));
                assert!(matches!(localized_attributes_rules, Setting::NotSet));
            })
            .unwrap();
    }