Merge #3946

3946: Settings customizing tokenization r=irevoire a=ManyTheFish # Pull Request This pull request allows users to customize Meilisearch tokenization by providing specialized settings. ## Small documentation All the new settings can be set and reset like the other index settings by calling the route `/indexes/:name/settings`. ### `nonSeparatorTokens` The Meilisearch word segmentation uses a default list of separators to segment words. However, for specific use cases, some of the default separators shouldn't be considered separators; the `nonSeparatorTokens` setting allows removing some tokens from the default list of separators. ***Request payload `PUT` - `/indexes/articles/settings/non-separator-tokens`*** ```json ["@", "#", "&"] ``` ### `separatorTokens` Some use cases need to define additional separators: some are related to a specific way of parsing technical documents, others are related to encodings in documents. The `separatorTokens` setting allows adding some tokens to the list of separators. ***Request payload `PUT` - `/indexes/articles/settings/separator-tokens`*** ```json ["§", "&sep"] ``` ### `dictionary` The Meilisearch word segmentation relies on separators and language-based word dictionaries to segment words. However, this segmentation is inaccurate on technical or use-case-specific vocabulary (like `G/Box` to say `Gear Box`), or on proper nouns (like `J. R. R.` when parsing `J. R. R. Tolkien`). The `dictionary` setting allows defining a list of words that will be segmented as described in the list. ***Request payload `PUT` - `/indexes/articles/settings/dictionary`*** ```json ["J. R. R.", "J.R.R."] ``` This last feature synergizes well with the `stopWords` setting or the `synonyms` setting, making it possible to segment words and correctly retrieve the synonyms: ***Request payload `PATCH` - `/indexes/articles/settings`*** ```json { "dictionary": ["J. R. R.", "J.R.R."], "synonyms": { "J.R.R.": ["jrr", "J. R. R."], "J. R. R.": ["jrr", "J.R.R."], "jrr": ["J.R.R.", "J. R. R."] } } ``` ### Related specifications: - https://github.com/meilisearch/specifications/pull/255 - https://github.com/meilisearch/specifications/pull/254 ### Try it with Docker ```bash $ docker pull getmeili/meilisearch:prototype-tokenizer-customization-3 ``` ## Related issue Fixes #3610 Fixes #3917 Fixes https://github.com/meilisearch/product/discussions/468 Fixes https://github.com/meilisearch/product/discussions/160 Fixes https://github.com/meilisearch/product/discussions/260 Fixes https://github.com/meilisearch/product/discussions/381 Fixes https://github.com/meilisearch/product/discussions/131 Related to https://github.com/meilisearch/meilisearch/issues/2879 Fixes #2760 ## What does this PR do? - Add a setting `nonSeparatorTokens` that removes tokens from the default separator tokens - Add a setting `separatorTokens` that adds tokens to the separator tokens - Add a setting `dictionary` that overrides the segmentation of specific words - Add new error code `invalid_settings_non_separator_tokens` (invalid_request) - Add new error code `invalid_settings_separator_tokens` (invalid_request) - Add new error code `invalid_settings_dictionary` (invalid_request) Co-authored-by: ManyTheFish <many@meilisearch.com> Co-authored-by: Many the fish <many@meilisearch.com>
2025-06-18 12:47:35 +02:00 · 2023-08-10 10:01:18 +00:00 · 2023-08-10 10:01:18 +00:00 · 8084cf29f3
commit 8084cf29f3
parent 29adfc2f68 5a7c1bde84
21 changed files with 1667 additions and 158 deletions
--- a/dump/src/lib.rs
+++ b/dump/src/lib.rs
@ -262,6 +262,9 @@ pub(crate) mod test {
            sortable_attributes: Setting::Set(btreeset! { S("age") }),
            ranking_rules: Setting::NotSet,
            stop_words: Setting::NotSet,
            non_separator_tokens: Setting::NotSet,
            separator_tokens: Setting::NotSet,
            dictionary: Setting::NotSet,
            synonyms: Setting::NotSet,
            distinct_attribute: Setting::NotSet,
            typo_tolerance: Setting::NotSet,
--- a/dump/src/reader/compat/v5_to_v6.rs
+++ b/dump/src/reader/compat/v5_to_v6.rs
@ -340,6 +340,9 @@ impl<T> From<v5::Settings<T>> for v6::Settings<v6::Unchecked> {
                }
            },
            stop_words: settings.stop_words.into(),
            non_separator_tokens: v6::Setting::NotSet,
            separator_tokens: v6::Setting::NotSet,
            dictionary: v6::Setting::NotSet,
            synonyms: settings.synonyms.into(),
            distinct_attribute: settings.distinct_attribute.into(),
            typo_tolerance: match settings.typo_tolerance {
--- a/meilisearch-types/src/error.rs
+++ b/meilisearch-types/src/error.rs
@ -259,6 +259,9 @@ InvalidSettingsRankingRules           , InvalidRequest       , BAD_REQUEST ;
 InvalidSettingsSearchableAttributes   , InvalidRequest       , BAD_REQUEST ;
 InvalidSettingsSortableAttributes     , InvalidRequest       , BAD_REQUEST ;
 InvalidSettingsStopWords              , InvalidRequest       , BAD_REQUEST ;
 InvalidSettingsNonSeparatorTokens     , InvalidRequest       , BAD_REQUEST ;
 InvalidSettingsSeparatorTokens        , InvalidRequest       , BAD_REQUEST ;
 InvalidSettingsDictionary             , InvalidRequest       , BAD_REQUEST ;
 InvalidSettingsSynonyms               , InvalidRequest       , BAD_REQUEST ;
 InvalidSettingsTypoTolerance          , InvalidRequest       , BAD_REQUEST ;
 InvalidState                          , Internal             , INTERNAL_SERVER_ERROR ;
--- a/meilisearch-types/src/settings.rs
+++ b/meilisearch-types/src/settings.rs
@ -171,6 +171,15 @@ pub struct Settings<T> {
    #[deserr(default, error = DeserrJsonError<InvalidSettingsStopWords>)]
    pub stop_words: Setting<BTreeSet<String>>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[deserr(default, error = DeserrJsonError<InvalidSettingsNonSeparatorTokens>)]
    pub non_separator_tokens: Setting<BTreeSet<String>>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[deserr(default, error = DeserrJsonError<InvalidSettingsSeparatorTokens>)]
    pub separator_tokens: Setting<BTreeSet<String>>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[deserr(default, error = DeserrJsonError<InvalidSettingsDictionary>)]
    pub dictionary: Setting<BTreeSet<String>>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
    #[deserr(default, error = DeserrJsonError<InvalidSettingsSynonyms>)]
    pub synonyms: Setting<BTreeMap<String, Vec<String>>>,
    #[serde(default, skip_serializing_if = "Setting::is_not_set")]
@ -201,6 +210,9 @@ impl Settings<Checked> {
            ranking_rules: Setting::Reset,
            stop_words: Setting::Reset,
            synonyms: Setting::Reset,
            non_separator_tokens: Setting::Reset,
            separator_tokens: Setting::Reset,
            dictionary: Setting::Reset,
            distinct_attribute: Setting::Reset,
            typo_tolerance: Setting::Reset,
            faceting: Setting::Reset,
@ -217,6 +229,9 @@ impl Settings<Checked> {
            sortable_attributes,
            ranking_rules,
            stop_words,
            non_separator_tokens,
            separator_tokens,
            dictionary,
            synonyms,
            distinct_attribute,
            typo_tolerance,
@ -232,6 +247,9 @@ impl Settings<Checked> {
            sortable_attributes,
            ranking_rules,
            stop_words,
            non_separator_tokens,
            separator_tokens,
            dictionary,
            synonyms,
            distinct_attribute,
            typo_tolerance,
@ -274,6 +292,9 @@ impl Settings<Unchecked> {
            ranking_rules: self.ranking_rules,
            stop_words: self.stop_words,
            synonyms: self.synonyms,
            non_separator_tokens: self.non_separator_tokens,
            separator_tokens: self.separator_tokens,
            dictionary: self.dictionary,
            distinct_attribute: self.distinct_attribute,
            typo_tolerance: self.typo_tolerance,
            faceting: self.faceting,
@ -335,6 +356,28 @@ pub fn apply_settings_to_builder(
        Setting::NotSet => (),
    }
    match settings.non_separator_tokens {
        Setting::Set(ref non_separator_tokens) => {
            builder.set_non_separator_tokens(non_separator_tokens.clone())
        }
        Setting::Reset => builder.reset_non_separator_tokens(),
        Setting::NotSet => (),
    }
    match settings.separator_tokens {
        Setting::Set(ref separator_tokens) => {
            builder.set_separator_tokens(separator_tokens.clone())
        }
        Setting::Reset => builder.reset_separator_tokens(),
        Setting::NotSet => (),
    }
    match settings.dictionary {
        Setting::Set(ref dictionary) => builder.set_dictionary(dictionary.clone()),
        Setting::Reset => builder.reset_dictionary(),
        Setting::NotSet => (),
    }
    match settings.synonyms {
        Setting::Set(ref synonyms) => builder.set_synonyms(synonyms.clone().into_iter().collect()),
        Setting::Reset => builder.reset_synonyms(),
@ -459,15 +502,14 @@ pub fn settings(
        })
        .transpose()?
        .unwrap_or_default();
    let non_separator_tokens = index.non_separator_tokens(rtxn)?.unwrap_or_default();
    let separator_tokens = index.separator_tokens(rtxn)?.unwrap_or_default();
    let dictionary = index.dictionary(rtxn)?.unwrap_or_default();
    let distinct_field = index.distinct_field(rtxn)?.map(String::from);
-    // in milli each word in the synonyms map were split on their separator. Since we lost
+    let synonyms = index.user_defined_synonyms(rtxn)?;
    // this information we are going to put space between words.
    let synonyms = index
        .synonyms(rtxn)?
        .iter()
        .map(|(key, values)| (key.join(" "), values.iter().map(|value| value.join(" ")).collect()))
        .collect();
    let min_typo_word_len = MinWordSizeTyposSetting {
        one_typo: Setting::Set(index.min_word_len_one_typo(rtxn)?),
@ -520,6 +562,9 @@ pub fn settings(
        sortable_attributes: Setting::Set(sortable_attributes),
        ranking_rules: Setting::Set(criteria.iter().map(|c| c.clone().into()).collect()),
        stop_words: Setting::Set(stop_words),
        non_separator_tokens: Setting::Set(non_separator_tokens),
        separator_tokens: Setting::Set(separator_tokens),
        dictionary: Setting::Set(dictionary),
        distinct_attribute: match distinct_field {
            Some(field) => Setting::Set(field),
            None => Setting::Reset,
@ -642,6 +687,9 @@ pub(crate) mod test {
            sortable_attributes: Setting::NotSet,
            ranking_rules: Setting::NotSet,
            stop_words: Setting::NotSet,
            non_separator_tokens: Setting::NotSet,
            separator_tokens: Setting::NotSet,
            dictionary: Setting::NotSet,
            synonyms: Setting::NotSet,
            distinct_attribute: Setting::NotSet,
            typo_tolerance: Setting::NotSet,
@ -663,6 +711,9 @@ pub(crate) mod test {
            sortable_attributes: Setting::NotSet,
            ranking_rules: Setting::NotSet,
            stop_words: Setting::NotSet,
            non_separator_tokens: Setting::NotSet,
            separator_tokens: Setting::NotSet,
            dictionary: Setting::NotSet,
            synonyms: Setting::NotSet,
            distinct_attribute: Setting::NotSet,
            typo_tolerance: Setting::NotSet,
--- a/meilisearch/src/analytics/mock_analytics.rs
+++ b/meilisearch/src/analytics/mock_analytics.rs
@ -20,7 +20,7 @@ pub struct SearchAggregator;
 #[allow(dead_code)]
 impl SearchAggregator {
    pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self {
-        Self::default()
+        Self
    }
    pub fn succeed(&mut self, _: &dyn Any) {}
@ -32,7 +32,7 @@ pub struct MultiSearchAggregator;
 #[allow(dead_code)]
 impl MultiSearchAggregator {
    pub fn from_queries(_: &dyn Any, _: &dyn Any) -> Self {
-        Self::default()
+        Self
    }
    pub fn succeed(&mut self) {}
@ -44,7 +44,7 @@ pub struct FacetSearchAggregator;
 #[allow(dead_code)]
 impl FacetSearchAggregator {
    pub fn from_query(_: &dyn Any, _: &dyn Any) -> Self {
-        Self::default()
+        Self
    }
    pub fn succeed(&mut self, _: &dyn Any) {}
--- a/meilisearch/src/routes/indexes/settings.rs
+++ b/meilisearch/src/routes/indexes/settings.rs
@ -310,6 +310,81 @@ make_setting_route!(
    }
 );
 // Declares the `/non-separator-tokens` settings sub-route (`put` verb) for the
 // `non_separator_tokens` field, exposed as `nonSeparatorTokens`.
 // The trailing closure publishes a "nonSeparatorTokens Updated" analytics event.
 make_setting_route!(
    "/non-separator-tokens",
    put,
    std::collections::BTreeSet<String>,
    meilisearch_types::deserr::DeserrJsonError<
        meilisearch_types::error::deserr_codes::InvalidSettingsNonSeparatorTokens,
    >,
    non_separator_tokens,
    "nonSeparatorTokens",
    analytics,
    |tokens: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
        use serde_json::json;

        // Number of tokens in the payload; stays `None` when no set was provided.
        let total = tokens.as_ref().map(|set| set.len());
        analytics.publish(
            "nonSeparatorTokens Updated".to_string(),
            json!({
                "non_separator_tokens": {
                    "total": total,
                },
            }),
            Some(req),
        );
    }
 );
 // Declares the `/separator-tokens` settings sub-route (`put` verb) for the
 // `separator_tokens` field, exposed as `separatorTokens`.
 // The trailing closure publishes a "separatorTokens Updated" analytics event.
 make_setting_route!(
    "/separator-tokens",
    put,
    std::collections::BTreeSet<String>,
    meilisearch_types::deserr::DeserrJsonError<
        meilisearch_types::error::deserr_codes::InvalidSettingsSeparatorTokens,
    >,
    separator_tokens,
    "separatorTokens",
    analytics,
    |tokens: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
        use serde_json::json;

        // Number of tokens in the payload; stays `None` when no set was provided.
        let total = tokens.as_ref().map(|set| set.len());
        analytics.publish(
            "separatorTokens Updated".to_string(),
            json!({
                "separator_tokens": {
                    "total": total,
                },
            }),
            Some(req),
        );
    }
 );
 // Declares the `/dictionary` settings sub-route (`put` verb) for the
 // `dictionary` field, exposed as `dictionary`.
 // The trailing closure publishes a "dictionary Updated" analytics event.
 make_setting_route!(
    "/dictionary",
    put,
    std::collections::BTreeSet<String>,
    meilisearch_types::deserr::DeserrJsonError<
        meilisearch_types::error::deserr_codes::InvalidSettingsDictionary,
    >,
    dictionary,
    "dictionary",
    analytics,
    |words: &Option<std::collections::BTreeSet<String>>, req: &HttpRequest| {
        use serde_json::json;

        // Number of entries in the payload; stays `None` when no set was provided.
        let total = words.as_ref().map(|set| set.len());
        analytics.publish(
            "dictionary Updated".to_string(),
            json!({
                "dictionary": {
                    "total": total,
                },
            }),
            Some(req),
        );
    }
 );
 make_setting_route!(
    "/synonyms",
    put,
@ -466,6 +541,9 @@ generate_configure!(
    searchable_attributes,
    distinct_attribute,
    stop_words,
    separator_tokens,
    non_separator_tokens,
    dictionary,
    synonyms,
    ranking_rules,
    typo_tolerance,
--- a/meilisearch/src/search.rs
+++ b/meilisearch/src/search.rs
@ -491,6 +491,20 @@ pub fn perform_search(
        tokenizer_builder.allow_list(&script_lang_map);
    }
    let separators = index.allowed_separators(&rtxn)?;
    let separators: Option<Vec<_>> =
        separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
    if let Some(ref separators) = separators {
        tokenizer_builder.separators(separators);
    }
    let dictionary = index.dictionary(&rtxn)?;
    let dictionary: Option<Vec<_>> =
        dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
    if let Some(ref dictionary) = dictionary {
        tokenizer_builder.words_dict(dictionary);
    }
    let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer_builder.build());
    formatter_builder.crop_marker(query.crop_marker);
    formatter_builder.highlight_prefix(query.highlight_pre_tag);
--- a/meilisearch/tests/dumps/mod.rs
+++ b/meilisearch/tests/dumps/mod.rs
--- a/meilisearch/tests/settings/get_settings.rs
+++ b/meilisearch/tests/settings/get_settings.rs
@ -16,6 +16,9 @@ static DEFAULT_SETTINGS_VALUES: Lazy<HashMap<&'static str, Value>> = Lazy::new(|
        json!(["words", "typo", "proximity", "attribute", "sort", "exactness"]),
    );
    map.insert("stop_words", json!([]));
    map.insert("non_separator_tokens", json!([]));
    map.insert("separator_tokens", json!([]));
    map.insert("dictionary", json!([]));
    map.insert("synonyms", json!({}));
    map.insert(
        "faceting",
@ -51,7 +54,7 @@ async fn get_settings() {
    let (response, code) = index.settings().await;
    assert_eq!(code, 200);
    let settings = response.as_object().unwrap();
-    assert_eq!(settings.keys().len(), 11);
+    assert_eq!(settings.keys().len(), 14);
    assert_eq!(settings["displayedAttributes"], json!(["*"]));
    assert_eq!(settings["searchableAttributes"], json!(["*"]));
    assert_eq!(settings["filterableAttributes"], json!([]));
@ -62,6 +65,9 @@ async fn get_settings() {
        json!(["words", "typo", "proximity", "attribute", "sort", "exactness"])
    );
    assert_eq!(settings["stopWords"], json!([]));
    assert_eq!(settings["nonSeparatorTokens"], json!([]));
    assert_eq!(settings["separatorTokens"], json!([]));
    assert_eq!(settings["dictionary"], json!([]));
    assert_eq!(
        settings["faceting"],
        json!({
@ -272,6 +278,9 @@ test_setting_routes!(
    searchable_attributes put,
    distinct_attribute put,
    stop_words put,
    separator_tokens put,
    non_separator_tokens put,
    dictionary put,
    ranking_rules put,
    synonyms put,
    pagination patch,
--- a/meilisearch/tests/settings/mod.rs
+++ b/meilisearch/tests/settings/mod.rs
@ -1,3 +1,4 @@
 mod distinct;
 mod errors;
 mod get_settings;
 mod tokenizer_customization;
--- a/meilisearch/tests/settings/tokenizer_customization.rs
+++ b/meilisearch/tests/settings/tokenizer_customization.rs
@ -0,0 +1,467 @@
 use meili_snap::{json_string, snapshot};
 use serde_json::json;
 use crate::common::Server;
 /// Sets the three tokenizer settings (`nonSeparatorTokens`, `separatorTokens`,
 /// `dictionary`) on a fresh index, checks that they are returned by the settings
 /// route, then resets them with `null` and checks that each one reads back as `[]`.
 #[actix_rt::test]
 async fn set_and_reset() {
    let server = Server::new().await;
    let index = server.index("test");
    // First settings update; the response and status code are deliberately ignored.
    let (_response, _code) = index
        .update_settings(json!({
            "nonSeparatorTokens": ["#", "&"],
            "separatorTokens": ["&sep", "<br/>"],
            "dictionary": ["J.R.R.", "J. R. R."],
        }))
        .await;
    // Task 0 is presumably the settings update enqueued just above.
    index.wait_task(0).await;
    let (response, _) = index.settings().await;
    snapshot!(json_string!(response["nonSeparatorTokens"]), @r###"
    [
      "#",
      "&"
    ]
    "###);
    snapshot!(json_string!(response["separatorTokens"]), @r###"
    [
      "&sep",
      "<br/>"
    ]
    "###);
    // Note: the dictionary comes back in a different order than it was sent.
    snapshot!(json_string!(response["dictionary"]), @r###"
    [
      "J. R. R.",
      "J.R.R."
    ]
    "###);
    // Sending `null` for a setting resets it.
    index
        .update_settings(json!({
            "nonSeparatorTokens": null,
            "separatorTokens": null,
            "dictionary": null,
        }))
        .await;
    index.wait_task(1).await;
    let (response, _) = index.settings().await;
    // After the reset every setting is reported as an empty list.
    snapshot!(json_string!(response["nonSeparatorTokens"]), @"[]");
    snapshot!(json_string!(response["separatorTokens"]), @"[]");
    snapshot!(json_string!(response["dictionary"]), @"[]");
 }
 /// Indexes three documents, then configures custom separators, non-separators and
 /// a dictionary, and snapshots the search hits (with highlighting on `content`)
 /// for several queries built around `&`, `&sep` and `#`.
 #[actix_rt::test]
 async fn set_and_search() {
    let documents = json!([
        {
            "id": 1,
            "content": "Mac & cheese",
        },
        {
            "id": 2,
            "content": "G#D#G#D#G#C#D#G#C#",
        },
        {
            "id": 3,
            "content": "Mac&sep&&sepcheese",
        },
    ]);
    let server = Server::new().await;
    let index = server.index("test");
    index.add_documents(documents, None).await;
    index.wait_task(0).await;
    // `#` and `&` stop being separators, `<br/>` and `&sep` become separators,
    // and the dictionary lists `#` plus the sharp notes `A#` to `G#`.
    let (_response, _code) = index
        .update_settings(json!({
            "nonSeparatorTokens": ["#", "&"],
            "separatorTokens": ["<br/>", "&sep"],
            "dictionary": ["#", "A#", "B#", "C#", "D#", "E#", "F#", "G#"],
        }))
        .await;
    index.wait_task(1).await;
    // A lone `&` is searchable: documents 1 and 3 match, with only the
    // standalone `&` highlighted (the `&sep` occurrences are not).
    index
        .search(json!({"q": "&", "attributesToHighlight": ["content"]}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(json_string!(response["hits"]), @r###"
            [
              {
                "id": 1,
                "content": "Mac & cheese",
                "_formatted": {
                  "id": "1",
                  "content": "Mac <em>&</em> cheese"
                }
              },
              {
                "id": 3,
                "content": "Mac&sep&&sepcheese",
                "_formatted": {
                  "id": "3",
                  "content": "Mac&sep<em>&</em>&sepcheese"
                }
              }
            ]
            "###);
        })
        .await;
    // The space-separated query matches both the spaced and the `&sep`-joined document.
    index
        .search(
            json!({"q": "Mac & cheese", "attributesToHighlight": ["content"]}),
            |response, code| {
                snapshot!(code, @"200 OK");
                snapshot!(json_string!(response["hits"]), @r###"
                [
                  {
                    "id": 1,
                    "content": "Mac & cheese",
                    "_formatted": {
                      "id": "1",
                      "content": "<em>Mac</em> <em>&</em> <em>cheese</em>"
                    }
                  },
                  {
                    "id": 3,
                    "content": "Mac&sep&&sepcheese",
                    "_formatted": {
                      "id": "3",
                      "content": "<em>Mac</em>&sep<em>&</em>&sep<em>cheese</em>"
                    }
                  }
                ]
                "###);
            },
        )
        .await;
    // The `&sep`-joined query yields the same hits as the space-separated one.
    index
        .search(
            json!({"q": "Mac&sep&&sepcheese", "attributesToHighlight": ["content"]}),
            |response, code| {
                snapshot!(code, @"200 OK");
                snapshot!(json_string!(response["hits"]), @r###"
                [
                  {
                    "id": 1,
                    "content": "Mac & cheese",
                    "_formatted": {
                      "id": "1",
                      "content": "<em>Mac</em> <em>&</em> <em>cheese</em>"
                    }
                  },
                  {
                    "id": 3,
                    "content": "Mac&sep&&sepcheese",
                    "_formatted": {
                      "id": "3",
                      "content": "<em>Mac</em>&sep<em>&</em>&sep<em>cheese</em>"
                    }
                  }
                ]
                "###);
            },
        )
        .await;
    // Only document 2 matches the sharp-note query.
    index
        .search(json!({"q": "C#D#G", "attributesToHighlight": ["content"]}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(json_string!(response["hits"]), @r###"
            [
              {
                "id": 2,
                "content": "G#D#G#D#G#C#D#G#C#",
                "_formatted": {
                  "id": "2",
                  "content": "<em>G</em>#<em>D#</em><em>G</em>#<em>D#</em><em>G</em>#<em>C#</em><em>D#</em><em>G</em>#<em>C#</em>"
                }
              }
            ]
            "###);
        })
        .await;
    // A lone `#` query is expected to return no hit at all.
    index
        .search(json!({"q": "#", "attributesToHighlight": ["content"]}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(json_string!(response["hits"]), @"[]");
        })
        .await;
 }
 /// Exercises the `dictionary` setting together with `synonyms`: initials written
 /// as `J.R.R.`, `J. R. R.` and `jrr` (and the `J.K.` equivalents) are declared as
 /// synonyms of each other, and the snapshots record which documents each spelling
 /// retrieves, before and after the `J.K.` forms are added to the dictionary.
 #[actix_rt::test]
 async fn advanced_synergies() {
    let documents = json!([
        {
            "id": 1,
            "content": "J.R.R. Tolkien",
        },
        {
            "id": 2,
            "content": "J. R. R. Tolkien",
        },
        {
            "id": 3,
            "content": "jrr Tolkien",
        },
        {
            "id": 4,
            "content": "J.K. Rowlings",
        },
        {
            "id": 5,
            "content": "J. K. Rowlings",
        },
        {
            "id": 6,
            "content": "jk Rowlings",
        },
    ]);
    let server = Server::new().await;
    let index = server.index("test");
    index.add_documents(documents, None).await;
    index.wait_task(0).await;
    // The dictionary only contains the `J.R.R.` spellings at this point, while the
    // synonyms already cover both the `J.R.R.` and the `J.K.` groups.
    let (_response, _code) = index
        .update_settings(json!({
            "dictionary": ["J.R.R.", "J. R. R."],
            "synonyms": {
                "J.R.R.": ["jrr", "J. R. R."],
                "J. R. R.": ["jrr", "J.R.R."],
                "jrr": ["J.R.R.", "J. R. R."],
                "J.K.": ["jk", "J. K."],
                "J. K.": ["jk", "J.K."],
                "jk": ["J.K.", "J. K."],
            }
        }))
        .await;
    index.wait_task(1).await;
    // Each of the three `J.R.R.` spellings retrieves documents 1, 2 and 3; only the
    // order of the hits changes with the spelling used in the query.
    index
        .search(json!({"q": "J.R.R.", "attributesToHighlight": ["content"]}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(json_string!(response["hits"]), @r###"
            [
              {
                "id": 1,
                "content": "J.R.R. Tolkien",
                "_formatted": {
                  "id": "1",
                  "content": "<em>J.R.R.</em> Tolkien"
                }
              },
              {
                "id": 2,
                "content": "J. R. R. Tolkien",
                "_formatted": {
                  "id": "2",
                  "content": "<em>J. R. R.</em> Tolkien"
                }
              },
              {
                "id": 3,
                "content": "jrr Tolkien",
                "_formatted": {
                  "id": "3",
                  "content": "<em>jrr</em> Tolkien"
                }
              }
            ]
            "###);
        })
        .await;
    index
        .search(json!({"q": "jrr", "attributesToHighlight": ["content"]}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(json_string!(response["hits"]), @r###"
            [
              {
                "id": 3,
                "content": "jrr Tolkien",
                "_formatted": {
                  "id": "3",
                  "content": "<em>jrr</em> Tolkien"
                }
              },
              {
                "id": 1,
                "content": "J.R.R. Tolkien",
                "_formatted": {
                  "id": "1",
                  "content": "<em>J.R.R.</em> Tolkien"
                }
              },
              {
                "id": 2,
                "content": "J. R. R. Tolkien",
                "_formatted": {
                  "id": "2",
                  "content": "<em>J. R. R.</em> Tolkien"
                }
              }
            ]
            "###);
        })
        .await;
    index
        .search(json!({"q": "J. R. R.", "attributesToHighlight": ["content"]}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(json_string!(response["hits"]), @r###"
            [
              {
                "id": 2,
                "content": "J. R. R. Tolkien",
                "_formatted": {
                  "id": "2",
                  "content": "<em>J. R. R.</em> Tolkien"
                }
              },
              {
                "id": 1,
                "content": "J.R.R. Tolkien",
                "_formatted": {
                  "id": "1",
                  "content": "<em>J.R.R.</em> Tolkien"
                }
              },
              {
                "id": 3,
                "content": "jrr Tolkien",
                "_formatted": {
                  "id": "3",
                  "content": "<em>jrr</em> Tolkien"
                }
              }
            ]
            "###);
        })
        .await;
    // Only update dictionary, the synonyms should be recomputed.
    let (_response, _code) = index
        .update_settings(json!({
            "dictionary": ["J.R.R.", "J. R. R.", "J.K.", "J. K."],
        }))
        .await;
    index.wait_task(2).await;
    // With the `J.K.` spellings now in the dictionary, each of them retrieves
    // documents 4, 5 and 6 even though the synonyms were not sent again.
    index
        .search(json!({"q": "jk", "attributesToHighlight": ["content"]}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(json_string!(response["hits"]), @r###"
            [
              {
                "id": 6,
                "content": "jk Rowlings",
                "_formatted": {
                  "id": "6",
                  "content": "<em>jk</em> Rowlings"
                }
              },
              {
                "id": 4,
                "content": "J.K. Rowlings",
                "_formatted": {
                  "id": "4",
                  "content": "<em>J.K.</em> Rowlings"
                }
              },
              {
                "id": 5,
                "content": "J. K. Rowlings",
                "_formatted": {
                  "id": "5",
                  "content": "<em>J. K.</em> Rowlings"
                }
              }
            ]
            "###);
        })
        .await;
    index
        .search(json!({"q": "J.K.", "attributesToHighlight": ["content"]}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(json_string!(response["hits"]), @r###"
            [
              {
                "id": 4,
                "content": "J.K. Rowlings",
                "_formatted": {
                  "id": "4",
                  "content": "<em>J.K.</em> Rowlings"
                }
              },
              {
                "id": 5,
                "content": "J. K. Rowlings",
                "_formatted": {
                  "id": "5",
                  "content": "<em>J. K.</em> Rowlings"
                }
              },
              {
                "id": 6,
                "content": "jk Rowlings",
                "_formatted": {
                  "id": "6",
                  "content": "<em>jk</em> Rowlings"
                }
              }
            ]
            "###);
        })
        .await;
    // The spaced spelling additionally returns document 2 as the last hit, with
    // `J. R.` highlighted; the snapshot records this as the current behavior.
    index
        .search(json!({"q": "J. K.", "attributesToHighlight": ["content"]}), |response, code| {
            snapshot!(code, @"200 OK");
            snapshot!(json_string!(response["hits"]), @r###"
            [
              {
                "id": 5,
                "content": "J. K. Rowlings",
                "_formatted": {
                  "id": "5",
                  "content": "<em>J. K.</em> Rowlings"
                }
              },
              {
                "id": 4,
                "content": "J.K. Rowlings",
                "_formatted": {
                  "id": "4",
                  "content": "<em>J.K.</em> Rowlings"
                }
              },
              {
                "id": 6,
                "content": "jk Rowlings",
                "_formatted": {
                  "id": "6",
                  "content": "<em>jk</em> Rowlings"
                }
              },
              {
                "id": 2,
                "content": "J. R. R. Tolkien",
                "_formatted": {
                  "id": "2",
                  "content": "<em>J. R.</em> R. Tolkien"
                }
              }
            ]
            "###);
        })
        .await;
 }
--- a/milli/src/index.rs
+++ b/milli/src/index.rs
@ -1,5 +1,5 @@
 use std::borrow::Cow;
-use std::collections::{BTreeSet, HashMap, HashSet};
+use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
 use std::fs::File;
 use std::mem::size_of;
 use std::path::Path;
@ -61,8 +61,12 @@ pub mod main_key {
    pub const USER_DEFINED_SEARCHABLE_FIELDS_KEY: &str = "user-defined-searchable-fields";
    pub const SOFT_EXTERNAL_DOCUMENTS_IDS_KEY: &str = "soft-external-documents-ids";
    pub const STOP_WORDS_KEY: &str = "stop-words";
    pub const NON_SEPARATOR_TOKENS_KEY: &str = "non-separator-tokens";
    pub const SEPARATOR_TOKENS_KEY: &str = "separator-tokens";
    pub const DICTIONARY_KEY: &str = "dictionary";
    pub const STRING_FACETED_DOCUMENTS_IDS_PREFIX: &str = "string-faceted-documents-ids";
    pub const SYNONYMS_KEY: &str = "synonyms";
    pub const USER_DEFINED_SYNONYMS_KEY: &str = "user-defined-synonyms";
    pub const WORDS_FST_KEY: &str = "words-fst";
    pub const WORDS_PREFIXES_FST_KEY: &str = "words-prefixes-fst";
    pub const CREATED_AT_KEY: &str = "created-at";
@ -1055,18 +1059,116 @@ impl Index {
        }
    }
    /* non separator tokens */
    /// Writes the list of tokens that must not be treated as separators into the
    /// main database, under `main_key::NON_SEPARATOR_TOKENS_KEY`.
    pub(crate) fn put_non_separator_tokens(
        &self,
        wtxn: &mut RwTxn,
        set: &BTreeSet<String>,
    ) -> heed::Result<()> {
        let key = main_key::NON_SEPARATOR_TOKENS_KEY;
        self.main.put::<_, Str, SerdeBincode<_>>(wtxn, key, set)
    }
    /// Removes the non-separator tokens entry from the main database and forwards
    /// the flag reported by the underlying `delete` call.
    pub(crate) fn delete_non_separator_tokens(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
        let key = main_key::NON_SEPARATOR_TOKENS_KEY;
        self.main.delete::<_, Str>(wtxn, key)
    }
    /// Reads the non-separator tokens stored under `main_key::NON_SEPARATOR_TOKENS_KEY`.
    ///
    /// The `Option` is the one returned by the database lookup.
    pub fn non_separator_tokens(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
        let key = main_key::NON_SEPARATOR_TOKENS_KEY;
        let stored = self.main.get::<_, Str, SerdeBincode<BTreeSet<String>>>(rtxn, key)?;
        Ok(stored)
    }
    /* separator tokens */
    /// Writes the list of additional separator tokens into the main database,
    /// under `main_key::SEPARATOR_TOKENS_KEY`.
    pub(crate) fn put_separator_tokens(
        &self,
        wtxn: &mut RwTxn,
        set: &BTreeSet<String>,
    ) -> heed::Result<()> {
        let key = main_key::SEPARATOR_TOKENS_KEY;
        self.main.put::<_, Str, SerdeBincode<_>>(wtxn, key, set)
    }
    /// Removes the separator tokens entry from the main database and forwards
    /// the flag reported by the underlying `delete` call.
    pub(crate) fn delete_separator_tokens(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
        let key = main_key::SEPARATOR_TOKENS_KEY;
        self.main.delete::<_, Str>(wtxn, key)
    }
    /// Reads the additional separator tokens stored under `main_key::SEPARATOR_TOKENS_KEY`.
    ///
    /// The `Option` is the one returned by the database lookup.
    pub fn separator_tokens(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
        let key = main_key::SEPARATOR_TOKENS_KEY;
        let stored = self.main.get::<_, Str, SerdeBincode<BTreeSet<String>>>(rtxn, key)?;
        Ok(stored)
    }
    /* separators easing method */
    /// Computes the effective separator list from the stored settings.
    ///
    /// - When separator tokens are stored, they are merged with charabia's
    ///   `DEFAULT_SEPARATORS`.
    /// - When non-separator tokens are stored, they are subtracted from that merged
    ///   set (or from the defaults alone when no separator tokens are stored).
    /// - When neither setting is stored, `None` is returned and no custom list is
    ///   produced.
    pub fn allowed_separators(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
        // Fresh iterator over the default separators each time it is needed.
        let defaults = || charabia::separators::DEFAULT_SEPARATORS.iter().map(|s| s.to_string());

        // User-defined separators, extended with the defaults.
        let extended = self.separator_tokens(rtxn)?.map(|mut custom| {
            custom.extend(defaults());
            custom
        });

        let separators = match self.non_separator_tokens(rtxn)? {
            Some(excluded) => {
                // Start from the defaults alone when no custom separators exist.
                let base = extended.unwrap_or_else(|| defaults().collect());
                Some(&base - &excluded)
            }
            None => extended,
        };

        Ok(separators)
    }
    /* dictionary */
    /// Writes the user-provided dictionary words into the main database,
    /// under `main_key::DICTIONARY_KEY`.
    pub(crate) fn put_dictionary(
        &self,
        wtxn: &mut RwTxn,
        set: &BTreeSet<String>,
    ) -> heed::Result<()> {
        let key = main_key::DICTIONARY_KEY;
        self.main.put::<_, Str, SerdeBincode<_>>(wtxn, key, set)
    }
    /// Removes the dictionary entry from the main database and forwards
    /// the flag reported by the underlying `delete` call.
    pub(crate) fn delete_dictionary(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
        let key = main_key::DICTIONARY_KEY;
        self.main.delete::<_, Str>(wtxn, key)
    }
    /// Reads the dictionary words stored under `main_key::DICTIONARY_KEY`.
    ///
    /// The `Option` is the one returned by the database lookup.
    pub fn dictionary(&self, rtxn: &RoTxn) -> Result<Option<BTreeSet<String>>> {
        let key = main_key::DICTIONARY_KEY;
        let stored = self.main.get::<_, Str, SerdeBincode<BTreeSet<String>>>(rtxn, key)?;
        Ok(stored)
    }
    /* synonyms */
    /// Stores two synonym maps in the main database within the given write transaction:
    /// `synonyms` under `main_key::SYNONYMS_KEY` and `user_defined_synonyms` under
    /// `main_key::USER_DEFINED_SYNONYMS_KEY`.
    ///
    /// The first write is propagated with `?`, so the second map is only written
    /// when the first write succeeded.
    pub(crate) fn put_synonyms(
        &self,
        wtxn: &mut RwTxn,
        synonyms: &HashMap<Vec<String>, Vec<Vec<String>>>,
        user_defined_synonyms: &BTreeMap<String, Vec<String>>,
    ) -> heed::Result<()> {
-        self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SYNONYMS_KEY, synonyms)
+        self.main.put::<_, Str, SerdeBincode<_>>(wtxn, main_key::SYNONYMS_KEY, synonyms)?;
        self.main.put::<_, Str, SerdeBincode<_>>(
            wtxn,
            main_key::USER_DEFINED_SYNONYMS_KEY,
            user_defined_synonyms,
        )
    }
    /// Deletes both synonym entries (`main_key::SYNONYMS_KEY` and
    /// `main_key::USER_DEFINED_SYNONYMS_KEY`) from the main database.
    ///
    /// Returns `true` when at least one of the two entries was removed, so callers
    /// can detect a change even if only one of the keys was present.
    ///
    /// # Errors
    ///
    /// Propagates any error returned by either underlying `delete` call.
    pub(crate) fn delete_synonyms(&self, wtxn: &mut RwTxn) -> heed::Result<bool> {
        // Run both deletions before combining the flags: a short-circuiting `||`
        // applied directly to the calls would skip the second delete.
        let synonyms_deleted = self.main.delete::<_, Str>(wtxn, main_key::SYNONYMS_KEY)?;
        let user_defined_deleted =
            self.main.delete::<_, Str>(wtxn, main_key::USER_DEFINED_SYNONYMS_KEY)?;
        Ok(synonyms_deleted || user_defined_deleted)
    }
    /// Reads the map stored under `main_key::USER_DEFINED_SYNONYMS_KEY`,
    /// falling back to an empty map when the lookup yields nothing.
    pub fn user_defined_synonyms(
        &self,
        rtxn: &RoTxn,
    ) -> heed::Result<BTreeMap<String, Vec<String>>> {
        let key = main_key::USER_DEFINED_SYNONYMS_KEY;
        let stored = self.main.get::<_, Str, SerdeBincode<_>>(rtxn, key)?;
        Ok(stored.unwrap_or_default())
    }
    pub fn synonyms(&self, rtxn: &RoTxn) -> heed::Result<HashMap<Vec<String>, Vec<Vec<String>>>> {
--- a/milli/src/search/new/mod.rs
+++ b/milli/src/search/new/mod.rs
@ -488,6 +488,20 @@ pub fn execute_search(
            tokbuilder.stop_words(stop_words);
        }
        let separators = ctx.index.allowed_separators(ctx.txn)?;
        let separators: Option<Vec<_>> =
            separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
        if let Some(ref separators) = separators {
            tokbuilder.separators(separators);
        }
        let dictionary = ctx.index.dictionary(ctx.txn)?;
        let dictionary: Option<Vec<_>> =
            dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
        if let Some(ref dictionary) = dictionary {
            tokbuilder.words_dict(dictionary);
        }
        let script_lang_map = ctx.index.script_language(ctx.txn)?;
        if !script_lang_map.is_empty() {
            tokbuilder.allow_list(&script_lang_map);
--- a/milli/src/search/new/tests/integration.rs
+++ b/milli/src/search/new/tests/integration.rs
@ -2,7 +2,7 @@ use std::io::Cursor;
 use big_s::S;
 use heed::EnvOpenOptions;
-use maplit::{hashmap, hashset};
+use maplit::{btreemap, hashset};
 use crate::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
 use crate::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
@ -33,7 +33,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
        S("tag"),
        S("asc_desc_rank"),
    });
-    builder.set_synonyms(hashmap! {
+    builder.set_synonyms(btreemap! {
        S("hello") => vec![S("good morning")],
        S("world") => vec![S("earth")],
        S("america") => vec![S("the united states")],
--- a/milli/src/search/new/tests/proximity.rs
+++ b/milli/src/search/new/tests/proximity.rs
@ -15,7 +15,7 @@ they store fewer proximities than the regular word proximity DB.
 */
-use std::collections::HashMap;
+use std::collections::BTreeMap;
 use crate::index::tests::TempIndex;
 use crate::search::new::tests::collect_field_values;
@ -336,7 +336,7 @@ fn test_proximity_split_word() {
    index
        .update_settings(|s| {
-            let mut syns = HashMap::new();
+            let mut syns = BTreeMap::new();
            syns.insert("xyz".to_owned(), vec!["sun flower".to_owned()]);
            s.set_synonyms(syns);
        })
--- a/milli/src/search/new/tests/typo.rs
+++ b/milli/src/search/new/tests/typo.rs
@ -18,7 +18,7 @@ if `words` doesn't exist before it.
 14. Synonyms cost nothing according to the typo ranking rule
 */
-use std::collections::HashMap;
+use std::collections::BTreeMap;
 use crate::index::tests::TempIndex;
 use crate::search::new::tests::collect_field_values;
@ -591,7 +591,7 @@ fn test_typo_synonyms() {
        .update_settings(|s| {
            s.set_criteria(vec![Criterion::Typo]);
-            let mut synonyms = HashMap::new();
+            let mut synonyms = BTreeMap::new();
            synonyms.insert("lackadaisical".to_owned(), vec!["lazy".to_owned()]);
            synonyms.insert("fast brownish".to_owned(), vec!["quick brown".to_owned()]);
--- a/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
+++ b/milli/src/update/index_documents/extract/extract_docid_word_positions.rs
@ -28,6 +28,8 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
    indexer: GrenadParameters,
    searchable_fields: &Option<HashSet<FieldId>>,
    stop_words: Option<&fst::Set<&[u8]>>,
    allowed_separators: Option<&[&str]>,
    dictionary: Option<&[&str]>,
    max_positions_per_attributes: Option<u32>,
 ) -> Result<(RoaringBitmap, grenad::Reader<File>, ScriptLanguageDocidsMap)> {
    puffin::profile_function!();
@ -52,6 +54,12 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
    if let Some(stop_words) = stop_words {
        tokenizer_builder.stop_words(stop_words);
    }
    if let Some(dictionary) = dictionary {
        tokenizer_builder.words_dict(dictionary);
    }
    if let Some(separators) = allowed_separators {
        tokenizer_builder.separators(separators);
    }
    let tokenizer = tokenizer_builder.build();
    let mut cursor = obkv_documents.into_cursor()?;
--- a/milli/src/update/index_documents/extract/mod.rs
+++ b/milli/src/update/index_documents/extract/mod.rs
@ -49,6 +49,8 @@ pub(crate) fn data_from_obkv_documents(
    geo_fields_ids: Option<(FieldId, FieldId)>,
    vectors_field_id: Option<FieldId>,
    stop_words: Option<fst::Set<&[u8]>>,
    allowed_separators: Option<&[&str]>,
    dictionary: Option<&[&str]>,
    max_positions_per_attributes: Option<u32>,
    exact_attributes: HashSet<FieldId>,
 ) -> Result<()> {
@ -76,6 +78,8 @@ pub(crate) fn data_from_obkv_documents(
                    geo_fields_ids,
                    vectors_field_id,
                    &stop_words,
                    &allowed_separators,
                    &dictionary,
                    max_positions_per_attributes,
                )
            })
@ -289,6 +293,8 @@ fn send_and_extract_flattened_documents_data(
    geo_fields_ids: Option<(FieldId, FieldId)>,
    vectors_field_id: Option<FieldId>,
    stop_words: &Option<fst::Set<&[u8]>>,
    allowed_separators: &Option<&[&str]>,
    dictionary: &Option<&[&str]>,
    max_positions_per_attributes: Option<u32>,
 ) -> Result<(
    grenad::Reader<CursorClonableMmap>,
@ -344,6 +350,8 @@ fn send_and_extract_flattened_documents_data(
                        indexer,
                        searchable_fields,
                        stop_words.as_ref(),
                        *allowed_separators,
                        *dictionary,
                        max_positions_per_attributes,
                    )?;
--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@ -316,6 +316,12 @@ where
        let vectors_field_id = self.index.fields_ids_map(self.wtxn)?.id("_vectors");
        let stop_words = self.index.stop_words(self.wtxn)?;
        let separators = self.index.allowed_separators(self.wtxn)?;
        let separators: Option<Vec<_>> =
            separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
        let dictionary = self.index.dictionary(self.wtxn)?;
        let dictionary: Option<Vec<_>> =
            dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
        let exact_attributes = self.index.exact_attributes_ids(self.wtxn)?;
        let pool_params = GrenadParameters {
@ -353,6 +359,8 @@ where
                    geo_fields_ids,
                    vectors_field_id,
                    stop_words,
                    separators.as_deref(),
                    dictionary.as_deref(),
                    max_positions_per_attributes,
                    exact_attributes,
                )
--- a/milli/src/update/settings.rs
+++ b/milli/src/update/settings.rs
@ -1,4 +1,4 @@
-use std::collections::{BTreeSet, HashMap, HashSet};
+use std::collections::{BTreeMap, BTreeSet, HashMap, HashSet};
 use std::result::Result as StdResult;
 use charabia::{Normalize, Tokenizer, TokenizerBuilder};
@ -112,8 +112,11 @@ pub struct Settings<'a, 't, 'u, 'i> {
    sortable_fields: Setting<HashSet<String>>,
    criteria: Setting<Vec<Criterion>>,
    stop_words: Setting<BTreeSet<String>>,
    non_separator_tokens: Setting<BTreeSet<String>>,
    separator_tokens: Setting<BTreeSet<String>>,
    dictionary: Setting<BTreeSet<String>>,
    distinct_field: Setting<String>,
-    synonyms: Setting<HashMap<String, Vec<String>>>,
+    synonyms: Setting<BTreeMap<String, Vec<String>>>,
    primary_key: Setting<String>,
    authorize_typos: Setting<bool>,
    min_word_len_two_typos: Setting<u8>,
@ -141,6 +144,9 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
            sortable_fields: Setting::NotSet,
            criteria: Setting::NotSet,
            stop_words: Setting::NotSet,
            non_separator_tokens: Setting::NotSet,
            separator_tokens: Setting::NotSet,
            dictionary: Setting::NotSet,
            distinct_field: Setting::NotSet,
            synonyms: Setting::NotSet,
            primary_key: Setting::NotSet,
@ -205,6 +211,39 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
            if stop_words.is_empty() { Setting::Reset } else { Setting::Set(stop_words) }
    }
    /// Marks the non-separator tokens setting as `Setting::Reset` on this builder.
    pub fn reset_non_separator_tokens(&mut self) {
        self.non_separator_tokens = Setting::Reset;
    }
    /// Records the requested non-separator tokens on this builder.
    ///
    /// An empty set is recorded as `Setting::Reset` rather than `Setting::Set`.
    pub fn set_non_separator_tokens(&mut self, non_separator_tokens: BTreeSet<String>) {
        self.non_separator_tokens = match non_separator_tokens.is_empty() {
            true => Setting::Reset,
            false => Setting::Set(non_separator_tokens),
        };
    }
    /// Marks the separator tokens setting as `Setting::Reset` on this builder.
    pub fn reset_separator_tokens(&mut self) {
        self.separator_tokens = Setting::Reset;
    }
    /// Records the requested additional separator tokens on this builder.
    ///
    /// An empty set is recorded as `Setting::Reset` rather than `Setting::Set`.
    pub fn set_separator_tokens(&mut self, separator_tokens: BTreeSet<String>) {
        self.separator_tokens = match separator_tokens.is_empty() {
            true => Setting::Reset,
            false => Setting::Set(separator_tokens),
        };
    }
    /// Marks the dictionary setting as `Setting::Reset` on this builder.
    pub fn reset_dictionary(&mut self) {
        self.dictionary = Setting::Reset;
    }
    /// Records the requested dictionary words on this builder.
    ///
    /// An empty set is recorded as `Setting::Reset` rather than `Setting::Set`.
    pub fn set_dictionary(&mut self, dictionary: BTreeSet<String>) {
        self.dictionary = match dictionary.is_empty() {
            true => Setting::Reset,
            false => Setting::Set(dictionary),
        };
    }
    /// Marks the distinct field setting as `Setting::Reset` on this builder.
    pub fn reset_distinct_field(&mut self) {
        self.distinct_field = Setting::Reset;
    }
@ -217,7 +256,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
        self.synonyms = Setting::Reset;
    }
-    pub fn set_synonyms(&mut self, synonyms: HashMap<String, Vec<String>>) {
+    pub fn set_synonyms(&mut self, synonyms: BTreeMap<String, Vec<String>>) {
        // An empty map is recorded as a reset of the setting rather than as an empty set value.
        self.synonyms = if synonyms.is_empty() { Setting::Reset } else { Setting::Set(synonyms) }
    }
@ -452,9 +491,84 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
        }
    }
    /// Applies the pending non-separator tokens setting to the index.
    ///
    /// Returns `true` when the stored value changed: a `Set` whose list differs from
    /// the stored one (or when nothing was stored), or a `Reset` for which the delete
    /// call reports `true`. `NotSet` never changes anything.
    ///
    /// When a change happened and no synonyms update is pending, the user-defined
    /// synonyms are re-queued as a `Set` so that they get updated as well.
    fn update_non_separator_tokens(&mut self) -> Result<bool> {
        let changes = match &self.non_separator_tokens {
            Setting::NotSet => false,
            Setting::Reset => self.index.delete_non_separator_tokens(self.wtxn)?,
            Setting::Set(requested) => {
                let stored = self.index.non_separator_tokens(self.wtxn)?;
                // Write only when the requested list is not already the stored one.
                let differs = stored.as_ref() != Some(requested);
                if differs {
                    self.index.put_non_separator_tokens(self.wtxn, requested)?;
                }
                differs
            }
        };

        // The synonyms must be updated if non separator tokens have been updated.
        if changes && self.synonyms == Setting::NotSet {
            let user_defined = self.index.user_defined_synonyms(self.wtxn)?;
            self.synonyms = Setting::Set(user_defined);
        }

        Ok(changes)
    }
    /// Applies the pending separator tokens setting to the index.
    ///
    /// Returns `true` when the stored value changed: a `Set` whose list differs from
    /// the stored one (or when nothing was stored), or a `Reset` for which the delete
    /// call reports `true`. `NotSet` never changes anything.
    ///
    /// When a change happened and no synonyms update is pending, the user-defined
    /// synonyms are re-queued as a `Set` so that they get updated as well.
    fn update_separator_tokens(&mut self) -> Result<bool> {
        let changes = match &self.separator_tokens {
            Setting::NotSet => false,
            Setting::Reset => self.index.delete_separator_tokens(self.wtxn)?,
            Setting::Set(requested) => {
                let stored = self.index.separator_tokens(self.wtxn)?;
                // Write only when the requested list is not already the stored one.
                let differs = stored.as_ref() != Some(requested);
                if differs {
                    self.index.put_separator_tokens(self.wtxn, requested)?;
                }
                differs
            }
        };

        // The synonyms must be updated if separator tokens have been updated.
        if changes && self.synonyms == Setting::NotSet {
            let user_defined = self.index.user_defined_synonyms(self.wtxn)?;
            self.synonyms = Setting::Set(user_defined);
        }

        Ok(changes)
    }
    /// Applies the pending dictionary setting to the index.
    ///
    /// Returns `true` when the stored value changed: a `Set` whose list differs from
    /// the stored one (or when nothing was stored), or a `Reset` for which the delete
    /// call reports `true`. `NotSet` never changes anything.
    ///
    /// When a change happened and no synonyms update is pending, the user-defined
    /// synonyms are re-queued as a `Set` so that they get updated as well.
    fn update_dictionary(&mut self) -> Result<bool> {
        let changes = match &self.dictionary {
            Setting::NotSet => false,
            Setting::Reset => self.index.delete_dictionary(self.wtxn)?,
            Setting::Set(requested) => {
                let stored = self.index.dictionary(self.wtxn)?;
                // Write only when the requested list is not already the stored one.
                let differs = stored.as_ref() != Some(requested);
                if differs {
                    self.index.put_dictionary(self.wtxn, requested)?;
                }
                differs
            }
        };

        // The synonyms must be updated if dictionary has been updated.
        if changes && self.synonyms == Setting::NotSet {
            let user_defined = self.index.user_defined_synonyms(self.wtxn)?;
            self.synonyms = Setting::Set(user_defined);
        }

        Ok(changes)
    }
    fn update_synonyms(&mut self) -> Result<bool> {
        match self.synonyms {
-            Setting::Set(ref synonyms) => {
+            Setting::Set(ref user_synonyms) => {
                fn normalize(tokenizer: &Tokenizer, text: &str) -> Vec<String> {
                    tokenizer
                        .tokenize(text)
@ -473,10 +587,25 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
                if let Some(ref stop_words) = stop_words {
                    builder.stop_words(stop_words);
                }
                let separators = self.index.allowed_separators(self.wtxn)?;
                let separators: Option<Vec<_>> =
                    separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
                if let Some(ref separators) = separators {
                    builder.separators(separators);
                }
                let dictionary = self.index.dictionary(self.wtxn)?;
                let dictionary: Option<Vec<_>> =
                    dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
                if let Some(ref dictionary) = dictionary {
                    builder.words_dict(dictionary);
                }
                let tokenizer = builder.build();
                let mut new_synonyms = HashMap::new();
-                for (word, synonyms) in synonyms {
+                for (word, synonyms) in user_synonyms {
                    // Normalize both the word and associated synonyms.
                    let normalized_word = normalize(&tokenizer, word);
                    let normalized_synonyms =
@ -497,7 +626,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
                let old_synonyms = self.index.synonyms(self.wtxn)?;
                if new_synonyms != old_synonyms {
-                    self.index.put_synonyms(self.wtxn, &new_synonyms)?;
+                    self.index.put_synonyms(self.wtxn, &new_synonyms, user_synonyms)?;
                    Ok(true)
                } else {
                    Ok(false)
@ -757,11 +886,17 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
        let faceted_updated = old_faceted_fields != new_faceted_fields;
        let stop_words_updated = self.update_stop_words()?;
        let non_separator_tokens_updated = self.update_non_separator_tokens()?;
        let separator_tokens_updated = self.update_separator_tokens()?;
        let dictionary_updated = self.update_dictionary()?;
        let synonyms_updated = self.update_synonyms()?;
        let searchable_updated = self.update_searchable()?;
        let exact_attributes_updated = self.update_exact_attributes()?;
        if stop_words_updated
            || non_separator_tokens_updated
            || separator_tokens_updated
            || dictionary_updated
            || faceted_updated
            || synonyms_updated
            || searchable_updated
@ -778,7 +913,7 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
 mod tests {
    use big_s::S;
    use heed::types::ByteSlice;
-    use maplit::{btreeset, hashmap, hashset};
+    use maplit::{btreemap, btreeset, hashset};
    use super::*;
    use crate::error::Error;
@ -1244,7 +1379,7 @@ mod tests {
        // In the same transaction provide some synonyms
        index
            .update_settings_using_wtxn(&mut wtxn, |settings| {
-                settings.set_synonyms(hashmap! {
+                settings.set_synonyms(btreemap! {
                    "blini".to_string() => vec!["crepes".to_string()],
                    "super like".to_string() => vec!["love".to_string()],
                    "puppies".to_string() => vec!["dogs".to_string(), "doggos".to_string()]
@ -1540,6 +1675,9 @@ mod tests {
                    sortable_fields,
                    criteria,
                    stop_words,
                    non_separator_tokens,
                    separator_tokens,
                    dictionary,
                    distinct_field,
                    synonyms,
                    primary_key,
@ -1558,6 +1696,9 @@ mod tests {
                assert!(matches!(sortable_fields, Setting::NotSet));
                assert!(matches!(criteria, Setting::NotSet));
                assert!(matches!(stop_words, Setting::NotSet));
                assert!(matches!(non_separator_tokens, Setting::NotSet));
                assert!(matches!(separator_tokens, Setting::NotSet));
                assert!(matches!(dictionary, Setting::NotSet));
                assert!(matches!(distinct_field, Setting::NotSet));
                assert!(matches!(synonyms, Setting::NotSet));
                assert!(matches!(primary_key, Setting::NotSet));
--- a/milli/tests/search/mod.rs
+++ b/milli/tests/search/mod.rs
@ -5,7 +5,7 @@ use std::io::Cursor;
 use big_s::S;
 use either::{Either, Left, Right};
 use heed::EnvOpenOptions;
-use maplit::{hashmap, hashset};
+use maplit::{btreemap, hashset};
 use milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
 use milli::update::{IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings};
 use milli::{AscDesc, Criterion, DocumentId, Index, Member, Object, TermsMatchingStrategy};
@ -51,7 +51,7 @@ pub fn setup_search_index_with_criteria(criteria: &[Criterion]) -> Index {
        S("tag"),
        S("asc_desc_rank"),
    });
-    builder.set_synonyms(hashmap! {
+    builder.set_synonyms(btreemap! {
        S("hello") => vec![S("good morning")],
        S("world") => vec![S("earth")],
        S("america") => vec![S("the united states")],