diff --git a/meilisearch/tests/settings/tokenizer_customization.rs b/meilisearch/tests/settings/tokenizer_customization.rs
index 75bea560b..62a1440b2 100644
--- a/meilisearch/tests/settings/tokenizer_customization.rs
+++ b/meilisearch/tests/settings/tokenizer_customization.rs
@@ -194,3 +194,266 @@ async fn set_and_search() {
})
.await;
}
+
+#[actix_rt::test]
+async fn advanced_synergies() {
+ let documents = json!([
+ {
+ "id": 1,
+ "content": "J.R.R. Tolkien",
+ },
+ {
+ "id": 2,
+ "content": "J. R. R. Tolkien",
+ },
+ {
+ "id": 3,
+ "content": "jrr Tolkien",
+ },
+ {
+ "id": 4,
+ "content": "J.K. Rowlings",
+ },
+ {
+ "id": 5,
+ "content": "J. K. Rowlings",
+ },
+ {
+ "id": 6,
+ "content": "jk Rowlings",
+ },
+ ]);
+
+ let server = Server::new().await;
+ let index = server.index("test");
+
+ index.add_documents(documents, None).await;
+ index.wait_task(0).await;
+
+ let (_response, _code) = index
+ .update_settings(json!({
+ "dictionary": ["J.R.R.", "J. R. R.", "J.K.", "J. K."],
+ "synonyms": {
+ "J.R.R.": ["jrr", "J. R. R."],
+ "J. R. R.": ["jrr", "J.R.R."],
+ "jrr": ["J.R.R.", "J. R. R."],
+ "J.K.": ["jk", "J. K."],
+ "J. K.": ["jk", "J.K."],
+ "jk": ["J.K.", "J. K."],
+ }
+ }))
+ .await;
+ index.wait_task(1).await;
+
+ index
+ .search(json!({"q": "J.R.R.", "attributesToHighlight": ["content"]}), |response, code| {
+ snapshot!(code, @"200 OK");
+ snapshot!(json_string!(response["hits"]), @r###"
+ [
+ {
+ "id": 1,
+ "content": "J.R.R. Tolkien",
+ "_formatted": {
+ "id": "1",
+ "content": "J.R.R. Tolkien"
+ }
+ },
+ {
+ "id": 2,
+ "content": "J. R. R. Tolkien",
+ "_formatted": {
+ "id": "2",
+ "content": "J. R. R. Tolkien"
+ }
+ },
+ {
+ "id": 3,
+ "content": "jrr Tolkien",
+ "_formatted": {
+ "id": "3",
+ "content": "jrr Tolkien"
+ }
+ }
+ ]
+ "###);
+ })
+ .await;
+
+ index
+ .search(json!({"q": "jrr", "attributesToHighlight": ["content"]}), |response, code| {
+ snapshot!(code, @"200 OK");
+ snapshot!(json_string!(response["hits"]), @r###"
+ [
+ {
+ "id": 3,
+ "content": "jrr Tolkien",
+ "_formatted": {
+ "id": "3",
+ "content": "jrr Tolkien"
+ }
+ },
+ {
+ "id": 1,
+ "content": "J.R.R. Tolkien",
+ "_formatted": {
+ "id": "1",
+ "content": "J.R.R. Tolkien"
+ }
+ },
+ {
+ "id": 2,
+ "content": "J. R. R. Tolkien",
+ "_formatted": {
+ "id": "2",
+ "content": "J. R. R. Tolkien"
+ }
+ }
+ ]
+ "###);
+ })
+ .await;
+
+ index
+ .search(json!({"q": "J. R. R.", "attributesToHighlight": ["content"]}), |response, code| {
+ snapshot!(code, @"200 OK");
+ snapshot!(json_string!(response["hits"]), @r###"
+ [
+ {
+ "id": 2,
+ "content": "J. R. R. Tolkien",
+ "_formatted": {
+ "id": "2",
+ "content": "J. R. R. Tolkien"
+ }
+ },
+ {
+ "id": 1,
+ "content": "J.R.R. Tolkien",
+ "_formatted": {
+ "id": "1",
+ "content": "J.R.R. Tolkien"
+ }
+ },
+ {
+ "id": 3,
+ "content": "jrr Tolkien",
+ "_formatted": {
+ "id": "3",
+ "content": "jrr Tolkien"
+ }
+ }
+ ]
+ "###);
+ })
+ .await;
+
+ index
+ .search(json!({"q": "jk", "attributesToHighlight": ["content"]}), |response, code| {
+ snapshot!(code, @"200 OK");
+ snapshot!(json_string!(response["hits"]), @r###"
+ [
+ {
+ "id": 6,
+ "content": "jk Rowlings",
+ "_formatted": {
+ "id": "6",
+ "content": "jk Rowlings"
+ }
+ },
+ {
+ "id": 4,
+ "content": "J.K. Rowlings",
+ "_formatted": {
+ "id": "4",
+ "content": "J.K. Rowlings"
+ }
+ },
+ {
+ "id": 5,
+ "content": "J. K. Rowlings",
+ "_formatted": {
+ "id": "5",
+ "content": "J. K. Rowlings"
+ }
+ }
+ ]
+ "###);
+ })
+ .await;
+
+ index
+ .search(json!({"q": "J.K.", "attributesToHighlight": ["content"]}), |response, code| {
+ snapshot!(code, @"200 OK");
+ snapshot!(json_string!(response["hits"]), @r###"
+ [
+ {
+ "id": 4,
+ "content": "J.K. Rowlings",
+ "_formatted": {
+ "id": "4",
+ "content": "J.K. Rowlings"
+ }
+ },
+ {
+ "id": 5,
+ "content": "J. K. Rowlings",
+ "_formatted": {
+ "id": "5",
+ "content": "J. K. Rowlings"
+ }
+ },
+ {
+ "id": 6,
+ "content": "jk Rowlings",
+ "_formatted": {
+ "id": "6",
+ "content": "jk Rowlings"
+ }
+ }
+ ]
+ "###);
+ })
+ .await;
+
+ index
+ .search(json!({"q": "J. K.", "attributesToHighlight": ["content"]}), |response, code| {
+ snapshot!(code, @"200 OK");
+ snapshot!(json_string!(response["hits"]), @r###"
+ [
+ {
+ "id": 5,
+ "content": "J. K. Rowlings",
+ "_formatted": {
+ "id": "5",
+ "content": "J. K. Rowlings"
+ }
+ },
+ {
+ "id": 4,
+ "content": "J.K. Rowlings",
+ "_formatted": {
+ "id": "4",
+ "content": "J.K. Rowlings"
+ }
+ },
+ {
+ "id": 6,
+ "content": "jk Rowlings",
+ "_formatted": {
+ "id": "6",
+ "content": "jk Rowlings"
+ }
+ },
+ {
+ "id": 2,
+ "content": "J. R. R. Tolkien",
+ "_formatted": {
+ "id": "2",
+ "content": "J. R. R. Tolkien"
+ }
+ }
+ ]
+ "###);
+ })
+ .await;
+}
diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs
index bdae5d7b4..8f5a71f1d 100644
--- a/milli/src/update/settings.rs
+++ b/milli/src/update/settings.rs
@@ -491,57 +491,78 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
}
fn update_non_separator_tokens(&mut self) -> Result<bool> {
- match self.non_separator_tokens {
+ let changes = match self.non_separator_tokens {
Setting::Set(ref non_separator_tokens) => {
let current = self.index.non_separator_tokens(self.wtxn)?;
// Does the new list differ from the previous one?
if current.map_or(true, |current| &current != non_separator_tokens) {
self.index.put_non_separator_tokens(self.wtxn, non_separator_tokens)?;
- Ok(true)
+ true
} else {
- Ok(false)
+ false
}
}
- Setting::Reset => Ok(self.index.delete_non_separator_tokens(self.wtxn)?),
- Setting::NotSet => Ok(false),
+ Setting::Reset => self.index.delete_non_separator_tokens(self.wtxn)?,
+ Setting::NotSet => false,
+ };
+
+ // the synonyms must be updated if the non-separator tokens have been updated.
+ if changes {
+ self.update_synonyms()?;
}
+
+ Ok(changes)
}
fn update_separator_tokens(&mut self) -> Result<bool> {
- match self.separator_tokens {
+ let changes = match self.separator_tokens {
Setting::Set(ref separator_tokens) => {
let current = self.index.separator_tokens(self.wtxn)?;
// Does the new list differ from the previous one?
if current.map_or(true, |current| &current != separator_tokens) {
self.index.put_separator_tokens(self.wtxn, separator_tokens)?;
- Ok(true)
+ true
} else {
- Ok(false)
+ false
}
}
- Setting::Reset => Ok(self.index.delete_separator_tokens(self.wtxn)?),
- Setting::NotSet => Ok(false),
+ Setting::Reset => self.index.delete_separator_tokens(self.wtxn)?,
+ Setting::NotSet => false,
+ };
+
+ // the synonyms must be updated if the separator tokens have been updated.
+ if changes {
+ self.update_synonyms()?;
}
+
+ Ok(changes)
}
fn update_dictionary(&mut self) -> Result<bool> {
- match self.dictionary {
+ let changes = match self.dictionary {
Setting::Set(ref dictionary) => {
let current = self.index.dictionary(self.wtxn)?;
// Does the new list differ from the previous one?
if current.map_or(true, |current| &current != dictionary) {
self.index.put_dictionary(self.wtxn, dictionary)?;
- Ok(true)
+ true
} else {
- Ok(false)
+ false
}
}
- Setting::Reset => Ok(self.index.delete_dictionary(self.wtxn)?),
- Setting::NotSet => Ok(false),
+ Setting::Reset => self.index.delete_dictionary(self.wtxn)?,
+ Setting::NotSet => false,
+ };
+
+ // the synonyms must be updated if the dictionary has been updated.
+ if changes {
+ self.update_synonyms()?;
}
+
+ Ok(changes)
}
fn update_synonyms(&mut self) -> Result<bool> {
@@ -565,6 +586,21 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
if let Some(ref stop_words) = stop_words {
builder.stop_words(stop_words);
}
+
+ let separators = self.index.allowed_separators(self.wtxn)?;
+ let separators: Option<Vec<_>> =
+ separators.as_ref().map(|x| x.iter().map(String::as_str).collect());
+ if let Some(ref separators) = separators {
+ builder.separators(separators);
+ }
+
+ let dictionary = self.index.dictionary(self.wtxn)?;
+ let dictionary: Option<Vec<_>> =
+ dictionary.as_ref().map(|x| x.iter().map(String::as_str).collect());
+ if let Some(ref dictionary) = dictionary {
+ builder.words_dict(dictionary);
+ }
+
let tokenizer = builder.build();
let mut new_synonyms = HashMap::new();