From fc2590fc9dfdf283a15851d367a4b30bcf09aee6 Mon Sep 17 00:00:00 2001
From: ManyTheFish
Date: Tue, 8 Aug 2023 16:43:08 +0200
Subject: [PATCH 1/2] Add a test

---
 meilisearch/tests/search/mod.rs | 56 +++++++++++++++++++++++++++++++++
 1 file changed, 56 insertions(+)

diff --git a/meilisearch/tests/search/mod.rs b/meilisearch/tests/search/mod.rs
index 6fcc33309..3aefe7e83 100644
--- a/meilisearch/tests/search/mod.rs
+++ b/meilisearch/tests/search/mod.rs
@@ -1104,3 +1104,59 @@ async fn camelcased_words() {
         })
         .await;
 }
+
+#[actix_rt::test]
+async fn simple_search_with_strange_synonyms() { // "&" and "to" are set as mutual synonyms.
+    let server = Server::new().await;
+    let index = server.index("test");
+
+    index.update_settings(json!({ "synonyms": {"&": ["to"], "to": ["&"]} })).await;
+    let r = index.wait_task(0).await; // presumably task 0 is the settings update above
+    meili_snap::snapshot!(r["status"], @r###""succeeded""###);
+
+    let documents = DOCUMENTS.clone();
+    index.add_documents(documents, None).await;
+    index.wait_task(1).await; // result discarded: unlike task 0, its status is not asserted
+
+    index // Baseline: query written with "to", as in the expected title.
+        .search(json!({"q": "How to train"}), |response, code| {
+            meili_snap::snapshot!(code, @"200 OK");
+            meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
+            [
+              {
+                "title": "How to Train Your Dragon: The Hidden World",
+                "id": "166428"
+              }
+            ]
+            "###);
+        })
+        .await;
+
+    index // Same query with "to" replaced by "&"; the same single hit is expected.
+        .search(json!({"q": "How & train"}), |response, code| {
+            meili_snap::snapshot!(code, @"200 OK");
+            meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
+            [
+              {
+                "title": "How to Train Your Dragon: The Hidden World",
+                "id": "166428"
+              }
+            ]
+            "###);
+        })
+        .await;
+
+    index // Query made of the bare word "to"; the same single hit is expected.
+        .search(json!({"q": "to"}), |response, code| {
+            meili_snap::snapshot!(code, @"200 OK");
+            meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
+            [
+              {
+                "title": "How to Train Your Dragon: The Hidden World",
+                "id": "166428"
+              }
+            ]
+            "###);
+        })
+        .await;
+}

From 8dc5acf998a3f7caf0126983bfa45265e7efed94 Mon Sep 17 00:00:00 2001
From: ManyTheFish
Date: Tue, 8 Aug 2023 16:52:36 +0200
Subject: [PATCH 2/2] 
Try fix --- milli/src/update/settings.rs | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/milli/src/update/settings.rs b/milli/src/update/settings.rs index 33f86a4bb..d3fdac0c7 100644 --- a/milli/src/update/settings.rs +++ b/milli/src/update/settings.rs @@ -477,13 +477,18 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> { for (word, synonyms) in synonyms { // Normalize both the word and associated synonyms. let normalized_word = normalize(&tokenizer, word); - let normalized_synonyms = - synonyms.iter().map(|synonym| normalize(&tokenizer, synonym)); + let normalized_synonyms: Vec<_> = synonyms + .iter() + .map(|synonym| normalize(&tokenizer, synonym)) + .filter(|synonym| !synonym.is_empty()) + .collect(); // Store the normalized synonyms under the normalized word, // merging the possible duplicate words. - let entry = new_synonyms.entry(normalized_word).or_insert_with(Vec::new); - entry.extend(normalized_synonyms); + if !normalized_word.is_empty() && !normalized_synonyms.is_empty() { + let entry = new_synonyms.entry(normalized_word).or_insert_with(Vec::new); + entry.extend(normalized_synonyms.into_iter()); + } } // Make sure that we don't have duplicate synonyms.