From 9c0691156f0b1c6ab0c4426eca159860d394be31 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Mon, 10 Jul 2023 14:01:14 +0200 Subject: [PATCH 1/2] Add tests --- meilisearch/tests/search/mod.rs | 179 ++++++++++++++++++++++++++++++++ 1 file changed, 179 insertions(+) diff --git a/meilisearch/tests/search/mod.rs b/meilisearch/tests/search/mod.rs index e6eae7cb1..1aa1d5805 100644 --- a/meilisearch/tests/search/mod.rs +++ b/meilisearch/tests/search/mod.rs @@ -876,3 +876,182 @@ async fn experimental_feature_vector_store() { meili_snap::snapshot!(code, @"200 OK"); meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @"[]"); } + +#[cfg(feature = "default")] +#[actix_rt::test] +async fn camelcased_words() { + let server = Server::new().await; + let index = server.index("test"); + + // related to https://github.com/meilisearch/meilisearch/issues/3818 + let documents = json!([ + { "id": 0, "title": "DeLonghi" }, + { "id": 1, "title": "delonghi" }, + { "id": 2, "title": "TestAB" }, + { "id": 3, "title": "testab" }, + ]); + index.add_documents(documents, None).await; + index.wait_task(0).await; + + index + .search(json!({"q": "deLonghi"}), |response, code| { + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###" + [ + { + "id": 0, + "title": "DeLonghi" + }, + { + "id": 1, + "title": "delonghi" + } + ] + "###); + }) + .await; + + index + .search(json!({"q": "dellonghi"}), |response, code| { + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###" + [ + { + "id": 0, + "title": "DeLonghi" + }, + { + "id": 1, + "title": "delonghi" + } + ] + "###); + }) + .await; + + index + .search(json!({"q": "testa"}), |response, code| { + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###" + [ + { + "id": 2, + "title": "TestAB" + }, + { + "id": 3, + "title": "testab" + } + ] + "###); + }) + .await; + + index + .search(json!({"q": "testab"}), |response, code| { + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###" + [ + { + "id": 2, + "title": "TestAB" + }, + { + "id": 3, + "title": "testab" + } + ] + "###); + }) + .await; + + index + .search(json!({"q": "TestaB"}), |response, code| { + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###" + [ + { + "id": 2, + "title": "TestAB" + }, + { + "id": 3, + "title": "testab" + } + ] + "###); + }) + .await; + + index + .search(json!({"q": "Testab"}), |response, code| { + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###" + [ + { + "id": 2, + "title": "TestAB" + }, + { + "id": 3, + "title": "testab" + } + ] + "###); + }) + .await; + + index + .search(json!({"q": "TestAb"}), |response, code| { + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###" + [ + { + "id": 2, + "title": "TestAB" + }, + { + "id": 3, + "title": "testab" + } + ] + "###); + }) + .await; + + index + .search(json!({"q": "tetsab"}), |response, code| { + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###" + [ + { + "id": 2, + "title": "TestAB" + }, + { + "id": 3, + "title": "testab" + } + ] + "###); + }) + .await; + + index + .search(json!({"q": "TetsAB"}), |response, code| { + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###" + [ + { + "id": 2, + "title": "TestAB" + }, + { + "id": 3, + "title": "testab" + } + ] + "###); + }) + .await; +} From c106906f8f339dad14ad8fd72dd5213dcdd27653 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 12 Jul 2023 16:47:30 +0200 Subject: [PATCH 2/2] deactivate camelCase segmentation --- Cargo.lock | 7 --- meilisearch/tests/search/mod.rs | 52 ++++++++++++++++++- .../tests/search/restrict_searchable.rs | 2 +- milli/Cargo.toml | 2 +- 4 files changed, 52 insertions(+), 11 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index bb6105741..e8747dc7a 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -709,7 +709,6 @@ dependencies = [ "csv", "deunicode", "either", - "finl_unicode", "fst", "irg-kvariants", "jieba-rs", @@ -1443,12 +1442,6 @@ dependencies = [ "nom_locate", ] -[[package]] -name = "finl_unicode" -version = "1.2.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "8fcfdc7a0362c9f4444381a9e697c79d435fe65b52a37466fc2c1184cee9edc6" - [[package]] name = "flate2" version = "1.0.26" diff --git a/meilisearch/tests/search/mod.rs b/meilisearch/tests/search/mod.rs index 1aa1d5805..9c80aed31 100644 --- a/meilisearch/tests/search/mod.rs +++ b/meilisearch/tests/search/mod.rs @@ -888,7 +888,8 @@ async fn camelcased_words() { { "id": 0, "title": "DeLonghi" }, { "id": 1, "title": "delonghi" }, { "id": 2, "title": "TestAB" }, - { "id": 3, "title": "testab" }, + { "id": 3, "title": "TestAb" }, + { "id": 4, "title": "testab" }, ]); index.add_documents(documents, None).await; index.wait_task(0).await; @@ -940,6 +941,10 @@ async fn camelcased_words() { }, { "id": 3, + "title": "TestAb" + }, + { + "id": 4, "title": "testab" } ] @@ -958,6 +963,10 @@ async fn camelcased_words() { }, { "id": 3, + "title": "TestAb" + }, + { + "id": 4, "title": "testab" } ] @@ -976,6 +985,10 @@ async fn camelcased_words() { }, { "id": 3, + "title": "TestAb" + }, + { + "id": 4, "title": "testab" } ] @@ -994,6 +1007,10 @@ async fn camelcased_words() { }, { "id": 3, + "title": "TestAb" + }, + { + "id": 4, "title": "testab" } ] @@ -1012,6 +1029,10 @@ async fn camelcased_words() { }, { "id": 3, + "title": "TestAb" + }, + { + "id": 4, "title": "testab" } ] @@ -1019,8 +1040,27 @@ async fn camelcased_words() { }) .await; + // with Typos index - .search(json!({"q": "tetsab"}), |response, code| { + .search(json!({"q": "dellonghi"}), |response, code| { + meili_snap::snapshot!(code, @"200 OK"); + meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###" + [ + { + "id": 0, + "title": "DeLonghi" + }, + { + "id": 1, + "title": "delonghi" + } + ] + "###); + }) + .await; + + index + .search(json!({"q": "TetsAB"}), |response, code| { meili_snap::snapshot!(code, @"200 OK"); meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###" [ @@ -1030,6 +1070,10 @@ async fn camelcased_words() { }, { "id": 3, + "title": "TestAb" + }, + { + "id": 4, "title": "testab" } ] @@ -1048,6 +1092,10 @@ async fn camelcased_words() { }, { "id": 3, + "title": "TestAb" + }, + { + "id": 4, "title": "testab" } ] diff --git a/meilisearch/tests/search/restrict_searchable.rs b/meilisearch/tests/search/restrict_searchable.rs index f119acea5..309729fca 100644 --- a/meilisearch/tests/search/restrict_searchable.rs +++ b/meilisearch/tests/search/restrict_searchable.rs @@ -240,7 +240,7 @@ async fn exactness_ranking_rule_order() { }, { "title": "Captain Marvel", - "desc": "CaptainMarvel", + "desc": "Captain the Marvel", "id": "2", }]), ) diff --git a/milli/Cargo.toml b/milli/Cargo.toml index bc1d9b7ee..854d29141 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -81,7 +81,7 @@ md5 = "0.7.0" rand = { version = "0.8.5", features = ["small_rng"] } [features] -all-tokenizations = ["charabia/default"] +all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek"] # Use POSIX semaphores instead of SysV semaphores in LMDB # For more information on this feature, see heed's Cargo.toml