deactivate camelCase segmentation

This commit is contained in:
ManyTheFish 2023-07-12 16:47:30 +02:00
parent 9c0691156f
commit c106906f8f
4 changed files with 52 additions and 11 deletions

7
Cargo.lock generated
View File

@@ -709,7 +709,6 @@ dependencies = [
"csv", "csv",
"deunicode", "deunicode",
"either", "either",
"finl_unicode",
"fst", "fst",
"irg-kvariants", "irg-kvariants",
"jieba-rs", "jieba-rs",
@@ -1443,12 +1442,6 @@ dependencies = [
"nom_locate", "nom_locate",
] ]
[[package]]
name = "finl_unicode"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8fcfdc7a0362c9f4444381a9e697c79d435fe65b52a37466fc2c1184cee9edc6"
[[package]] [[package]]
name = "flate2" name = "flate2"
version = "1.0.26" version = "1.0.26"

View File

@@ -888,7 +888,8 @@ async fn camelcased_words() {
{ "id": 0, "title": "DeLonghi" }, { "id": 0, "title": "DeLonghi" },
{ "id": 1, "title": "delonghi" }, { "id": 1, "title": "delonghi" },
{ "id": 2, "title": "TestAB" }, { "id": 2, "title": "TestAB" },
{ "id": 3, "title": "testab" }, { "id": 3, "title": "TestAb" },
{ "id": 4, "title": "testab" },
]); ]);
index.add_documents(documents, None).await; index.add_documents(documents, None).await;
index.wait_task(0).await; index.wait_task(0).await;
@@ -940,6 +941,10 @@ async fn camelcased_words() {
}, },
{ {
"id": 3, "id": 3,
"title": "TestAb"
},
{
"id": 4,
"title": "testab" "title": "testab"
} }
] ]
@@ -958,6 +963,10 @@ async fn camelcased_words() {
}, },
{ {
"id": 3, "id": 3,
"title": "TestAb"
},
{
"id": 4,
"title": "testab" "title": "testab"
} }
] ]
@@ -976,6 +985,10 @@ async fn camelcased_words() {
}, },
{ {
"id": 3, "id": 3,
"title": "TestAb"
},
{
"id": 4,
"title": "testab" "title": "testab"
} }
] ]
@@ -994,6 +1007,10 @@ async fn camelcased_words() {
}, },
{ {
"id": 3, "id": 3,
"title": "TestAb"
},
{
"id": 4,
"title": "testab" "title": "testab"
} }
] ]
@@ -1012,6 +1029,10 @@ async fn camelcased_words() {
}, },
{ {
"id": 3, "id": 3,
"title": "TestAb"
},
{
"id": 4,
"title": "testab" "title": "testab"
} }
] ]
@@ -1019,8 +1040,27 @@ async fn camelcased_words() {
}) })
.await; .await;
// with Typos
index index
.search(json!({"q": "tetsab"}), |response, code| { .search(json!({"q": "dellonghi"}), |response, code| {
meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[
{
"id": 0,
"title": "DeLonghi"
},
{
"id": 1,
"title": "delonghi"
}
]
"###);
})
.await;
index
.search(json!({"q": "TetsAB"}), |response, code| {
meili_snap::snapshot!(code, @"200 OK"); meili_snap::snapshot!(code, @"200 OK");
meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###" meili_snap::snapshot!(meili_snap::json_string!(response["hits"]), @r###"
[ [
@@ -1030,6 +1070,10 @@ async fn camelcased_words() {
}, },
{ {
"id": 3, "id": 3,
"title": "TestAb"
},
{
"id": 4,
"title": "testab" "title": "testab"
} }
] ]
@@ -1048,6 +1092,10 @@ async fn camelcased_words() {
}, },
{ {
"id": 3, "id": 3,
"title": "TestAb"
},
{
"id": 4,
"title": "testab" "title": "testab"
} }
] ]

View File

@@ -240,7 +240,7 @@ async fn exactness_ranking_rule_order() {
}, },
{ {
"title": "Captain Marvel", "title": "Captain Marvel",
"desc": "CaptainMarvel", "desc": "Captain the Marvel",
"id": "2", "id": "2",
}]), }]),
) )

View File

@@ -81,7 +81,7 @@ md5 = "0.7.0"
rand = { version = "0.8.5", features = ["small_rng"] } rand = { version = "0.8.5", features = ["small_rng"] }
[features] [features]
all-tokenizations = ["charabia/default"] all-tokenizations = ["charabia/chinese", "charabia/hebrew", "charabia/japanese", "charabia/thai", "charabia/korean", "charabia/greek"]
# Use POSIX semaphores instead of SysV semaphores in LMDB # Use POSIX semaphores instead of SysV semaphores in LMDB
# For more information on this feature, see heed's Cargo.toml # For more information on this feature, see heed's Cargo.toml