4684: Update Charabia v0.8.11 r=irevoire a=ManyTheFish

# Update Charabia v0.8.11

### Adds a new normalizer to normalize œ to oe and æ to ae
Now search words containing `œ` or `æ` will be retrieved using `oe` or `ae`, like `Daemon` <=> `Dæmon`

### Fix: make `chinese-normalization-pinyin` feature flag compile
Fixes #4629



Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
meili-bors[bot] 2024-06-06 08:59:49 +00:00 committed by GitHub
commit cb765ad249
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
2 changed files with 118 additions and 132 deletions

248
Cargo.lock generated
View File

@ -898,9 +898,9 @@ dependencies = [
[[package]] [[package]]
name = "charabia" name = "charabia"
version = "0.8.10" version = "0.8.11"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "933f20f2269b24d32fd5503e7b3c268af902190daf8d9d2b73ed2e75d77c00b4" checksum = "11a09ae38cfcc153f01576c3f579dfd916e0320f1b474f298c8d680b2dd92eb6"
dependencies = [ dependencies = [
"aho-corasick", "aho-corasick",
"cow-utils", "cow-utils",
@ -989,7 +989,7 @@ dependencies = [
"anstream", "anstream",
"anstyle", "anstyle",
"clap_lex", "clap_lex",
"strsim", "strsim 0.10.0",
] ]
[[package]] [[package]]
@ -1280,12 +1280,12 @@ dependencies = [
[[package]] [[package]]
name = "darling" name = "darling"
version = "0.20.3" version = "0.20.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0209d94da627ab5605dcccf08bb18afa5009cfbef48d8a8b7d7bdbc79be25c5e" checksum = "83b2eb4d90d12bdda5ed17de686c2acb4c57914f8f921b8da7e112b5a36f3fe1"
dependencies = [ dependencies = [
"darling_core 0.20.3", "darling_core 0.20.9",
"darling_macro 0.20.3", "darling_macro 0.20.9",
] ]
[[package]] [[package]]
@ -1298,21 +1298,21 @@ dependencies = [
"ident_case", "ident_case",
"proc-macro2", "proc-macro2",
"quote", "quote",
"strsim", "strsim 0.10.0",
"syn 1.0.109", "syn 1.0.109",
] ]
[[package]] [[package]]
name = "darling_core" name = "darling_core"
version = "0.20.3" version = "0.20.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "177e3443818124b357d8e76f53be906d60937f0d3a90773a664fa63fa253e621" checksum = "622687fe0bac72a04e5599029151f5796111b90f1baaa9b544d807a5e31cd120"
dependencies = [ dependencies = [
"fnv", "fnv",
"ident_case", "ident_case",
"proc-macro2", "proc-macro2",
"quote", "quote",
"strsim", "strsim 0.11.1",
"syn 2.0.60", "syn 2.0.60",
] ]
@ -1329,11 +1329,11 @@ dependencies = [
[[package]] [[package]]
name = "darling_macro" name = "darling_macro"
version = "0.20.3" version = "0.20.9"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "836a9bbc7ad63342d6d6e7b815ccab164bc77a2d95d84bc3117a8c0d5c98e2d5" checksum = "733cabb43482b1a1b53eee8583c2b9e8684d592215ea83efd305dd31bc2f0178"
dependencies = [ dependencies = [
"darling_core 0.20.3", "darling_core 0.20.9",
"quote", "quote",
"syn 2.0.60", "syn 2.0.60",
] ]
@ -1386,6 +1386,15 @@ dependencies = [
"derive_builder_macro 0.13.1", "derive_builder_macro 0.13.1",
] ]
[[package]]
name = "derive_builder"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0350b5cb0331628a5916d6c5c0b72e97393b8b6b03b47a9284f4e7f5a405ffd7"
dependencies = [
"derive_builder_macro 0.20.0",
]
[[package]] [[package]]
name = "derive_builder_core" name = "derive_builder_core"
version = "0.12.0" version = "0.12.0"
@ -1410,6 +1419,18 @@ dependencies = [
"syn 1.0.109", "syn 1.0.109",
] ]
[[package]]
name = "derive_builder_core"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d48cda787f839151732d396ac69e3473923d54312c070ee21e9effcaa8ca0b1d"
dependencies = [
"darling 0.20.9",
"proc-macro2",
"quote",
"syn 2.0.60",
]
[[package]] [[package]]
name = "derive_builder_macro" name = "derive_builder_macro"
version = "0.12.0" version = "0.12.0"
@ -1430,6 +1451,16 @@ dependencies = [
"syn 1.0.109", "syn 1.0.109",
] ]
[[package]]
name = "derive_builder_macro"
version = "0.20.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "206868b8242f27cecce124c19fd88157fbd0dd334df2587f36417bafbc85097b"
dependencies = [
"derive_builder_core 0.20.0",
"syn 2.0.60",
]
[[package]] [[package]]
name = "derive_more" name = "derive_more"
version = "0.99.17" version = "0.99.17"
@ -1457,7 +1488,7 @@ dependencies = [
"serde-cs", "serde-cs",
"serde_json", "serde_json",
"serde_urlencoded", "serde_urlencoded",
"strsim", "strsim 0.10.0",
] ]
[[package]] [[package]]
@ -1710,29 +1741,6 @@ dependencies = [
"syn 2.0.60", "syn 2.0.60",
] ]
[[package]]
name = "env_filter"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a009aa4810eb158359dda09d0c87378e4bbb89b5a801f016885a4707ba24f7ea"
dependencies = [
"log",
"regex",
]
[[package]]
name = "env_logger"
version = "0.11.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "38b35839ba51819680ba087cd351788c9a3c476841207e0b8cee0b04722343b9"
dependencies = [
"anstream",
"anstyle",
"env_filter",
"humantime",
"log",
]
[[package]] [[package]]
name = "equivalent" name = "equivalent"
version = "1.0.1" version = "1.0.1"
@ -1787,7 +1795,7 @@ version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d15473d7f83b54a44826907af16ae5727eaacaf6e53b51474016d3efd9aa35d5" checksum = "d15473d7f83b54a44826907af16ae5727eaacaf6e53b51474016d3efd9aa35d5"
dependencies = [ dependencies = [
"darling 0.20.3", "darling 0.20.9",
"proc-macro2", "proc-macro2",
"quote", "quote",
"syn 2.0.60", "syn 2.0.60",
@ -2382,12 +2390,6 @@ version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421" checksum = "c4a1e36c821dbe04574f602848a19f742f4fb3c98d40449f11bcad18d6b17421"
[[package]]
name = "humantime"
version = "2.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "9a3a5bfb195931eeb336b2a7b4d761daec841b97f947d34394601737a7bba5e4"
[[package]] [[package]]
name = "hyper" name = "hyper"
version = "0.14.27" version = "0.14.27"
@ -2781,9 +2783,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera" name = "lindera"
version = "0.30.0" version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a1bbf252ea3490053dc397539ece0b510924f2f72605fa28d3e858d86f43ec88" checksum = "dcd4fa369654517f72c10b24adf03ad4ce69d19facb79c3cb3cf9b4580ac352f"
dependencies = [ dependencies = [
"lindera-analyzer", "lindera-analyzer",
"lindera-core", "lindera-core",
@ -2794,9 +2796,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera-analyzer" name = "lindera-analyzer"
version = "0.30.0" version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "87febfec0e2859ce2154fb90dd6f66b774ddb0b6e264b44f8e3d1303c9dcedd7" checksum = "c2cba7fe275cb8ec4c594cfee9cc39e48b71e02a089457d52f3e70dc146a8133"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bincode", "bincode",
@ -2824,9 +2826,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera-cc-cedict" name = "lindera-cc-cedict"
version = "0.30.0" version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "fcb91bb8a93ab0f95dbc3c43b5105354bb059134ef731154f75a64b5d919e71d" checksum = "240adf9faba3f09ad16557aefcd316dd00ebb940ac94334a629660d772f118c1"
dependencies = [ dependencies = [
"bincode", "bincode",
"byteorder", "byteorder",
@ -2838,29 +2840,21 @@ dependencies = [
[[package]] [[package]]
name = "lindera-cc-cedict-builder" name = "lindera-cc-cedict-builder"
version = "0.30.0" version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6022a8309a287dbef425fd09a61585351670c83001d74f6c089979e2330b683" checksum = "f12241f9e74babe708a0b9441d9f3fa67cb29fd01257918f30ffd480ca568820"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bincode",
"byteorder",
"csv",
"encoding",
"env_logger",
"glob",
"lindera-compress",
"lindera-core", "lindera-core",
"lindera-decompress", "lindera-decompress",
"log", "lindera-dictionary-builder",
"yada",
] ]
[[package]] [[package]]
name = "lindera-compress" name = "lindera-compress"
version = "0.30.0" version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32363cbcf433f915e7d77c2a0c410db2d6b23442e80715cf2cf6b9864078a500" checksum = "50f9f7a858d70ff9e4383cbd507ca9e98c8faf0319e08c10df4c30cb58c9ca6c"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"flate2", "flate2",
@ -2869,9 +2863,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera-core" name = "lindera-core"
version = "0.30.0" version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9a0e858753a02b1a3524fae4fbb11ca4b3a947128fd7854b797386562678be8" checksum = "7f09810ab98ce2a084d788ac38fbb7b31697f34bc47c61de0d880320a674bd15"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bincode", "bincode",
@ -2886,9 +2880,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera-decompress" name = "lindera-decompress"
version = "0.30.0" version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0e406345f6f8b665b9a129c67079c18ca9d97e9d171d102b4106a64a592c285e" checksum = "d53400c9b2dd6b45f82d9fa5b5efe079f3acaf6ce609dba8d42c8a76baaa2b12"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"flate2", "flate2",
@ -2897,9 +2891,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera-dictionary" name = "lindera-dictionary"
version = "0.30.0" version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e2a3ec0e5fd6768a27c6ec1040e8470d3a5926418f7afe065859e98aabb3bfe" checksum = "2053d064a515839250438b8dfa6cf445e2b97633232ded34a54f267e945d196e"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bincode", "bincode",
@ -2921,10 +2915,32 @@ dependencies = [
] ]
[[package]] [[package]]
name = "lindera-filter" name = "lindera-dictionary-builder"
version = "0.30.0" version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1badaf51bad051185ea4917ba91bbbf2d6f8167e155647e21e0eaaef0982a95d" checksum = "14f486924055f8bedcc5877572e4dc91fbc10370862430ac2e5f7f0d671a18c8"
dependencies = [
"anyhow",
"bincode",
"byteorder",
"csv",
"derive_builder 0.20.0",
"encoding",
"encoding_rs",
"encoding_rs_io",
"glob",
"lindera-compress",
"lindera-core",
"lindera-decompress",
"log",
"yada",
]
[[package]]
name = "lindera-filter"
version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb3904fc279f0297f6fd6210435adab1f8c82ba84eba8635407c791af51c0d8a"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"csv", "csv",
@ -2947,9 +2963,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera-ipadic" name = "lindera-ipadic"
version = "0.30.0" version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "129ec16366354998f9791467ad38731539197747f649e573ead845358271ce25" checksum = "4aa3ef2f1f6838b0fa2e2fca2896242bb83bc877c1760cdb6fa23449ab95d664"
dependencies = [ dependencies = [
"bincode", "bincode",
"byteorder", "byteorder",
@ -2961,31 +2977,21 @@ dependencies = [
[[package]] [[package]]
name = "lindera-ipadic-builder" name = "lindera-ipadic-builder"
version = "0.30.0" version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f0979a56bc57e9c9be2996dff232c47aa146a2e7baebf5dd567e388eba3dd90" checksum = "a41287db18eadb58d73a04d49778d41c161549fbbbe155d4338976b7b8541c7d"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bincode",
"byteorder",
"csv",
"encoding_rs",
"encoding_rs_io",
"env_logger",
"glob",
"lindera-compress",
"lindera-core", "lindera-core",
"lindera-decompress", "lindera-decompress",
"log", "lindera-dictionary-builder",
"serde",
"yada",
] ]
[[package]] [[package]]
name = "lindera-ipadic-neologd" name = "lindera-ipadic-neologd"
version = "0.30.0" version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "20076660c4e79ef0316735b44e18ec7644e54786acdee8946c972d5f97086d0f" checksum = "49382256f245078400bf7e72663f9eb30afcd9ed54cd46f29d7db1be529678e1"
dependencies = [ dependencies = [
"bincode", "bincode",
"byteorder", "byteorder",
@ -2997,31 +3003,21 @@ dependencies = [
[[package]] [[package]]
name = "lindera-ipadic-neologd-builder" name = "lindera-ipadic-neologd-builder"
version = "0.30.0" version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "eccd18ed5f65d1d64ac0cbfa1d6827bfbbaf6530520ae6847e6a91ee38f47e20" checksum = "5ae9cfd2fda68ef526ef0c7b50c5d4d5582a4daa6ecd0cea9e2b0b62564a2a5d"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bincode",
"byteorder",
"csv",
"encoding_rs",
"encoding_rs_io",
"env_logger",
"glob",
"lindera-compress",
"lindera-core", "lindera-core",
"lindera-decompress", "lindera-decompress",
"log", "lindera-dictionary-builder",
"serde",
"yada",
] ]
[[package]] [[package]]
name = "lindera-ko-dic" name = "lindera-ko-dic"
version = "0.30.0" version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "59073171566c3e498ca048e84c2d0a7e117a42f36c8eb7d7163e65ac38bd6d48" checksum = "7f86d03a863f3ae1d269e7b7d4dd2cce9385a53463479bafc5d7aa48719f36db"
dependencies = [ dependencies = [
"bincode", "bincode",
"byteorder", "byteorder",
@ -3037,29 +3033,21 @@ dependencies = [
[[package]] [[package]]
name = "lindera-ko-dic-builder" name = "lindera-ko-dic-builder"
version = "0.30.0" version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae176afa8535ca2a5ee9471873f85d531db0a6c32a3c42b41084506aac22b577" checksum = "bd0f44f2e56358c5879dfb5e7f76cc6ba7853ec31082c4e3f8fb65fb2d849c51"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bincode",
"byteorder",
"csv",
"encoding",
"env_logger",
"glob",
"lindera-compress",
"lindera-core", "lindera-core",
"lindera-decompress", "lindera-decompress",
"log", "lindera-dictionary-builder",
"yada",
] ]
[[package]] [[package]]
name = "lindera-tokenizer" name = "lindera-tokenizer"
version = "0.30.0" version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "457285bdde84571aa510c9e05371904305a55e8a541fa1473d4393062f06932d" checksum = "7c5182735cdc2832ac757b31e8a5b150a3514357a30efe3dec212f8dcb06ba14"
dependencies = [ dependencies = [
"bincode", "bincode",
"lindera-core", "lindera-core",
@ -3071,9 +3059,9 @@ dependencies = [
[[package]] [[package]]
name = "lindera-unidic" name = "lindera-unidic"
version = "0.30.0" version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "5839980be552dfa639b70964c61914a9ad014148663679b0e148aa72e5e30f23" checksum = "6c63da104728dd1cf14bfa564753cbfa996f6078ed2e23e31475bd1d639fc597"
dependencies = [ dependencies = [
"bincode", "bincode",
"byteorder", "byteorder",
@ -3089,22 +3077,14 @@ dependencies = [
[[package]] [[package]]
name = "lindera-unidic-builder" name = "lindera-unidic-builder"
version = "0.30.0" version = "0.31.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "dcaab8f061d5b944b1e424f49c7efbf8f276e8a72e4f4ff956d01e46d481f008" checksum = "04acecbc068dac21766a1b7ed1f2608b6f250d10b4f8bff67abc2a00437a0974"
dependencies = [ dependencies = [
"anyhow", "anyhow",
"bincode",
"byteorder",
"csv",
"encoding",
"env_logger",
"glob",
"lindera-compress",
"lindera-core", "lindera-core",
"lindera-decompress", "lindera-decompress",
"log", "lindera-dictionary-builder",
"yada",
] ]
[[package]] [[package]]
@ -4909,6 +4889,12 @@ version = "0.10.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623" checksum = "73473c0e59e6d5812c5dfe2a064a6444949f089e20eec9a2e5506596494e4623"
[[package]]
name = "strsim"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]] [[package]]
name = "strum" name = "strum"
version = "0.26.2" version = "0.26.2"

View File

@ -17,7 +17,7 @@ bincode = "1.3.3"
bstr = "1.9.0" bstr = "1.9.0"
bytemuck = { version = "1.14.0", features = ["extern_crate_alloc"] } bytemuck = { version = "1.14.0", features = ["extern_crate_alloc"] }
byteorder = "1.5.0" byteorder = "1.5.0"
charabia = { version = "0.8.10", default-features = false } charabia = { version = "0.8.11", default-features = false }
concat-arrays = "0.1.2" concat-arrays = "0.1.2"
crossbeam-channel = "0.5.11" crossbeam-channel = "0.5.11"
deserr = "0.6.1" deserr = "0.6.1"