mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 12:27:13 +02:00
Merge #4945
4945: Add swedish in default pipelines r=dureuill a=ManyTheFish # Summary ## Fix Swedish support In Swedish, the characters `å`/`ä`/`ö` are completely different from `a` or `o` and should not be normalized to the same character. Because the Swedish specialized pipeline was not activated by default, these characters were normalized even with the settings: ```json { "localizedAttributes": [ { "locales": ["swe"], "attributePatterns": ["*"] } ] } ``` ## Update Charabia adding German support German segmentation will now be activated using the setting: ```json { "localizedAttributes": [ { "locales": ["deu"], "attributePatterns": ["*"] } ] } ``` # TODO - [x] Activate Swedish Pipeline - [x] Add a test to avoid future regressions - [x] Update Charabia Co-authored-by: ManyTheFish <many@meilisearch.com>
This commit is contained in:
commit
bd34ed01d9
6 changed files with 215 additions and 10 deletions
|
@ -17,7 +17,7 @@ bincode = "1.3.3"
|
|||
bstr = "1.9.1"
|
||||
bytemuck = { version = "1.16.1", features = ["extern_crate_alloc"] }
|
||||
byteorder = "1.5.0"
|
||||
charabia = { version = "0.9.0", default-features = false }
|
||||
charabia = { version = "0.9.1", default-features = false }
|
||||
concat-arrays = "0.1.2"
|
||||
crossbeam-channel = "0.5.13"
|
||||
deserr = "0.6.2"
|
||||
|
@ -106,6 +106,8 @@ all-tokenizations = [
|
|||
"charabia/greek",
|
||||
"charabia/khmer",
|
||||
"charabia/vietnamese",
|
||||
"charabia/swedish-recomposition",
|
||||
"charabia/german-segmentation",
|
||||
]
|
||||
|
||||
# Use POSIX semaphores instead of SysV semaphores in LMDB
|
||||
|
@ -138,6 +140,9 @@ khmer = ["charabia/khmer"]
|
|||
# allow vietnamese specialized tokenization
|
||||
vietnamese = ["charabia/vietnamese"]
|
||||
|
||||
# allow german specialized tokenization
|
||||
german = ["charabia/german-segmentation"]
|
||||
|
||||
# force swedish character recomposition
|
||||
swedish-recomposition = ["charabia/swedish-recomposition"]
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue