Add German tokenization pipeline

This commit is contained in:
ManyTheFish 2024-09-19 13:30:07 +02:00
parent f77661ec44
commit 7d6768e4c4
3 changed files with 7 additions and 0 deletions

View File

@ -66,3 +66,5 @@ khmer = ["milli/khmer"]
vietnamese = ["milli/vietnamese"] vietnamese = ["milli/vietnamese"]
# force swedish character recomposition # force swedish character recomposition
swedish-recomposition = ["milli/swedish-recomposition"] swedish-recomposition = ["milli/swedish-recomposition"]
# allow german specialized tokenization
german = ["milli/german"]

View File

@ -153,6 +153,7 @@ greek = ["meilisearch-types/greek"]
khmer = ["meilisearch-types/khmer"] khmer = ["meilisearch-types/khmer"]
vietnamese = ["meilisearch-types/vietnamese"] vietnamese = ["meilisearch-types/vietnamese"]
swedish-recomposition = ["meilisearch-types/swedish-recomposition"] swedish-recomposition = ["meilisearch-types/swedish-recomposition"]
german = ["meilisearch-types/german"]
[package.metadata.mini-dashboard] [package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.14/build.zip" assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.14/build.zip"

View File

@ -107,6 +107,7 @@ all-tokenizations = [
"charabia/khmer", "charabia/khmer",
"charabia/vietnamese", "charabia/vietnamese",
"charabia/swedish-recomposition", "charabia/swedish-recomposition",
"charabia/german-segmentation",
] ]
# Use POSIX semaphores instead of SysV semaphores in LMDB # Use POSIX semaphores instead of SysV semaphores in LMDB
@ -139,6 +140,9 @@ khmer = ["charabia/khmer"]
# allow vietnamese specialized tokenization # allow vietnamese specialized tokenization
vietnamese = ["charabia/vietnamese"] vietnamese = ["charabia/vietnamese"]
# allow german specialized tokenization
german = ["charabia/german-segmentation"]
# force swedish character recomposition # force swedish character recomposition
swedish-recomposition = ["charabia/swedish-recomposition"] swedish-recomposition = ["charabia/swedish-recomposition"]