From e9580fe61946477d83b9222ad4c00058a9868824 Mon Sep 17 00:00:00 2001 From: ManyTheFish Date: Wed, 25 Sep 2024 11:03:17 +0200 Subject: [PATCH] Add turkish normalization --- meilisearch-types/Cargo.toml | 5 ++++- meilisearch/Cargo.toml | 1 + milli/Cargo.toml | 4 ++++ 3 files changed, 9 insertions(+), 1 deletion(-) diff --git a/meilisearch-types/Cargo.toml b/meilisearch-types/Cargo.toml index cb4937e57..0dae024f2 100644 --- a/meilisearch-types/Cargo.toml +++ b/meilisearch-types/Cargo.toml @@ -66,5 +66,8 @@ khmer = ["milli/khmer"] vietnamese = ["milli/vietnamese"] # force swedish character recomposition swedish-recomposition = ["milli/swedish-recomposition"] -# force german character recomposition +# allow german tokenization german = ["milli/german"] +# allow turkish normalization +turkish = ["milli/turkish"] + diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index 2a16e1017..c193c89d4 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -154,6 +154,7 @@ khmer = ["meilisearch-types/khmer"] vietnamese = ["meilisearch-types/vietnamese"] swedish-recomposition = ["meilisearch-types/swedish-recomposition"] german = ["meilisearch-types/german"] +turkish = ["meilisearch-types/turkish"] [package.metadata.mini-dashboard] assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.14/build.zip" diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 5fc2d65c8..70d09ce4e 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -108,6 +108,7 @@ all-tokenizations = [ "charabia/vietnamese", "charabia/swedish-recomposition", "charabia/german-segmentation", + "charabia/turkish", ] # Use POSIX semaphores instead of SysV semaphores in LMDB @@ -146,5 +147,8 @@ german = ["charabia/german-segmentation"] # force swedish character recomposition swedish-recomposition = ["charabia/swedish-recomposition"] +# allow turkish normalization +turkish = ["charabia/turkish"] + # allow CUDA support, see cuda = ["candle-core/cuda"]