Allow building without specialized tokenizations

Some of these specialized tokenizations bundle huge dictionaries
that currently account for 90% (!) of the meilisearch binary size.

This commit adds `chinese`, `hebrew`, `japanese`, and `thai` feature
flags that are propagated via milli down to the charabia crate. To stay
backward compatible, all of them are enabled by default.

Related to meilisearch/milli#632
This commit is contained in:
Jakub Jirutka 2022-09-14 20:57:13 +02:00
parent 5b57114771
commit 935f18efcf
3 changed files with 24 additions and 4 deletions

View File

@ -7,7 +7,7 @@ edition = "2021"
enum-iterator = "0.7.0"
hmac = "0.12.1"
meilisearch-types = { path = "../meilisearch-types" }
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4" }
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4", default-features = false }
rand = "0.8.4"
serde = { version = "1.0.136", features = ["derive"] }
serde_json = { version = "1.0.85", features = ["preserve_order"] }

View File

@ -46,7 +46,7 @@ jsonwebtoken = "8.0.1"
log = "0.4.14"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
meilisearch-lib = { path = "../meilisearch-lib" }
meilisearch-lib = { path = "../meilisearch-lib", default-features = false }
mimalloc = { version = "0.1.29", default-features = false }
mime = "0.3.16"
num_cpus = "1.13.1"
@ -90,7 +90,7 @@ urlencoding = "2.1.0"
yaup = "0.2.0"
[features]
default = ["analytics", "mini-dashboard"]
default = ["analytics", "meilisearch-lib/default", "mini-dashboard"]
metrics = ["prometheus"]
analytics = ["segment"]
mini-dashboard = [
@ -104,6 +104,10 @@ mini-dashboard = [
"tempfile",
"zip",
]
# Specialized tokenizations. Each flag only forwards to the meilisearch-lib
# feature of the same name; none of them is listed in `default` directly,
# they are enabled by default through "meilisearch-lib/default".
chinese = ["meilisearch-lib/chinese"]
hebrew = ["meilisearch-lib/hebrew"]
japanese = ["meilisearch-lib/japanese"]
thai = ["meilisearch-lib/thai"]
[package.metadata.mini-dashboard]
assets-url = "https://github.com/meilisearch/mini-dashboard/releases/download/v0.2.1/build.zip"

View File

@ -28,7 +28,7 @@ lazy_static = "1.4.0"
log = "0.4.14"
meilisearch-auth = { path = "../meilisearch-auth" }
meilisearch-types = { path = "../meilisearch-types" }
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4" }
milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.33.4", default-features = false }
mime = "0.3.16"
num_cpus = "1.13.1"
obkv = "0.2.0"
@ -64,3 +64,19 @@ nelson = { git = "https://github.com/meilisearch/nelson.git", rev = "675f1388554
paste = "1.0.6"
proptest = "1.0.0"
proptest-derive = "0.3.0"
[features]
# All specialized tokenizations. milli is declared with
# `default-features = false`, so its default feature set is re-enabled here
# to keep the default build backward compatible.
default = ["milli/default"]
# Chinese specialized tokenization (forwarded to the milli feature of the same name)
chinese = ["milli/chinese"]
# Hebrew specialized tokenization (forwarded to the milli feature of the same name)
hebrew = ["milli/hebrew"]
# Japanese specialized tokenization (forwarded to the milli feature of the same name)
japanese = ["milli/japanese"]
# Thai specialized tokenization (forwarded to the milli feature of the same name)
thai = ["milli/thai"]