Allow to disable specialized tokenizations (again)

In PR #2773, I added the `chinese`, `hebrew`, `japanese` and `thai`
feature flags to allow melisearch to be built without huge specialed
tokenizations that took up 90% of the melisearch binary size.
Unfortunately, due to some recent changes, this doesn't work anymore.
The problem lies in excessive use of the `default` feature flag, which
infects the dependency graph.

Instead of adding `default-features = false` here and there, it's easier
and more future-proof to not declare `default` in `milli` and
`meilisearch-types`. I've renamed it to `all-tokenizers`, which also
makes it a bit clearer what it's about.
This commit is contained in:
Jakub Jirutka 2023-04-24 00:26:08 +02:00
parent a95128df6b
commit 13f1277637
7 changed files with 9 additions and 9 deletions

View File

@ -13,7 +13,7 @@ license.workspace = true
[dependencies]
anyhow = "1.0.70"
csv = "1.2.1"
milli = { path = "../milli", default-features = false }
milli = { path = "../milli" }
mimalloc = { version = "0.1.36", default-features = false }
serde_json = { version = "1.0.95", features = ["preserve_order"] }
@ -31,7 +31,7 @@ flate2 = "1.0.25"
reqwest = { version = "0.11.16", features = ["blocking", "rustls-tls"], default-features = false }
[features]
default = ["milli/default"]
default = ["milli/all-tokenizations"]
[[bench]]
name = "search_songs"

View File

@ -22,7 +22,7 @@ file-store = { path = "../file-store" }
flate2 = "1.0.25"
fst = "0.4.7"
memmap2 = "0.5.10"
milli = { path = "../milli", default-features = false }
milli = { path = "../milli" }
roaring = { version = "0.10.1", features = ["serde"] }
serde = { version = "1.0.160", features = ["derive"] }
serde-cs = "0.2.4"
@ -40,7 +40,7 @@ meili-snap = { path = "../meili-snap" }
[features]
# all specialized tokenizations
default = ["milli/default"]
all-tokenizations = ["milli/all-tokenizations"]
# chinese specialized tokenization
chinese = ["milli/chinese"]

View File

@ -106,7 +106,7 @@ vergen = { version = "7.5.1", default-features = false, features = ["git"] }
zip = { version = "0.6.4", optional = true }
[features]
default = ["analytics", "meilisearch-types/default", "mini-dashboard"]
default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"]
analytics = ["segment"]
mini-dashboard = ["actix-web-static-files", "static-files", "anyhow", "cargo_toml", "hex", "reqwest", "sha-1", "tempfile", "zip"]
chinese = ["meilisearch-types/chinese"]

View File

@ -69,7 +69,7 @@ rand = {version = "0.8.5", features = ["small_rng"] }
fuzzcheck = "0.12.1"
[features]
default = [ "charabia/default" ]
all-tokenizations = [ "charabia/default" ]
# Use POSIX semaphores instead of SysV semaphores in LMDB
# For more information on this feature, see heed's Cargo.toml

View File

@ -204,7 +204,7 @@ mod test {
use super::*;
use crate::index::tests::TempIndex;
#[cfg(feature = "default")]
#[cfg(feature = "japanese")]
#[test]
fn test_kanji_language_detection() {
let index = TempIndex::new();

View File

@ -4,7 +4,7 @@ pub mod distinct;
pub mod exactness;
pub mod geo_sort;
pub mod integration;
#[cfg(feature = "default")]
#[cfg(feature = "all-tokenizations")]
pub mod language;
pub mod ngram_split_words;
pub mod proximity;

View File

@ -1581,7 +1581,7 @@ mod tests {
assert_eq!(count, 4);
}
#[cfg(feature = "default")]
#[cfg(feature = "chinese")]
#[test]
fn test_meilisearch_1714() {
let index = TempIndex::new();