From 13f127763799ed76098a52fb61fb8df5c31196cb Mon Sep 17 00:00:00 2001 From: Jakub Jirutka Date: Mon, 24 Apr 2023 00:26:08 +0200 Subject: [PATCH] Allow to disable specialized tokenizations (again) In PR #2773, I added the `chinese`, `hebrew`, `japanese` and `thai` feature flags to allow meilisearch to be built without huge specialized tokenizations that took up 90% of the meilisearch binary size. Unfortunately, due to some recent changes, this doesn't work anymore. The problem lies in excessive use of the `default` feature flag, which infects the dependency graph. Instead of adding `default-features = false` here and there, it's easier and more future-proof to not declare `default` in `milli` and `meilisearch-types`. I've renamed it to `all-tokenizations`, which also makes it a bit clearer what it's about. --- benchmarks/Cargo.toml | 4 ++-- meilisearch-types/Cargo.toml | 4 ++-- meilisearch/Cargo.toml | 2 +- milli/Cargo.toml | 2 +- milli/src/search/mod.rs | 2 +- milli/src/search/new/tests/mod.rs | 2 +- milli/src/update/index_documents/mod.rs | 2 +- 7 files changed, 9 insertions(+), 9 deletions(-) diff --git a/benchmarks/Cargo.toml b/benchmarks/Cargo.toml index 3a100b034..aa4229e05 100644 --- a/benchmarks/Cargo.toml +++ b/benchmarks/Cargo.toml @@ -13,7 +13,7 @@ license.workspace = true [dependencies] anyhow = "1.0.70" csv = "1.2.1" -milli = { path = "../milli", default-features = false } +milli = { path = "../milli" } mimalloc = { version = "0.1.36", default-features = false } serde_json = { version = "1.0.95", features = ["preserve_order"] } @@ -31,7 +31,7 @@ flate2 = "1.0.25" reqwest = { version = "0.11.16", features = ["blocking", "rustls-tls"], default-features = false } [features] -default = ["milli/default"] +default = ["milli/all-tokenizations"] [[bench]] name = "search_songs" diff --git a/meilisearch-types/Cargo.toml b/meilisearch-types/Cargo.toml index 77a3fd53b..a3221e6b2 100644 --- a/meilisearch-types/Cargo.toml +++ b/meilisearch-types/Cargo.toml @@ -22,7 +22,7 
@@ file-store = { path = "../file-store" } flate2 = "1.0.25" fst = "0.4.7" memmap2 = "0.5.10" -milli = { path = "../milli", default-features = false } +milli = { path = "../milli" } roaring = { version = "0.10.1", features = ["serde"] } serde = { version = "1.0.160", features = ["derive"] } serde-cs = "0.2.4" @@ -40,7 +40,7 @@ meili-snap = { path = "../meili-snap" } [features] # all specialized tokenizations -default = ["milli/default"] +all-tokenizations = ["milli/all-tokenizations"] # chinese specialized tokenization chinese = ["milli/chinese"] diff --git a/meilisearch/Cargo.toml b/meilisearch/Cargo.toml index 80c5ee6c1..8fcd69591 100644 --- a/meilisearch/Cargo.toml +++ b/meilisearch/Cargo.toml @@ -106,7 +106,7 @@ vergen = { version = "7.5.1", default-features = false, features = ["git"] } zip = { version = "0.6.4", optional = true } [features] -default = ["analytics", "meilisearch-types/default", "mini-dashboard"] +default = ["analytics", "meilisearch-types/all-tokenizations", "mini-dashboard"] analytics = ["segment"] mini-dashboard = ["actix-web-static-files", "static-files", "anyhow", "cargo_toml", "hex", "reqwest", "sha-1", "tempfile", "zip"] chinese = ["meilisearch-types/chinese"] diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 3fc347174..de0f4e31d 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -69,7 +69,7 @@ rand = {version = "0.8.5", features = ["small_rng"] } fuzzcheck = "0.12.1" [features] -default = [ "charabia/default" ] +all-tokenizations = [ "charabia/default" ] # Use POSIX semaphores instead of SysV semaphores in LMDB # For more information on this feature, see heed's Cargo.toml diff --git a/milli/src/search/mod.rs b/milli/src/search/mod.rs index 97725b9bf..92777cabc 100644 --- a/milli/src/search/mod.rs +++ b/milli/src/search/mod.rs @@ -204,7 +204,7 @@ mod test { use super::*; use crate::index::tests::TempIndex; - #[cfg(feature = "default")] + #[cfg(feature = "japanese")] #[test] fn test_kanji_language_detection() { let index = 
TempIndex::new(); diff --git a/milli/src/search/new/tests/mod.rs b/milli/src/search/new/tests/mod.rs index 906aeda83..e500d16fb 100644 --- a/milli/src/search/new/tests/mod.rs +++ b/milli/src/search/new/tests/mod.rs @@ -4,7 +4,7 @@ pub mod distinct; pub mod exactness; pub mod geo_sort; pub mod integration; -#[cfg(feature = "default")] +#[cfg(feature = "all-tokenizations")] pub mod language; pub mod ngram_split_words; pub mod proximity; diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 94fd17764..bbfa1d00c 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -1581,7 +1581,7 @@ mod tests { assert_eq!(count, 4); } - #[cfg(feature = "default")] + #[cfg(feature = "chinese")] #[test] fn test_meilisearch_1714() { let index = TempIndex::new();