2021-02-12 16:15:09 +01:00
|
|
|
[package]
name = "milli"
# The edition is set locally; the workspace-inherited form is kept commented
# out further down. NOTE(review): confirm the local pin is still intended.
edition = "2021"
publish = false

# Metadata inherited from the workspace manifest.
version.workspace = true
authors.workspace = true
description.workspace = true
homepage.workspace = true
readme.workspace = true
# edition.workspace = true
license.workspace = true
|
2021-02-12 16:15:09 +01:00
|
|
|
|
|
|
|
[dependencies]
bimap = { version = "0.6.3", features = ["serde"] }
bincode = "1.3.3"
bstr = "1.9.1"
bytemuck = { version = "1.18.0", features = ["extern_crate_alloc"] }
byteorder = "1.5.0"
# Default features are disabled here; charabia features are opted into
# through this crate's [features] table.
charabia = { version = "0.9.1", default-features = false }
concat-arrays = "0.1.2"
crossbeam-channel = "0.5.13"
deserr = "0.6.2"
either = { version = "1.13.0", features = ["serde"] }
flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7"
fxhash = "0.2.1"
geoutils = "0.5.1"
grenad = { version = "0.4.7", default-features = false, features = [
    "rayon",
    "tempfile",
] }
heed = { version = "0.20.3", default-features = false, features = [
    "serde-json",
    "serde-bincode",
    "read-txn-no-tls",
] }
indexmap = { version = "2.2.6", features = ["serde"] }
json-depth-checker = { path = "../json-depth-checker" }
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
memchr = "2.5.0"
memmap2 = "0.9.4"
obkv = "0.2.2"
once_cell = "1.19.0"
ordered-float = "4.2.1"
rayon = "1.10.0"
roaring = { version = "0.10.6", features = ["serde"] }
rstar = { version = "0.12.0", features = ["serde"] }
serde = { version = "1.0.204", features = ["derive"] }
serde_json = { version = "1.0.120", features = ["preserve_order"] }
slice-group-by = "0.3.1"
smallstr = { version = "0.3.0", features = ["serde"] }
smallvec = "1.13.2"
smartstring = "1.0.1"
tempfile = "3.10.1"
thiserror = "1.0.61"
time = { version = "0.3.36", features = [
    "serde-well-known",
    "formatting",
    "parsing",
    "macros",
] }
uuid = { version = "1.10.0", features = ["v4"] }

filter-parser = { path = "../filter-parser" }

# documents words self-join
itertools = "0.13.0"

csv = "1.3.0"
candle-core = { version = "0.6.0" }
candle-transformers = { version = "0.6.0" }
candle-nn = { version = "0.6.0" }
# Fetched from git at tag v0.15.2 instead of from the registry.
tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.15.2", version = "0.15.2", default-features = false, features = [
    "onig",
] }
# Fetched from git (dureuill/hf-hub), tracking the `rust_tls` branch.
# NOTE(review): no `rev` or `tag` is given, so the exact commit is presumably
# fixed only by the lockfile; confirm that this is acceptable.
hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default-features = false, features = [
    "online",
] }
tiktoken-rs = "0.5.9"
liquid = "0.26.6"
# Pinned to an exact git revision.
rhai = { git = "https://github.com/rhaiscript/rhai", rev = "ef3df63121d27aacd838f366f2b83fd65f20a1e4", features = ["serde", "no_module", "no_custom_syntax", "no_time", "sync"] }
arroy = "0.5.0"
# Also declared under [dev-dependencies], where the `small_rng` feature is added.
rand = "0.8.5"
tracing = "0.1.40"
ureq = { version = "2.10.0", features = ["json"] }
url = "2.5.2"
rayon-par-bridge = "0.1.0"
|
2021-02-12 16:15:09 +01:00
|
|
|
|
|
|
|
[dev-dependencies]
mimalloc = { version = "0.1.43", default-features = false }
big_s = "1.0.2"
insta = "1.39.0"
maplit = "1.0.2"
md5 = "0.7.0"
meili-snap = { path = "../meili-snap" }
# Same version as the [dependencies] entry, with the `small_rng` feature added.
rand = { version = "0.8.5", features = ["small_rng"] }
|
2021-02-12 16:15:09 +01:00
|
|
|
|
|
|
|
[features]
# Enable charabia's `default` feature set (the charabia dependency itself is
# declared with `default-features = false`).
all-tokenizations = [
    "charabia/default",
]

# Use POSIX semaphores instead of SysV semaphores in LMDB.
# For more information on this feature, see heed's Cargo.toml.
lmdb-posix-sem = ["heed/posix-sem"]

# Enable specialized tokenization for Chinese.
chinese = ["charabia/chinese"]
# Enables `chinese` plus charabia's `chinese-normalization-pinyin` feature.
chinese-pinyin = ["chinese", "charabia/chinese-normalization-pinyin"]

# Enable specialized tokenization for Hebrew.
hebrew = ["charabia/hebrew"]

# Enable specialized tokenization for Japanese.
japanese = ["charabia/japanese"]
# Forwards to charabia's `japanese-transliteration` feature.
# NOTE(review): unlike `chinese-pinyin`, this does not also enable `japanese`;
# confirm that this is intended.
japanese-transliteration = ["charabia/japanese-transliteration"]

# Enable specialized tokenization for Korean.
korean = ["charabia/korean"]

# Enable specialized tokenization for Thai.
thai = ["charabia/thai"]

# Enable specialized tokenization for Greek.
greek = ["charabia/greek"]

# Enable specialized tokenization for Khmer.
khmer = ["charabia/khmer"]

# Enable specialized tokenization for Vietnamese.
vietnamese = ["charabia/vietnamese"]

# Enable specialized tokenization for German (charabia's `german-segmentation`).
german = ["charabia/german-segmentation"]

# Force Swedish character recomposition.
swedish-recomposition = ["charabia/swedish-recomposition"]

# Enable specialized tokenization for Turkish.
turkish = ["charabia/turkish"]

# Enable CUDA support (forwards to candle-core's `cuda` feature),
# see <https://github.com/meilisearch/meilisearch/issues/4306>
cuda = ["candle-core/cuda"]
|