4269: Remove dependency that requires libstdc++ r=dureuill a=dureuill

Removes the dependency that caused the additional runtime dependency on libstdc++ by disabling the default features of the hf tokenizer.

## Discussion

- This removes a feature that is using a C++ dependency and is supposed to accelerate the tokenizer. As the tokenizer is likely to be a significant bottleneck for embedding texts using a HF model, this is an issue.
- We should at least rerun the movies vector indexing and check that it still works correctly and that it has a runtime in the ballpark of what it used to be.

Co-authored-by: Louis Dureuil <louis.dureuil@xinra.net>
This commit is contained in:
meili-bors[bot] 2023-12-19 12:26:48 +00:00 committed by GitHub
commit fb9db1eba6
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
3 changed files with 2 additions and 7 deletions

5
Cargo.lock generated
View File

@ -1592,9 +1592,6 @@ name = "esaxx-rs"
version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d817e038c30374a4bcb22f94d0a8a0e216958d4c3dcde369b1439fec4bdda6e6"
dependencies = [
"cc",
]
[[package]]
name = "fancy-regex"
@ -5310,11 +5307,9 @@ version = "0.14.1"
source = "git+https://github.com/huggingface/tokenizers.git?tag=v0.14.1#6357206cdcce4d78ffb1e0372feb456caea09375"
dependencies = [
"aho-corasick",
"clap",
"derive_builder",
"esaxx-rs",
"getrandom",
"indicatif",
"itertools 0.11.0",
"lazy_static",
"log",

View File

@ -26,7 +26,7 @@ ENV MEILI_HTTP_ADDR 0.0.0.0:7700
ENV MEILI_SERVER_PROVIDER docker
RUN apk update --quiet \
&& apk add -q --no-cache libgcc libstdc++ tini curl
&& apk add -q --no-cache libgcc tini curl
# add meilisearch and meilitool to the `/bin` so you can run it from anywhere
# and it's easy to find.

View File

@ -77,7 +77,7 @@ csv = "1.2.1"
candle-core = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
candle-transformers = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
candle-nn = { git = "https://github.com/huggingface/candle.git", version = "0.3.1" }
tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.14.1", version = "0.14.1" }
tokenizers = { git = "https://github.com/huggingface/tokenizers.git", tag = "v0.14.1", version = "0.14.1", default_features = false, features = ["onig"] }
hf-hub = { git = "https://github.com/dureuill/hf-hub.git", branch = "rust_tls", default_features = false, features = [
"online",
] }