From 8a4d05b7bbb033e0a8f6bd93d89f92450d357c55 Mon Sep 17 00:00:00 2001 From: mpostma Date: Thu, 19 Nov 2020 16:00:14 +0100 Subject: [PATCH] remove meilisearch tokenizer --- Cargo.lock | 80 +++++++++++++++------------ Cargo.toml | 1 - meilisearch-core/Cargo.toml | 1 - meilisearch-core/src/automaton/mod.rs | 1 - meilisearch-core/src/raw_indexer.rs | 1 - meilisearch-http/Cargo.toml | 1 - 6 files changed, 44 insertions(+), 41 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 77b660188..f48920953 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -210,7 +210,7 @@ dependencies = [ "rustls 0.18.1", "tokio-rustls", "webpki", - "webpki-roots 0.20.0", + "webpki-roots", ] [[package]] @@ -332,7 +332,7 @@ checksum = "d4d7d63395147b81a9e570bcc6243aaf71c017bd666d4909cfef0085bdda8d73" [[package]] name = "assert-json-diff" version = "1.0.1" -source = "git+https://github.com/qdequele/assert-json-diff#9012a0c8866d0f2db0ef9a6242e4a19d1e8c67e4" +source = "git+https://github.com/qdequele/assert-json-diff?branch=master#9012a0c8866d0f2db0ef9a6242e4a19d1e8c67e4" dependencies = [ "serde", "serde_json", @@ -383,7 +383,7 @@ dependencies = [ "actix-rt", "actix-service", "base64 0.13.0", - "bytes 0.5.6", + "bytes", "cfg-if 1.0.0", "derive_more", "futures-core", @@ -790,6 +790,12 @@ dependencies = [ "memchr", ] +[[package]] +name = "data-encoding" +version = "2.3.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "993a608597367c6377b258c25d7120740f00ed23a2252b729b1932dd7866f908" + [[package]] name = "debugid" version = "0.7.2" @@ -962,16 +968,6 @@ dependencies = [ "percent-encoding", ] -[[package]] -name = "fs2" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "9564fc758e15025b46aa6643b1b77d047d1a56a1aea6e01002ac0c7026876213" -dependencies = [ - "libc", - "winapi 0.3.9", -] - [[package]] name = "fs_extra" version = "1.2.0" @@ -1229,9 +1225,9 @@ checksum = "b328f6260a7e51bdb0ca6b68e6ea27ee3d11fba5dee930896ee7ff6ad5fc072c" [[package]] name = "heed-types" -version = "0.7.2" +version = "0.7.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e628efb08beaee58355f80dc4adba79d644940ea9eef60175ea17dc218aab405" +checksum = "72fc61caee13e85ea330eabf0c6c7098c511ff173bcb57a760b1eda3bba9f6eb" dependencies = [ "bincode", "heed-traits", @@ -1516,9 +1512,9 @@ dependencies = [ [[package]] name = "libc" -version = "0.2.81" +version = "0.2.80" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1482821306169ec4d07f6aca392a4681f66c75c9918aa49641a2595db64053cb" +checksum = "4d58d1b70b004888f764dfbf6a26a3b0342a1632d33968e4a179d8011c760614" [[package]] name = "linked-hash-map" @@ -1616,7 +1612,6 @@ dependencies = [ "log", "meilisearch-error", "meilisearch-schema", - "meilisearch-tokenizer", "meilisearch-types", "once_cell", "ordered-float", @@ -1666,7 +1661,6 @@ dependencies = [ "meilisearch-core", "meilisearch-error", "meilisearch-schema", - "meilisearch-tokenizer", "mime", "once_cell", "rand 0.8.1", @@ -1702,14 +1696,6 @@ dependencies = [ "zerocopy", ] -[[package]] -name = "meilisearch-tokenizer" -version = "0.17.0" -dependencies = [ - "deunicode", - "slice-group-by", -] - [[package]] name = "meilisearch-types" version = "0.17.0" @@ -2353,6 +2339,7 @@ dependencies = [ "url", "wasm-bindgen", "wasm-bindgen-futures", + "wasm-bindgen-test", "web-sys", "webpki-roots 0.20.0", "winreg 0.7.0", @@ -2459,6 +2446,12 @@ dependencies = [ "winapi-util", ] +[[package]] +name = "scoped-tls" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ea6a9290e3c9cf0f18145ef7ffa62d68ee0bf5fcd651017e586dc7fd5da448c2" + [[package]] name = "scopeguard" version = "1.1.0" @@ -3388,6 +3381,30 @@ name = "web-sys" version = "0.3.46" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "222b1ef9334f92a21d3fb53dc3fd80f30836959a90f9274a626d7e06315ba3c3" +dependencies = [ + "console_error_panic_hook", + "js-sys", + "scoped-tls", + "wasm-bindgen", + "wasm-bindgen-futures", + "wasm-bindgen-test-macro", +] + +[[package]] +name = "webpki" +version = "0.21.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8e38c0608262c46d4a56202ebabdeb094cef7e560ca7a226c6bf055188aa4ea" +dependencies = [ + "proc-macro2", + "quote", +] + +[[package]] +name = "webpki-roots" +version = "0.20.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "0f20dea7535251981a9670857150d571846545088359b28e4951d350bdaf179f" dependencies = [ "js-sys", "wasm-bindgen", @@ -3403,15 +3420,6 @@ dependencies = [ "untrusted", ] -[[package]] -name = "webpki-roots" -version = "0.20.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0f20dea7535251981a9670857150d571846545088359b28e4951d350bdaf179f" -dependencies = [ - "webpki", -] - [[package]] name = "webpki-roots" version = "0.21.0" diff --git a/Cargo.toml b/Cargo.toml index 9356916b6..913ab34c8 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -3,7 +3,6 @@ members = [ "meilisearch-core", "meilisearch-http", "meilisearch-schema", - "meilisearch-tokenizer", "meilisearch-types", ] diff --git a/meilisearch-core/Cargo.toml b/meilisearch-core/Cargo.toml index 8687c7814..dbd369000 100644 --- a/meilisearch-core/Cargo.toml +++ b/meilisearch-core/Cargo.toml @@ -26,7 +26,6 @@ levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] } log = "0.4.11" meilisearch-error = { path = "../meilisearch-error", version = "0.17.0" } meilisearch-schema = { path = "../meilisearch-schema", version = "0.17.0" } -meilisearch-tokenizer = { path = "../meilisearch-tokenizer", version = "0.17.0" } meilisearch-types = { path = "../meilisearch-types", version = "0.17.0" } once_cell = "1.5.2" ordered-float = { version = "2.0.1", features = ["serde"] } diff --git a/meilisearch-core/src/automaton/mod.rs b/meilisearch-core/src/automaton/mod.rs index e7cb9733b..c47645041 100644 --- a/meilisearch-core/src/automaton/mod.rs +++ b/meilisearch-core/src/automaton/mod.rs @@ -1,6 +1,5 @@ mod dfa; -use meilisearch_tokenizer::is_cjk; pub use self::dfa::{build_dfa, build_prefix_dfa, build_exact_dfa}; diff --git a/meilisearch-core/src/raw_indexer.rs b/meilisearch-core/src/raw_indexer.rs index 89c62a3d4..471d0cfff 100644 --- a/meilisearch-core/src/raw_indexer.rs +++ b/meilisearch-core/src/raw_indexer.rs @@ -4,7 +4,6 @@ use std::convert::TryFrom; use deunicode::deunicode_with_tofu; use meilisearch_schema::IndexedPos; -use meilisearch_tokenizer::{is_cjk, SeqTokenizer, Token, Tokenizer}; use sdset::SetBuf; use crate::{DocIndex, DocumentId}; diff --git a/meilisearch-http/Cargo.toml b/meilisearch-http/Cargo.toml index 30dc4f7d9..6c066e393 100644 --- a/meilisearch-http/Cargo.toml +++ b/meilisearch-http/Cargo.toml @@ -35,7 +35,6 @@ main_error = "0.1.1" meilisearch-core = { path = "../meilisearch-core", version = "0.17.0" } meilisearch-error = { path = "../meilisearch-error", version = "0.17.0" } meilisearch-schema = { path = "../meilisearch-schema", version = "0.17.0" } -meilisearch-tokenizer = {path = "../meilisearch-tokenizer", version = "0.17.0"} mime = "0.3.16" once_cell = "1.5.2" rand = "0.8.1"