From 26bdabcdec2f6f2e37b9e9afa56e26cdd21b572b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sun, 17 Nov 2019 11:14:01 +0100 Subject: [PATCH 1/2] Do not use a forked fst dependency --- Cargo.lock | 16 ++++++++-------- meilidb-core/Cargo.toml | 11 ++--------- meilidb-core/src/store/docs_words.rs | 2 +- meilidb-core/src/store/main.rs | 6 +++--- meilidb-core/src/store/synonyms.rs | 2 +- 5 files changed, 15 insertions(+), 22 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index e15bea55c..02ab5768c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -440,8 +440,8 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "fst" -version = "0.3.3" -source = "git+https://github.com/Kerollmops/fst.git?branch=arc-byte-slice#374512e91e9de1cb84f29d89a318c53720387816" +version = "0.3.5" +source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", "memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", @@ -797,9 +797,9 @@ source = "registry+https://github.com/rust-lang/crates.io-index" [[package]] name = "levenshtein_automata" version = "0.1.1" -source = "git+https://github.com/Kerollmops/levenshtein-automata.git?branch=arc-byte-slice#f85a32acf82727966191b81e1f921e041b5f970d" +source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ - "fst 0.3.3 (git+https://github.com/Kerollmops/fst.git?branch=arc-byte-slice)", + "fst 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -869,11 +869,11 @@ dependencies = [ "csv 1.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "deunicode 1.0.0 (registry+https://github.com/rust-lang/crates.io-index)", "env_logger 0.7.1 (registry+https://github.com/rust-lang/crates.io-index)", - "fst 0.3.3 (git+https://github.com/Kerollmops/fst.git?branch=arc-byte-slice)", + "fst 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)", "hashbrown 0.6.3 (registry+https://github.com/rust-lang/crates.io-index)", "heed 0.5.0 (registry+https://github.com/rust-lang/crates.io-index)", "indexmap 1.3.0 (registry+https://github.com/rust-lang/crates.io-index)", - "levenshtein_automata 0.1.1 (git+https://github.com/Kerollmops/levenshtein-automata.git?branch=arc-byte-slice)", + "levenshtein_automata 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)", "log 0.4.8 (registry+https://github.com/rust-lang/crates.io-index)", "meilidb-schema 0.6.0", "meilidb-tokenizer 0.6.1", @@ -2323,7 +2323,7 @@ dependencies = [ "checksum flate2 1.0.12 (registry+https://github.com/rust-lang/crates.io-index)" = "ad3c5233c9a940c8719031b423d7e6c16af66e031cb0420b0896f5245bf181d3" "checksum fnv 1.0.6 (registry+https://github.com/rust-lang/crates.io-index)" = "2fad85553e09a6f881f739c29f0b00b0f01357c743266d478b68951ce23285f3" "checksum fs_extra 1.1.0 (registry+https://github.com/rust-lang/crates.io-index)" = "5f2a4a2034423744d2cc7ca2068453168dcdb82c438419e639a26bd87839c674" -"checksum fst 0.3.3 (git+https://github.com/Kerollmops/fst.git?branch=arc-byte-slice)" = "" +"checksum fst 0.3.5 (registry+https://github.com/rust-lang/crates.io-index)" = "927fb434ff9f0115b215dc0efd2e4fbdd7448522a92a1aa37c77d6a2f8f1ebd6" "checksum fuchsia-cprng 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "a06f77d526c1a601b7c4cdd98f54b5eaabffc14d5f2f0296febdc7f357c6d3ba" "checksum fuchsia-zircon 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "2e9763c69ebaae630ba35f74888db465e49e259ba1bc0eda7d06f4a067615d82" "checksum fuchsia-zircon-sys 0.3.3 (registry+https://github.com/rust-lang/crates.io-index)" = "3dcaa9ae7725d12cdb85b3ad99a434db70b468c09ded17e012d86b5c1010f7a7" @@ -2361,7 +2361,7 @@ dependencies = [ "checksum jobserver 0.1.17 (registry+https://github.com/rust-lang/crates.io-index)" = "f2b1d42ef453b30b7387e113da1c83ab1605d90c5b4e0eb8e96d016ed3b8c160" "checksum kernel32-sys 0.2.2 (registry+https://github.com/rust-lang/crates.io-index)" = "7507624b29483431c0ba2d82aece8ca6cdba9382bff4ddd0f7490560c056098d" "checksum lazy_static 1.4.0 (registry+https://github.com/rust-lang/crates.io-index)" = "e2abad23fbc42b3700f2f279844dc832adb2b2eb069b2df918f455c4e18cc646" -"checksum levenshtein_automata 0.1.1 (git+https://github.com/Kerollmops/levenshtein-automata.git?branch=arc-byte-slice)" = "" +"checksum levenshtein_automata 0.1.1 (registry+https://github.com/rust-lang/crates.io-index)" = "73a004f877f468548d8d0ac4977456a249d8fabbdb8416c36db163dfc8f2e8ca" "checksum libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)" = "1a31a0627fdf1f6a39ec0dd577e101440b7db22672c0901fe00a9a6fbb5c24e8" "checksum lmdb-rkv-sys 0.9.6 (registry+https://github.com/rust-lang/crates.io-index)" = "7982ba0460e939e26a52ee12c8075deab0ebd44ed21881f656841b70e021b7c8" "checksum lock_api 0.3.1 (registry+https://github.com/rust-lang/crates.io-index)" = "f8912e782533a93a167888781b836336a6ca5da6175c05944c86cf28c31104dc" diff --git a/meilidb-core/Cargo.toml b/meilidb-core/Cargo.toml index c246b5368..5a0e10ad0 100644 --- a/meilidb-core/Cargo.toml +++ b/meilidb-core/Cargo.toml @@ -12,8 +12,10 @@ chrono = { version = "0.4.9", features = ["serde"] } crossbeam-channel = "0.4.0" deunicode = "1.0.0" env_logger = "0.7.0" +fst = "0.3.5" hashbrown = { version = "0.6.0", features = ["serde"] } heed = "0.5.0" +levenshtein_automata = { version = "0.1.1", features = ["fst_automaton"] } log = "0.4.8" meilidb-schema = { path = "../meilidb-schema", version = "0.6.0" } meilidb-tokenizer = { path = "../meilidb-tokenizer", version = "0.6.0" } @@ -26,15 +28,6 @@ siphasher = "0.3.0" slice-group-by = "0.2.6" zerocopy = "0.2.8" -[dependencies.levenshtein_automata] -git = "https://github.com/Kerollmops/levenshtein-automata.git" -branch = "arc-byte-slice" -features = ["fst_automaton"] - -[dependencies.fst] -git = "https://github.com/Kerollmops/fst.git" -branch = "arc-byte-slice" - [dev-dependencies] assert_matches = "1.3" csv = "1.0.7" diff --git a/meilidb-core/src/store/docs_words.rs b/meilidb-core/src/store/docs_words.rs index 2dbd805fe..e39aeb41c 100644 --- a/meilidb-core/src/store/docs_words.rs +++ b/meilidb-core/src/store/docs_words.rs @@ -39,7 +39,7 @@ impl DocsWords { match self.docs_words.get(reader, &document_id)? { Some(bytes) => { let len = bytes.len(); - let bytes = Arc::from(bytes); + let bytes = Arc::new(bytes.to_owned()); let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len).unwrap(); Ok(Some(fst::Set::from(fst))) } diff --git a/meilidb-core/src/store/main.rs b/meilidb-core/src/store/main.rs index 5755c2da1..eeb04d21f 100644 --- a/meilidb-core/src/store/main.rs +++ b/meilidb-core/src/store/main.rs @@ -31,7 +31,7 @@ impl Main { match self.main.get::(reader, WORDS_KEY)? { Some(bytes) => { let len = bytes.len(); - let bytes = Arc::from(bytes); + let bytes = Arc::new(bytes.to_owned()); let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len).unwrap(); Ok(Some(fst::Set::from(fst))) } @@ -68,7 +68,7 @@ impl Main { match self.main.get::(reader, SYNONYMS_KEY)? { Some(bytes) => { let len = bytes.len(); - let bytes = Arc::from(bytes); + let bytes = Arc::new(bytes.to_owned()); let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len).unwrap(); Ok(Some(fst::Set::from(fst))) } @@ -86,7 +86,7 @@ impl Main { match self.main.get::(reader, STOP_WORDS_KEY)? { Some(bytes) => { let len = bytes.len(); - let bytes = Arc::from(bytes); + let bytes = Arc::new(bytes.to_owned()); let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len).unwrap(); Ok(Some(fst::Set::from(fst))) } diff --git a/meilidb-core/src/store/synonyms.rs b/meilidb-core/src/store/synonyms.rs index 2c497b86a..9f4052170 100644 --- a/meilidb-core/src/store/synonyms.rs +++ b/meilidb-core/src/store/synonyms.rs @@ -30,7 +30,7 @@ impl Synonyms { match self.synonyms.get(reader, word)? { Some(bytes) => { let len = bytes.len(); - let bytes = Arc::from(bytes); + let bytes = Arc::new(bytes.to_owned()); let fst = fst::raw::Fst::from_shared_bytes(bytes, 0, len).unwrap(); Ok(Some(fst::Set::from(fst))) } From 2dd7751e098e470a944516bcaa49420fc464f2fa Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sun, 17 Nov 2019 11:43:00 +0100 Subject: [PATCH 2/2] Disable the fst MemMap feature --- Cargo.lock | 11 ----------- meilidb-core/Cargo.toml | 2 +- 2 files changed, 1 insertion(+), 12 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 02ab5768c..3c9d29cba 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -444,7 +444,6 @@ version = "0.3.5" source = "registry+https://github.com/rust-lang/crates.io-index" dependencies = [ "byteorder 1.3.2 (registry+https://github.com/rust-lang/crates.io-index)", - "memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)", ] [[package]] @@ -953,15 +952,6 @@ dependencies = [ "libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)", ] -[[package]] -name = "memmap" -version = "0.6.2" -source = "registry+https://github.com/rust-lang/crates.io-index" -dependencies = [ - "libc 0.2.65 (registry+https://github.com/rust-lang/crates.io-index)", - "winapi 0.3.8 (registry+https://github.com/rust-lang/crates.io-index)", -] - [[package]] name = "memoffset" version = "0.5.3" @@ -2371,7 +2361,6 @@ dependencies = [ "checksum matches 0.1.8 (registry+https://github.com/rust-lang/crates.io-index)" = "7ffc5c5338469d4d3ea17d269fa8ea3512ad247247c30bd2df69e68309ed0a08" "checksum maybe-uninit 2.0.0 (registry+https://github.com/rust-lang/crates.io-index)" = "60302e4db3a61da70c0cb7991976248362f30319e88850c487b9b95bbf059e00" "checksum memchr 2.2.1 (registry+https://github.com/rust-lang/crates.io-index)" = "88579771288728879b57485cc7d6b07d648c9f0141eb955f8ab7f9d45394468e" -"checksum memmap 0.6.2 (registry+https://github.com/rust-lang/crates.io-index)" = "e2ffa2c986de11a9df78620c01eeaaf27d94d3ff02bf81bfcca953102dd0c6ff" "checksum memoffset 0.5.3 (registry+https://github.com/rust-lang/crates.io-index)" = "75189eb85871ea5c2e2c15abbdd541185f63b408415e5051f5cac122d8c774b9" "checksum mime 0.2.6 (registry+https://github.com/rust-lang/crates.io-index)" = "ba626b8a6de5da682e1caa06bdb42a335aee5a84db8e5046a3e8ab17ba0a3ae0" "checksum mime_guess 1.8.7 (registry+https://github.com/rust-lang/crates.io-index)" = "0d977de9ee851a0b16e932979515c0f3da82403183879811bc97d50bd9cc50f7" diff --git a/meilidb-core/Cargo.toml b/meilidb-core/Cargo.toml index 5a0e10ad0..0b34be89c 100644 --- a/meilidb-core/Cargo.toml +++ b/meilidb-core/Cargo.toml @@ -12,7 +12,7 @@ chrono = { version = "0.4.9", features = ["serde"] } crossbeam-channel = "0.4.0" deunicode = "1.0.0" env_logger = "0.7.0" -fst = "0.3.5" +fst = { version = "0.3.5", default-features = false } hashbrown = { version = "0.6.0", features = ["serde"] } heed = "0.5.0" levenshtein_automata = { version = "0.1.1", features = ["fst_automaton"] }