From ff479c865d45f0d14c9b88e72052a16fd4ba94b1 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Fri, 21 Aug 2020 16:08:32 +0200 Subject: [PATCH] Replace pipe by ringtail to improve stdin read performances --- Cargo.lock | 27 +++++++-------------------- Cargo.toml | 2 +- src/bin/indexer.rs | 6 ++++-- 3 files changed, 12 insertions(+), 23 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index fc39b8f47..d59b09063 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -341,16 +341,6 @@ dependencies = [ "itertools", ] -[[package]] -name = "crossbeam-channel" -version = "0.4.3" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "09ee0cc8804d5393478d743b035099520087a5186f3b93fa58cec08fa62407b6" -dependencies = [ - "cfg-if", - "crossbeam-utils", -] - [[package]] name = "crossbeam-deque" version = "0.7.3" @@ -1027,8 +1017,8 @@ dependencies = [ "memmap", "once_cell", "oxidized-mtbl", - "pipe", "rayon", + "ringtail", "roaring", "serde", "slice-group-by", @@ -1345,15 +1335,6 @@ version = "0.1.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "8b870d8c151b6f2fb93e84a13146138f05d02ed11c7e7c54f8826aaaf7c9f184" -[[package]] -name = "pipe" -version = "0.3.0" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "bcd11e042e056991b5df9c0c5ae6bd0cce219b74294c40f65b89f40f7030106c" -dependencies = [ - "crossbeam-channel", -] - [[package]] name = "pkg-config" version = "0.3.17" @@ -1672,6 +1653,12 @@ dependencies = [ "winapi 0.3.8", ] +[[package]] +name = "ringtail" +version = "0.3.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "21215c1b9d8f7832b433255bd9eea3e2779aa55b21b2f8e13aad62c74749b237" + [[package]] name = "roaring" version = "0.6.0" diff --git a/Cargo.toml b/Cargo.toml index 5880fb4d4..b6eef69ea 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -22,8 +22,8 @@ levenshtein_automata = { version = "0.2.0", features = ["fst_automaton"] } memmap = "0.7.0" once_cell 
= "1.4.0" oxidized-mtbl = { git = "https://github.com/Kerollmops/oxidized-mtbl.git", rev = "13294cc" } -pipe = "0.3.0" rayon = "1.3.1" +ringtail = "0.3.0" roaring = { git = "https://github.com/Kerollmops/roaring-rs.git", branch = "mem-usage" } slice-group-by = "0.2.6" smallstr = "0.2.0" diff --git a/src/bin/indexer.rs b/src/bin/indexer.rs index 73c1ef334..6d27d7ee2 100644 --- a/src/bin/indexer.rs +++ b/src/bin/indexer.rs @@ -73,7 +73,9 @@ struct Opt { verbose: usize, /// CSV file to index, if unspecified the CSV is read from standard input. - /// Note that it is much faster to index from a file. + /// + /// Note that it is much faster to index from a file: when the indexer reads from stdin, + /// it dedicates a thread to that, and the context switches can slow down the indexing jobs. csv_file: Option<PathBuf>, } @@ -507,7 +509,7 @@ fn main() -> anyhow::Result<()> { None => { let mut csv_readers = Vec::new(); let mut writers = Vec::new(); - for (r, w) in (0..num_threads).map(|_| pipe::pipe()) { + for (r, w) in (0..num_threads).map(|_| ringtail::io::pipe()) { let r = Box::new(r) as Box<dyn Read + Send>; csv_readers.push(csv::Reader::from_reader(r)); writers.push(w);