diff --git a/Cargo.lock b/Cargo.lock index 94aea9e2a..f0ebc2ff6 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -79,12 +79,12 @@ dependencies = [ [[package]] name = "atty" -version = "0.2.14" +version = "0.2.11" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8" +checksum = "9a7d5b8723950951411ee34d271d99dddcc2035a16ab25310ea2c8cfd4369652" dependencies = [ - "hermit-abi", "libc", + "termion", "winapi 0.3.8", ] @@ -164,7 +164,7 @@ version = "0.2.13" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "31accafdb70df7871592c058eca3985b71104e15ac32f64706022c58867da931" dependencies = [ - "lazy_static", + "lazy_static 1.4.0", "memchr", "regex-automata", "serde", @@ -228,6 +228,17 @@ version = "0.1.10" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822" +[[package]] +name = "chrono" +version = "0.4.13" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c74d84029116787153e02106bf53e66828452a4b325cc8652b788b5967c0a0b6" +dependencies = [ + "num-integer", + "num-traits", + "time", +] + [[package]] name = "clap" version = "2.33.1" @@ -281,7 +292,7 @@ dependencies = [ "criterion-plot", "csv", "itertools", - "lazy_static", + "lazy_static 1.4.0", "num-traits", "oorandom", "plotters", @@ -324,7 +335,7 @@ dependencies = [ "autocfg 1.0.0", "cfg-if", "crossbeam-utils", - "lazy_static", + "lazy_static 1.4.0", "maybe-uninit", "memoffset", "scopeguard", @@ -348,7 +359,7 @@ checksum = "c3c7c73a2d1e9fc0886a08b93e98eb643461230d5f1925e4036204d5f2e261a8" dependencies = [ "autocfg 1.0.0", "cfg-if", - "lazy_static", + "lazy_static 1.4.0", ] [[package]] @@ -845,6 +856,12 @@ dependencies = [ "winapi-build", ] +[[package]] +name = "lazy_static" +version = "0.2.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "76f033c7ad61445c5b347c7382dd1237847eb1bce590fe50365dcb33d546be73" + [[package]] name = "lazy_static" version = "1.4.0" @@ -951,6 +968,7 @@ dependencies = [ "itertools", "jemallocator", "levenshtein_automata", + "log 0.4.8", "memmap", "once_cell", "oxidized-mtbl", @@ -960,6 +978,7 @@ dependencies = [ "slice-group-by", "smallstr", "smallvec", + "stderrlog", "structopt", "tempfile", "tokio", @@ -1115,6 +1134,16 @@ dependencies = [ "version_check 0.9.2", ] +[[package]] +name = "num-integer" +version = "0.1.43" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8d59457e662d541ba17869cf51cf177c0b5f0cbf476c66bdc90bf1edac4f875b" +dependencies = [ + "autocfg 1.0.0", + "num-traits", +] + [[package]] name = "num-traits" version = "0.2.12" @@ -1134,6 +1163,12 @@ dependencies = [ "libc", ] +[[package]] +name = "numtoa" +version = "0.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "b8f8bdf33df195859076e54ab11ee78a1b208382d3a26ec40d142ffc1ecc49ef" + [[package]] name = "once_cell" version = "1.4.0" @@ -1505,7 +1540,7 @@ dependencies = [ "crossbeam-deque", "crossbeam-queue", "crossbeam-utils", - "lazy_static", + "lazy_static 1.4.0", "num_cpus", ] @@ -1524,6 +1559,15 @@ version = "0.1.56" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84" +[[package]] +name = "redox_termios" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76" +dependencies = [ + "redox_syscall", +] + [[package]] name = "regex" version = "1.3.9" @@ -1738,6 +1782,19 @@ dependencies = [ "winapi 0.3.8", ] +[[package]] +name = "stderrlog" +version = "0.4.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "32e5ee9b90a5452c570a0b0ac1c99ae9498db7e56e33d74366de7f2a7add7f25" +dependencies = [ + "atty", + "chrono", + "log 0.4.8", + "termcolor", + "thread_local", +] + [[package]] name = "structopt" version = "0.3.14" @@ -1745,7 +1802,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "863246aaf5ddd0d6928dfeb1a9ca65f505599e4e1b399935ef7e75107516b4ef" dependencies = [ "clap", - "lazy_static", + "lazy_static 1.4.0", "structopt-derive", ] @@ -1810,6 +1867,27 @@ dependencies = [ "winapi 0.3.8", ] +[[package]] +name = "termcolor" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "bb6bfa289a4d7c5766392812c0a1f4c1ba45afa1ad47803c11e1f407d846d75f" +dependencies = [ + "winapi-util", +] + +[[package]] +name = "termion" +version = "1.5.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c22cec9d8978d906be5ac94bceb5a010d885c626c4c8855721a4dbd20e3ac905" +dependencies = [ + "libc", + "numtoa", + "redox_syscall", + "redox_termios", +] + [[package]] name = "textwrap" version = "0.11.0" @@ -1819,6 +1897,16 @@ dependencies = [ "unicode-width", ] +[[package]] +name = "thread_local" +version = "0.3.4" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1697c4b57aeeb7a536b647165a2825faddffb1d3bad386d507709bd51a90bb14" +dependencies = [ + "lazy_static 0.2.11", + "unreachable", +] + [[package]] name = "time" version = "0.1.43" @@ -1849,7 +1937,7 @@ dependencies = [ "fnv", "futures-core", "iovec", - "lazy_static", + "lazy_static 1.4.0", "libc", "memchr", "mio", @@ -2010,6 +2098,15 @@ version = "0.2.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c" +[[package]] +name = "unreachable" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56" +dependencies = [ + "void", +] + [[package]] name = "url" version = "2.1.1" @@ -2045,6 +2142,12 @@ version = "0.9.2" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b5a972e5669d67ba988ce3dc826706fb0a8b01471c088cb0b6110b805cc36aed" +[[package]] +name = "void" +version = "1.0.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d" + [[package]] name = "walkdir" version = "2.3.1" @@ -2115,7 +2218,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ded84f06e0ed21499f6184df0e0cb3494727b0c5da89534e0fcc55c51d812101" dependencies = [ "bumpalo", - "lazy_static", + "lazy_static 1.4.0", "log 0.4.8", "proc-macro2", "quote", diff --git a/Cargo.toml b/Cargo.toml index 5d62757e4..4a991436f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -27,6 +27,10 @@ smallvec = "1.4.0" structopt = { version = "0.3.14", default-features = false } tempfile = "3.1.0" +# logging +log = "0.4.8" +stderrlog = "0.4.3" + # best proximity indexmap = "1.4.0" diff --git a/src/best_proximity.rs b/src/best_proximity.rs index cb6302a16..6dda4d1fb 100644 --- a/src/best_proximity.rs +++ b/src/best_proximity.rs @@ -1,6 +1,7 @@ use std::cmp; use std::time::Instant; +use log::debug; use crate::iter_shortest_paths::astar_bag; const ONE_ATTRIBUTE: u32 = 1000; @@ -153,18 +154,18 @@ impl BestProximity { }, ); - eprintln!("BestProximity::next() took {:.02?}", before.elapsed()); + debug!("BestProximity::next() took {:.02?}", before.elapsed()); match result { Some((paths, proximity)) => { self.best_proximity = proximity + 1; // We retrieve the last path that we convert into a Vec let paths: Vec<_> = paths.map(|p| p.iter().filter_map(Node::position).collect()).collect(); - eprintln!("result: {} {:?}", proximity, paths); + debug!("result: {} {:?}", proximity, paths); Some((proximity, paths)) }, None => { - eprintln!("result: {:?}", None as Option<()>); + debug!("result: {:?}", None as Option<()>); self.best_proximity += 1; None }, diff --git a/src/bin/indexer.rs b/src/bin/indexer.rs index a9191b618..6f490fae7 100644 --- a/src/bin/indexer.rs +++ b/src/bin/indexer.rs @@ -11,6 +11,7 @@ use cow_utils::CowUtils; use fst::{Streamer, IntoStreamer}; use heed::EnvOpenOptions; use heed::types::*; +use log::debug; use oxidized_mtbl::{Reader, ReaderOptions, Writer, Merger, MergerOptions}; use rayon::prelude::*; use roaring::RoaringBitmap; @@ -86,7 +87,7 @@ struct MtblKvStore(Option); impl MtblKvStore { fn from_indexed(mut indexed: Indexed) -> anyhow::Result { - eprintln!("Creating an MTBL store from an Indexed..."); + debug!("Creating an MTBL store from an Indexed..."); let outfile = tempfile::tempfile()?; let mut out = Writer::new(outfile, None)?; @@ -152,7 +153,7 @@ impl MtblKvStore { let out = out.into_inner()?; - eprintln!("MTBL store created!"); + debug!("MTBL store created!"); Ok(MtblKvStore(Some(out))) } @@ -198,7 +199,7 @@ impl MtblKvStore { fn from_many(stores: Vec, mut f: F) -> anyhow::Result<()> where F: FnMut(&[u8], &[u8]) -> anyhow::Result<()> { - eprintln!("Merging {} MTBL stores...", stores.len()); + debug!("Merging {} MTBL stores...", stores.len()); let before = Instant::now(); let mmaps: Vec<_> = stores.iter().flat_map(|m| { @@ -217,7 +218,7 @@ impl MtblKvStore { (f)(k, v)?; } - eprintln!("MTBL stores merged in {:.02?}!", before.elapsed()); + debug!("MTBL stores merged in {:.02?}!", before.elapsed()); Ok(()) } } @@ -256,7 +257,7 @@ fn index_csv( max_mem_usage: usize, ) -> anyhow::Result> { - eprintln!("{:?}: Indexing into an Indexed...", thread_index); + debug!("{:?}: Indexing into an Indexed...", thread_index); let mut stores = Vec::new(); @@ -281,7 +282,7 @@ fn index_csv( let document_id = DocumentId::try_from(document_id).context("generated id is too big")?; if document_id % (ONE_MILLION as u32) == 0 { - eprintln!("We have seen {}m documents so far.", document_id / ONE_MILLION as u32); + debug!("We have seen {}m documents so far.", document_id / ONE_MILLION as u32); } for (attr, content) in document.iter().enumerate().take(MAX_ATTRIBUTES) { @@ -310,21 +311,21 @@ fn index_csv( if documents.len() % 100_000 == 0 { let usage = mem_usage(&word_positions, &word_position_docids, &documents); if usage > max_mem_usage { - eprintln!("Whoops too much memory used ({}B).", usage); + debug!("Whoops too much memory used ({}B).", usage); let word_positions = mem::take(&mut word_positions); let word_position_docids = mem::take(&mut word_position_docids); let documents = mem::take(&mut documents); let indexed = Indexed::new(word_positions, word_position_docids, headers.clone(), documents)?; - eprintln!("{:?}: Indexed created!", thread_index); + debug!("{:?}: Indexed created!", thread_index); stores.push(MtblKvStore::from_indexed(indexed)?); } } } let indexed = Indexed::new(word_positions, word_position_docids, headers, documents)?; - eprintln!("{:?}: Indexed created!", thread_index); + debug!("{:?}: Indexed created!", thread_index); stores.push(MtblKvStore::from_indexed(indexed)?); Ok(stores) @@ -372,7 +373,7 @@ fn writer(wtxn: &mut heed::RwTxn, index: &Index, key: &[u8], val: &[u8]) -> anyh fn compute_words_attributes_docids(wtxn: &mut heed::RwTxn, index: &Index) -> anyhow::Result<()> { let before = Instant::now(); - eprintln!("Computing the attributes documents ids..."); + debug!("Computing the attributes documents ids..."); let fst = match index.fst(&wtxn)? { Some(fst) => fst.map_data(|s| s.to_vec())?, @@ -408,7 +409,7 @@ fn compute_words_attributes_docids(wtxn: &mut heed::RwTxn, index: &Index) -> any } } - eprintln!("Computing the attributes documents ids took {:.02?}.", before.elapsed()); + debug!("Computing the attributes documents ids took {:.02?}.", before.elapsed()); Ok(()) } @@ -444,7 +445,7 @@ fn main() -> anyhow::Result<()> { let stores: Vec<_> = stores.into_iter().flatten().collect(); - eprintln!("We are writing into LMDB..."); + debug!("We are writing into LMDB..."); let mut wtxn = env.write_txn()?; MtblKvStore::from_many(stores, |k, v| writer(&mut wtxn, &index, k, v))?; @@ -452,7 +453,7 @@ fn main() -> anyhow::Result<()> { let count = index.documents.len(&wtxn)?; wtxn.commit()?; - eprintln!("Wrote {} documents into LMDB", count); + debug!("Wrote {} documents into LMDB", count); Ok(()) } diff --git a/src/bin/search.rs b/src/bin/search.rs index 5f5d458f4..92112826a 100644 --- a/src/bin/search.rs +++ b/src/bin/search.rs @@ -4,8 +4,9 @@ use std::path::PathBuf; use std::time::Instant; use heed::EnvOpenOptions; -use structopt::StructOpt; +use log::debug; use milli::{Index, BEU32}; +use structopt::StructOpt; #[cfg(target_os = "linux")] #[global_allocator] @@ -62,7 +63,7 @@ fn main() -> anyhow::Result<()> { } } - eprintln!("Took {:.02?} to find {} documents", before.elapsed(), documents_ids.len()); + debug!("Took {:.02?} to find {} documents", before.elapsed(), documents_ids.len()); } Ok(()) diff --git a/src/bin/serve.rs b/src/bin/serve.rs index 387c26d1b..9d4ae77d1 100644 --- a/src/bin/serve.rs +++ b/src/bin/serve.rs @@ -158,7 +158,7 @@ async fn main() -> anyhow::Result<()> { .or(query_route); let addr = SocketAddr::from_str(&opt.http_listen_addr).unwrap(); - eprintln!("listening on http://{}", addr); + println!("listening on http://{}", addr); warp::serve(routes).run(addr).await; Ok(()) diff --git a/src/lib.rs b/src/lib.rs index 9d1240a1a..fad74976f 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -14,6 +14,7 @@ use fxhash::{FxHasher32, FxHasher64}; use heed::types::*; use heed::{PolyDatabase, Database}; use levenshtein_automata::LevenshteinAutomatonBuilder as LevBuilder; +use log::debug; use once_cell::sync::Lazy; use roaring::RoaringBitmap; @@ -138,7 +139,7 @@ impl Index { } } - eprintln!("{} words for {:?} we have found positions {:?} in {:.02?}", + debug!("{} words for {:?} we have found positions {:?} in {:.02?}", count, word, union_positions, before.elapsed()); words.push(derived_words); positions.push(union_positions.iter().collect()); @@ -168,9 +169,9 @@ impl Index { words_attributes_docids.push(intersect_docids); } - eprintln!("The documents you must find for each attribute: {:?}", words_attributes_docids); + debug!("The documents you must find for each attribute: {:?}", words_attributes_docids); - eprintln!("Retrieving words positions took {:.02?}", before.elapsed()); + debug!("Retrieving words positions took {:.02?}", before.elapsed()); // Returns the union of the same position for all the derived words. let unions_word_pos = |word: usize, pos: u32| { @@ -259,10 +260,10 @@ impl Index { } }); - eprintln!("retrieving words took {:.02?} and took {:.02?} to intersect", + debug!("retrieving words took {:.02?} and took {:.02?} to intersect", elapsed_retrieving, before_intersect.elapsed()); - eprintln!("for proximity {:?} {:?} we took {:.02?} to find {} documents", + debug!("for proximity {:?} {:?} we took {:.02?} to find {} documents", proximity, positions, before.elapsed(), intersect_docids.as_ref().map_or(0, |rb| rb.len())); @@ -272,7 +273,7 @@ impl Index { // We found enough documents we can stop here if documents.iter().map(RoaringBitmap::len).sum::() + same_proximity_union.len() >= 20 { - eprintln!("proximity {} took a total of {:.02?}", proximity, same_prox_before.elapsed()); + debug!("proximity {} took a total of {:.02?}", proximity, same_prox_before.elapsed()); break; } } @@ -294,8 +295,8 @@ impl Index { } documents.retain(|rb| !rb.is_empty()); - eprintln!("documents: {:?}", documents); - eprintln!("proximity {} took a total of {:.02?}", proximity, same_prox_before.elapsed()); + debug!("documents: {:?}", documents); + debug!("proximity {} took a total of {:.02?}", proximity, same_prox_before.elapsed()); // We found enough documents we can stop here. if documents.iter().map(RoaringBitmap::len).sum::() >= 20 { @@ -303,7 +304,7 @@ impl Index { } } - eprintln!("{} candidates", documents.iter().map(RoaringBitmap::len).sum::()); + debug!("{} candidates", documents.iter().map(RoaringBitmap::len).sum::()); Ok(documents.iter().flatten().take(20).collect()) } }