Use the log crate instead of stderr

This commit is contained in:
Kerollmops 2020-07-12 10:55:09 +02:00
parent 2c62eeea3c
commit 12358476da
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
7 changed files with 150 additions and 39 deletions

125
Cargo.lock generated
View File

@ -79,12 +79,12 @@ dependencies = [
[[package]]
name = "atty"
version = "0.2.14"
version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d9b39be18770d11421cdb1b9947a45dd3f37e93092cbf377614828a319d5fee8"
checksum = "9a7d5b8723950951411ee34d271d99dddcc2035a16ab25310ea2c8cfd4369652"
dependencies = [
"hermit-abi",
"libc",
"termion",
"winapi 0.3.8",
]
@ -164,7 +164,7 @@ version = "0.2.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "31accafdb70df7871592c058eca3985b71104e15ac32f64706022c58867da931"
dependencies = [
"lazy_static",
"lazy_static 1.4.0",
"memchr",
"regex-automata",
"serde",
@ -228,6 +228,17 @@ version = "0.1.10"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4785bdd1c96b2a846b2bd7cc02e86b6b3dbf14e7e53446c4f54c92a361040822"
[[package]]
name = "chrono"
version = "0.4.13"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c74d84029116787153e02106bf53e66828452a4b325cc8652b788b5967c0a0b6"
dependencies = [
"num-integer",
"num-traits",
"time",
]
[[package]]
name = "clap"
version = "2.33.1"
@ -281,7 +292,7 @@ dependencies = [
"criterion-plot",
"csv",
"itertools",
"lazy_static",
"lazy_static 1.4.0",
"num-traits",
"oorandom",
"plotters",
@ -324,7 +335,7 @@ dependencies = [
"autocfg 1.0.0",
"cfg-if",
"crossbeam-utils",
"lazy_static",
"lazy_static 1.4.0",
"maybe-uninit",
"memoffset",
"scopeguard",
@ -348,7 +359,7 @@ checksum = "c3c7c73a2d1e9fc0886a08b93e98eb643461230d5f1925e4036204d5f2e261a8"
dependencies = [
"autocfg 1.0.0",
"cfg-if",
"lazy_static",
"lazy_static 1.4.0",
]
[[package]]
@ -845,6 +856,12 @@ dependencies = [
"winapi-build",
]
[[package]]
name = "lazy_static"
version = "0.2.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "76f033c7ad61445c5b347c7382dd1237847eb1bce590fe50365dcb33d546be73"
[[package]]
name = "lazy_static"
version = "1.4.0"
@ -951,6 +968,7 @@ dependencies = [
"itertools",
"jemallocator",
"levenshtein_automata",
"log 0.4.8",
"memmap",
"once_cell",
"oxidized-mtbl",
@ -960,6 +978,7 @@ dependencies = [
"slice-group-by",
"smallstr",
"smallvec",
"stderrlog",
"structopt",
"tempfile",
"tokio",
@ -1115,6 +1134,16 @@ dependencies = [
"version_check 0.9.2",
]
[[package]]
name = "num-integer"
version = "0.1.43"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8d59457e662d541ba17869cf51cf177c0b5f0cbf476c66bdc90bf1edac4f875b"
dependencies = [
"autocfg 1.0.0",
"num-traits",
]
[[package]]
name = "num-traits"
version = "0.2.12"
@ -1134,6 +1163,12 @@ dependencies = [
"libc",
]
[[package]]
name = "numtoa"
version = "0.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b8f8bdf33df195859076e54ab11ee78a1b208382d3a26ec40d142ffc1ecc49ef"
[[package]]
name = "once_cell"
version = "1.4.0"
@ -1505,7 +1540,7 @@ dependencies = [
"crossbeam-deque",
"crossbeam-queue",
"crossbeam-utils",
"lazy_static",
"lazy_static 1.4.0",
"num_cpus",
]
@ -1524,6 +1559,15 @@ version = "0.1.56"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2439c63f3f6139d1b57529d16bc3b8bb855230c8efcc5d3a896c8bea7c3b1e84"
[[package]]
name = "redox_termios"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7e891cfe48e9100a70a3b6eb652fef28920c117d366339687bd5576160db0f76"
dependencies = [
"redox_syscall",
]
[[package]]
name = "regex"
version = "1.3.9"
@ -1738,6 +1782,19 @@ dependencies = [
"winapi 0.3.8",
]
[[package]]
name = "stderrlog"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "32e5ee9b90a5452c570a0b0ac1c99ae9498db7e56e33d74366de7f2a7add7f25"
dependencies = [
"atty",
"chrono",
"log 0.4.8",
"termcolor",
"thread_local",
]
[[package]]
name = "structopt"
version = "0.3.14"
@ -1745,7 +1802,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "863246aaf5ddd0d6928dfeb1a9ca65f505599e4e1b399935ef7e75107516b4ef"
dependencies = [
"clap",
"lazy_static",
"lazy_static 1.4.0",
"structopt-derive",
]
@ -1810,6 +1867,27 @@ dependencies = [
"winapi 0.3.8",
]
[[package]]
name = "termcolor"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "bb6bfa289a4d7c5766392812c0a1f4c1ba45afa1ad47803c11e1f407d846d75f"
dependencies = [
"winapi-util",
]
[[package]]
name = "termion"
version = "1.5.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c22cec9d8978d906be5ac94bceb5a010d885c626c4c8855721a4dbd20e3ac905"
dependencies = [
"libc",
"numtoa",
"redox_syscall",
"redox_termios",
]
[[package]]
name = "textwrap"
version = "0.11.0"
@ -1819,6 +1897,16 @@ dependencies = [
"unicode-width",
]
[[package]]
name = "thread_local"
version = "0.3.4"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1697c4b57aeeb7a536b647165a2825faddffb1d3bad386d507709bd51a90bb14"
dependencies = [
"lazy_static 0.2.11",
"unreachable",
]
[[package]]
name = "time"
version = "0.1.43"
@ -1849,7 +1937,7 @@ dependencies = [
"fnv",
"futures-core",
"iovec",
"lazy_static",
"lazy_static 1.4.0",
"libc",
"memchr",
"mio",
@ -2010,6 +2098,15 @@ version = "0.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "826e7639553986605ec5979c7dd957c7895e93eabed50ab2ffa7f6128a75097c"
[[package]]
name = "unreachable"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "382810877fe448991dfc7f0dd6e3ae5d58088fd0ea5e35189655f84e6814fa56"
dependencies = [
"void",
]
[[package]]
name = "url"
version = "2.1.1"
@ -2045,6 +2142,12 @@ version = "0.9.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b5a972e5669d67ba988ce3dc826706fb0a8b01471c088cb0b6110b805cc36aed"
[[package]]
name = "void"
version = "1.0.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a02e4885ed3bc0f2de90ea6dd45ebcbb66dacffe03547fadbb0eeae2770887d"
[[package]]
name = "walkdir"
version = "2.3.1"
@ -2115,7 +2218,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ded84f06e0ed21499f6184df0e0cb3494727b0c5da89534e0fcc55c51d812101"
dependencies = [
"bumpalo",
"lazy_static",
"lazy_static 1.4.0",
"log 0.4.8",
"proc-macro2",
"quote",

View File

@ -27,6 +27,10 @@ smallvec = "1.4.0"
structopt = { version = "0.3.14", default-features = false }
tempfile = "3.1.0"
# logging
log = "0.4.8"
stderrlog = "0.4.3"
# best proximity
indexmap = "1.4.0"

View File

@ -1,6 +1,7 @@
use std::cmp;
use std::time::Instant;
use log::debug;
use crate::iter_shortest_paths::astar_bag;
const ONE_ATTRIBUTE: u32 = 1000;
@ -153,18 +154,18 @@ impl BestProximity {
},
);
eprintln!("BestProximity::next() took {:.02?}", before.elapsed());
debug!("BestProximity::next() took {:.02?}", before.elapsed());
match result {
Some((paths, proximity)) => {
self.best_proximity = proximity + 1;
// We retrieve the last path that we convert into a Vec
let paths: Vec<_> = paths.map(|p| p.iter().filter_map(Node::position).collect()).collect();
eprintln!("result: {} {:?}", proximity, paths);
debug!("result: {} {:?}", proximity, paths);
Some((proximity, paths))
},
None => {
eprintln!("result: {:?}", None as Option<()>);
debug!("result: {:?}", None as Option<()>);
self.best_proximity += 1;
None
},

View File

@ -11,6 +11,7 @@ use cow_utils::CowUtils;
use fst::{Streamer, IntoStreamer};
use heed::EnvOpenOptions;
use heed::types::*;
use log::debug;
use oxidized_mtbl::{Reader, ReaderOptions, Writer, Merger, MergerOptions};
use rayon::prelude::*;
use roaring::RoaringBitmap;
@ -86,7 +87,7 @@ struct MtblKvStore(Option<File>);
impl MtblKvStore {
fn from_indexed(mut indexed: Indexed) -> anyhow::Result<MtblKvStore> {
eprintln!("Creating an MTBL store from an Indexed...");
debug!("Creating an MTBL store from an Indexed...");
let outfile = tempfile::tempfile()?;
let mut out = Writer::new(outfile, None)?;
@ -152,7 +153,7 @@ impl MtblKvStore {
let out = out.into_inner()?;
eprintln!("MTBL store created!");
debug!("MTBL store created!");
Ok(MtblKvStore(Some(out)))
}
@ -198,7 +199,7 @@ impl MtblKvStore {
fn from_many<F>(stores: Vec<MtblKvStore>, mut f: F) -> anyhow::Result<()>
where F: FnMut(&[u8], &[u8]) -> anyhow::Result<()>
{
eprintln!("Merging {} MTBL stores...", stores.len());
debug!("Merging {} MTBL stores...", stores.len());
let before = Instant::now();
let mmaps: Vec<_> = stores.iter().flat_map(|m| {
@ -217,7 +218,7 @@ impl MtblKvStore {
(f)(k, v)?;
}
eprintln!("MTBL stores merged in {:.02?}!", before.elapsed());
debug!("MTBL stores merged in {:.02?}!", before.elapsed());
Ok(())
}
}
@ -256,7 +257,7 @@ fn index_csv(
max_mem_usage: usize,
) -> anyhow::Result<Vec<MtblKvStore>>
{
eprintln!("{:?}: Indexing into an Indexed...", thread_index);
debug!("{:?}: Indexing into an Indexed...", thread_index);
let mut stores = Vec::new();
@ -281,7 +282,7 @@ fn index_csv(
let document_id = DocumentId::try_from(document_id).context("generated id is too big")?;
if document_id % (ONE_MILLION as u32) == 0 {
eprintln!("We have seen {}m documents so far.", document_id / ONE_MILLION as u32);
debug!("We have seen {}m documents so far.", document_id / ONE_MILLION as u32);
}
for (attr, content) in document.iter().enumerate().take(MAX_ATTRIBUTES) {
@ -310,21 +311,21 @@ fn index_csv(
if documents.len() % 100_000 == 0 {
let usage = mem_usage(&word_positions, &word_position_docids, &documents);
if usage > max_mem_usage {
eprintln!("Whoops too much memory used ({}B).", usage);
debug!("Whoops too much memory used ({}B).", usage);
let word_positions = mem::take(&mut word_positions);
let word_position_docids = mem::take(&mut word_position_docids);
let documents = mem::take(&mut documents);
let indexed = Indexed::new(word_positions, word_position_docids, headers.clone(), documents)?;
eprintln!("{:?}: Indexed created!", thread_index);
debug!("{:?}: Indexed created!", thread_index);
stores.push(MtblKvStore::from_indexed(indexed)?);
}
}
}
let indexed = Indexed::new(word_positions, word_position_docids, headers, documents)?;
eprintln!("{:?}: Indexed created!", thread_index);
debug!("{:?}: Indexed created!", thread_index);
stores.push(MtblKvStore::from_indexed(indexed)?);
Ok(stores)
@ -372,7 +373,7 @@ fn writer(wtxn: &mut heed::RwTxn, index: &Index, key: &[u8], val: &[u8]) -> anyh
fn compute_words_attributes_docids(wtxn: &mut heed::RwTxn, index: &Index) -> anyhow::Result<()> {
let before = Instant::now();
eprintln!("Computing the attributes documents ids...");
debug!("Computing the attributes documents ids...");
let fst = match index.fst(&wtxn)? {
Some(fst) => fst.map_data(|s| s.to_vec())?,
@ -408,7 +409,7 @@ fn compute_words_attributes_docids(wtxn: &mut heed::RwTxn, index: &Index) -> any
}
}
eprintln!("Computing the attributes documents ids took {:.02?}.", before.elapsed());
debug!("Computing the attributes documents ids took {:.02?}.", before.elapsed());
Ok(())
}
@ -444,7 +445,7 @@ fn main() -> anyhow::Result<()> {
let stores: Vec<_> = stores.into_iter().flatten().collect();
eprintln!("We are writing into LMDB...");
debug!("We are writing into LMDB...");
let mut wtxn = env.write_txn()?;
MtblKvStore::from_many(stores, |k, v| writer(&mut wtxn, &index, k, v))?;
@ -452,7 +453,7 @@ fn main() -> anyhow::Result<()> {
let count = index.documents.len(&wtxn)?;
wtxn.commit()?;
eprintln!("Wrote {} documents into LMDB", count);
debug!("Wrote {} documents into LMDB", count);
Ok(())
}

View File

@ -4,8 +4,9 @@ use std::path::PathBuf;
use std::time::Instant;
use heed::EnvOpenOptions;
use structopt::StructOpt;
use log::debug;
use milli::{Index, BEU32};
use structopt::StructOpt;
#[cfg(target_os = "linux")]
#[global_allocator]
@ -62,7 +63,7 @@ fn main() -> anyhow::Result<()> {
}
}
eprintln!("Took {:.02?} to find {} documents", before.elapsed(), documents_ids.len());
debug!("Took {:.02?} to find {} documents", before.elapsed(), documents_ids.len());
}
Ok(())

View File

@ -158,7 +158,7 @@ async fn main() -> anyhow::Result<()> {
.or(query_route);
let addr = SocketAddr::from_str(&opt.http_listen_addr).unwrap();
eprintln!("listening on http://{}", addr);
println!("listening on http://{}", addr);
warp::serve(routes).run(addr).await;
Ok(())

View File

@ -14,6 +14,7 @@ use fxhash::{FxHasher32, FxHasher64};
use heed::types::*;
use heed::{PolyDatabase, Database};
use levenshtein_automata::LevenshteinAutomatonBuilder as LevBuilder;
use log::debug;
use once_cell::sync::Lazy;
use roaring::RoaringBitmap;
@ -138,7 +139,7 @@ impl Index {
}
}
eprintln!("{} words for {:?} we have found positions {:?} in {:.02?}",
debug!("{} words for {:?} we have found positions {:?} in {:.02?}",
count, word, union_positions, before.elapsed());
words.push(derived_words);
positions.push(union_positions.iter().collect());
@ -168,9 +169,9 @@ impl Index {
words_attributes_docids.push(intersect_docids);
}
eprintln!("The documents you must find for each attribute: {:?}", words_attributes_docids);
debug!("The documents you must find for each attribute: {:?}", words_attributes_docids);
eprintln!("Retrieving words positions took {:.02?}", before.elapsed());
debug!("Retrieving words positions took {:.02?}", before.elapsed());
// Returns the union of the same position for all the derived words.
let unions_word_pos = |word: usize, pos: u32| {
@ -259,10 +260,10 @@ impl Index {
}
});
eprintln!("retrieving words took {:.02?} and took {:.02?} to intersect",
debug!("retrieving words took {:.02?} and took {:.02?} to intersect",
elapsed_retrieving, before_intersect.elapsed());
eprintln!("for proximity {:?} {:?} we took {:.02?} to find {} documents",
debug!("for proximity {:?} {:?} we took {:.02?} to find {} documents",
proximity, positions, before.elapsed(),
intersect_docids.as_ref().map_or(0, |rb| rb.len()));
@ -272,7 +273,7 @@ impl Index {
// We found enough documents we can stop here
if documents.iter().map(RoaringBitmap::len).sum::<u64>() + same_proximity_union.len() >= 20 {
eprintln!("proximity {} took a total of {:.02?}", proximity, same_prox_before.elapsed());
debug!("proximity {} took a total of {:.02?}", proximity, same_prox_before.elapsed());
break;
}
}
@ -294,8 +295,8 @@ impl Index {
}
documents.retain(|rb| !rb.is_empty());
eprintln!("documents: {:?}", documents);
eprintln!("proximity {} took a total of {:.02?}", proximity, same_prox_before.elapsed());
debug!("documents: {:?}", documents);
debug!("proximity {} took a total of {:.02?}", proximity, same_prox_before.elapsed());
// We found enough documents we can stop here.
if documents.iter().map(RoaringBitmap::len).sum::<u64>() >= 20 {
@ -303,7 +304,7 @@ impl Index {
}
}
eprintln!("{} candidates", documents.iter().map(RoaringBitmap::len).sum::<u64>());
debug!("{} candidates", documents.iter().map(RoaringBitmap::len).sum::<u64>());
Ok(documents.iter().flatten().take(20).collect())
}
}