Use the log crate instead of stderr

Kerollmops 2020-07-12 10:55:09 +02:00
parent 2c62eeea3c
commit 12358476da
GPG key ID: 92ADA4E935E71FA4
7 changed files with 150 additions and 39 deletions
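
One practical note on the change: the log macros are only a logging facade, so the new debug! calls produce no output unless each binary installs a logger backend, whereas the old eprintln! calls always wrote to stderr. Below is a minimal sketch of how such a backend could be wired up; it assumes the env_logger crate and the standard RUST_LOG variable, neither of which is shown in the hunks of this commit.

// Hypothetical wiring, not taken from this commit: install a logger backend
// (here env_logger) so that log::debug! messages become visible.
use log::debug;

fn main() {
    // Honors the RUST_LOG environment variable, e.g. RUST_LOG=debug.
    env_logger::init();

    let before = std::time::Instant::now();
    // Without an initialized logger (or with RUST_LOG unset), this line is
    // simply discarded instead of being printed to stderr.
    debug!("BestProximity::next() took {:.02?}", before.elapsed());
}

With a backend like this in place, running a binary with RUST_LOG=debug (or a module filter such as RUST_LOG=milli=debug) restores the timing output that previously went to stderr unconditionally.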

View file

@@ -1,6 +1,7 @@
use std::cmp;
use std::time::Instant;
+use log::debug;
use crate::iter_shortest_paths::astar_bag;
const ONE_ATTRIBUTE: u32 = 1000;
@@ -153,18 +154,18 @@ impl BestProximity {
},
);
eprintln!("BestProximity::next() took {:.02?}", before.elapsed());
debug!("BestProximity::next() took {:.02?}", before.elapsed());
match result {
Some((paths, proximity)) => {
self.best_proximity = proximity + 1;
// We retrieve the last path that we convert into a Vec
let paths: Vec<_> = paths.map(|p| p.iter().filter_map(Node::position).collect()).collect();
eprintln!("result: {} {:?}", proximity, paths);
debug!("result: {} {:?}", proximity, paths);
Some((proximity, paths))
},
None => {
eprintln!("result: {:?}", None as Option<()>);
debug!("result: {:?}", None as Option<()>);
self.best_proximity += 1;
None
},

View file

@@ -11,6 +11,7 @@ use cow_utils::CowUtils;
use fst::{Streamer, IntoStreamer};
use heed::EnvOpenOptions;
use heed::types::*;
+use log::debug;
use oxidized_mtbl::{Reader, ReaderOptions, Writer, Merger, MergerOptions};
use rayon::prelude::*;
use roaring::RoaringBitmap;
@@ -86,7 +87,7 @@ struct MtblKvStore(Option<File>);
impl MtblKvStore {
fn from_indexed(mut indexed: Indexed) -> anyhow::Result<MtblKvStore> {
eprintln!("Creating an MTBL store from an Indexed...");
debug!("Creating an MTBL store from an Indexed...");
let outfile = tempfile::tempfile()?;
let mut out = Writer::new(outfile, None)?;
@@ -152,7 +153,7 @@ impl MtblKvStore {
let out = out.into_inner()?;
eprintln!("MTBL store created!");
debug!("MTBL store created!");
Ok(MtblKvStore(Some(out)))
}
@@ -198,7 +199,7 @@ impl MtblKvStore {
fn from_many<F>(stores: Vec<MtblKvStore>, mut f: F) -> anyhow::Result<()>
where F: FnMut(&[u8], &[u8]) -> anyhow::Result<()>
{
eprintln!("Merging {} MTBL stores...", stores.len());
debug!("Merging {} MTBL stores...", stores.len());
let before = Instant::now();
let mmaps: Vec<_> = stores.iter().flat_map(|m| {
@@ -217,7 +218,7 @@ impl MtblKvStore {
(f)(k, v)?;
}
eprintln!("MTBL stores merged in {:.02?}!", before.elapsed());
debug!("MTBL stores merged in {:.02?}!", before.elapsed());
Ok(())
}
}
@@ -256,7 +257,7 @@ fn index_csv(
max_mem_usage: usize,
) -> anyhow::Result<Vec<MtblKvStore>>
{
eprintln!("{:?}: Indexing into an Indexed...", thread_index);
debug!("{:?}: Indexing into an Indexed...", thread_index);
let mut stores = Vec::new();
@@ -281,7 +282,7 @@ fn index_csv(
let document_id = DocumentId::try_from(document_id).context("generated id is too big")?;
if document_id % (ONE_MILLION as u32) == 0 {
eprintln!("We have seen {}m documents so far.", document_id / ONE_MILLION as u32);
debug!("We have seen {}m documents so far.", document_id / ONE_MILLION as u32);
}
for (attr, content) in document.iter().enumerate().take(MAX_ATTRIBUTES) {
@@ -310,21 +311,21 @@ fn index_csv(
if documents.len() % 100_000 == 0 {
let usage = mem_usage(&word_positions, &word_position_docids, &documents);
if usage > max_mem_usage {
eprintln!("Whoops too much memory used ({}B).", usage);
debug!("Whoops too much memory used ({}B).", usage);
let word_positions = mem::take(&mut word_positions);
let word_position_docids = mem::take(&mut word_position_docids);
let documents = mem::take(&mut documents);
let indexed = Indexed::new(word_positions, word_position_docids, headers.clone(), documents)?;
eprintln!("{:?}: Indexed created!", thread_index);
debug!("{:?}: Indexed created!", thread_index);
stores.push(MtblKvStore::from_indexed(indexed)?);
}
}
}
let indexed = Indexed::new(word_positions, word_position_docids, headers, documents)?;
eprintln!("{:?}: Indexed created!", thread_index);
debug!("{:?}: Indexed created!", thread_index);
stores.push(MtblKvStore::from_indexed(indexed)?);
Ok(stores)
@@ -372,7 +373,7 @@ fn writer(wtxn: &mut heed::RwTxn, index: &Index, key: &[u8], val: &[u8]) -> anyh
fn compute_words_attributes_docids(wtxn: &mut heed::RwTxn, index: &Index) -> anyhow::Result<()> {
let before = Instant::now();
eprintln!("Computing the attributes documents ids...");
debug!("Computing the attributes documents ids...");
let fst = match index.fst(&wtxn)? {
Some(fst) => fst.map_data(|s| s.to_vec())?,
@@ -408,7 +409,7 @@ fn compute_words_attributes_docids(wtxn: &mut heed::RwTxn, index: &Index) -> any
}
}
eprintln!("Computing the attributes documents ids took {:.02?}.", before.elapsed());
debug!("Computing the attributes documents ids took {:.02?}.", before.elapsed());
Ok(())
}
@@ -444,7 +445,7 @@ fn main() -> anyhow::Result<()> {
let stores: Vec<_> = stores.into_iter().flatten().collect();
eprintln!("We are writing into LMDB...");
debug!("We are writing into LMDB...");
let mut wtxn = env.write_txn()?;
MtblKvStore::from_many(stores, |k, v| writer(&mut wtxn, &index, k, v))?;
@@ -452,7 +453,7 @@ fn main() -> anyhow::Result<()> {
let count = index.documents.len(&wtxn)?;
wtxn.commit()?;
eprintln!("Wrote {} documents into LMDB", count);
debug!("Wrote {} documents into LMDB", count);
Ok(())
}

View file

@@ -4,8 +4,9 @@ use std::path::PathBuf;
use std::time::Instant;
use heed::EnvOpenOptions;
-use structopt::StructOpt;
+use log::debug;
use milli::{Index, BEU32};
+use structopt::StructOpt;
#[cfg(target_os = "linux")]
#[global_allocator]
@@ -62,7 +63,7 @@ fn main() -> anyhow::Result<()> {
}
}
eprintln!("Took {:.02?} to find {} documents", before.elapsed(), documents_ids.len());
debug!("Took {:.02?} to find {} documents", before.elapsed(), documents_ids.len());
}
Ok(())

View file

@@ -158,7 +158,7 @@ async fn main() -> anyhow::Result<()> {
.or(query_route);
let addr = SocketAddr::from_str(&opt.http_listen_addr).unwrap();
eprintln!("listening on http://{}", addr);
println!("listening on http://{}", addr);
warp::serve(routes).run(addr).await;
Ok(())

View file

@@ -14,6 +14,7 @@ use fxhash::{FxHasher32, FxHasher64};
use heed::types::*;
use heed::{PolyDatabase, Database};
use levenshtein_automata::LevenshteinAutomatonBuilder as LevBuilder;
+use log::debug;
use once_cell::sync::Lazy;
use roaring::RoaringBitmap;
@@ -138,7 +139,7 @@ impl Index {
}
}
eprintln!("{} words for {:?} we have found positions {:?} in {:.02?}",
debug!("{} words for {:?} we have found positions {:?} in {:.02?}",
count, word, union_positions, before.elapsed());
words.push(derived_words);
positions.push(union_positions.iter().collect());
@@ -168,9 +169,9 @@ impl Index {
words_attributes_docids.push(intersect_docids);
}
eprintln!("The documents you must find for each attribute: {:?}", words_attributes_docids);
debug!("The documents you must find for each attribute: {:?}", words_attributes_docids);
eprintln!("Retrieving words positions took {:.02?}", before.elapsed());
debug!("Retrieving words positions took {:.02?}", before.elapsed());
// Returns the union of the same position for all the derived words.
let unions_word_pos = |word: usize, pos: u32| {
@@ -259,10 +260,10 @@ impl Index {
}
});
eprintln!("retrieving words took {:.02?} and took {:.02?} to intersect",
debug!("retrieving words took {:.02?} and took {:.02?} to intersect",
elapsed_retrieving, before_intersect.elapsed());
eprintln!("for proximity {:?} {:?} we took {:.02?} to find {} documents",
debug!("for proximity {:?} {:?} we took {:.02?} to find {} documents",
proximity, positions, before.elapsed(),
intersect_docids.as_ref().map_or(0, |rb| rb.len()));
@@ -272,7 +273,7 @@ impl Index {
// We found enough documents we can stop here
if documents.iter().map(RoaringBitmap::len).sum::<u64>() + same_proximity_union.len() >= 20 {
eprintln!("proximity {} took a total of {:.02?}", proximity, same_prox_before.elapsed());
debug!("proximity {} took a total of {:.02?}", proximity, same_prox_before.elapsed());
break;
}
}
@@ -294,8 +295,8 @@ impl Index {
}
documents.retain(|rb| !rb.is_empty());
eprintln!("documents: {:?}", documents);
eprintln!("proximity {} took a total of {:.02?}", proximity, same_prox_before.elapsed());
debug!("documents: {:?}", documents);
debug!("proximity {} took a total of {:.02?}", proximity, same_prox_before.elapsed());
// We found enough documents we can stop here.
if documents.iter().map(RoaringBitmap::len).sum::<u64>() >= 20 {
@@ -303,7 +304,7 @@ impl Index {
}
}
eprintln!("{} candidates", documents.iter().map(RoaringBitmap::len).sum::<u64>());
debug!("{} candidates", documents.iter().map(RoaringBitmap::len).sum::<u64>());
Ok(documents.iter().flatten().take(20).collect())
}
}