Clean up some lines

This commit is contained in:
Clément Renault 2020-08-06 10:20:26 +02:00
parent a4e3c7c37c
commit 8d734941af
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4

View File

@ -165,8 +165,7 @@ impl Store {
key.truncate(1); key.truncate(1);
key.extend_from_slice(&word); key.extend_from_slice(&word);
// We suffix the word with the position at which it appears // We suffix the word with the position at which it appears
let position_bytes = pos.to_be_bytes(); key.extend_from_slice(&pos.to_be_bytes());
key.extend_from_slice(&position_bytes);
// We serialize the document ids into a buffer // We serialize the document ids into a buffer
buffer.clear(); buffer.clear();
ids.serialize_into(&mut buffer)?; ids.serialize_into(&mut buffer)?;
@ -174,8 +173,6 @@ impl Store {
if lmdb_key_valid_size(&key) { if lmdb_key_valid_size(&key) {
sorter.insert(&key, &buffer)?; sorter.insert(&key, &buffer)?;
} }
// And cleanup the position afterward
key.truncate(key.len() - position_bytes.len());
} }
Ok(()) Ok(())
@ -429,15 +426,16 @@ fn main() -> anyhow::Result<()> {
let index = Index::new(&env)?; let index = Index::new(&env)?;
let num_threads = rayon::current_num_threads(); let num_threads = rayon::current_num_threads();
let max_nb_chunks = opt.max_nb_chunks; let max_nb_chunks = opt.max_nb_chunks;
let max_memory = opt.max_memory; let max_memory = opt.max_memory;
// We open the same CSV file once per job (one reader per thread). // We open the same CSV file once per job (one reader per thread).
let file = opt.csv_file.unwrap(); let file = opt.csv_file.unwrap();
let csv_readers: Vec<_> = (0..num_threads).map(|_| csv::Reader::from_path(&file)).collect::<Result<_, _>>()?; let csv_readers = (0..num_threads)
.map(|_| csv::Reader::from_path(&file))
.collect::<Result<Vec<_>, _>>()?;
let stores: Vec<_> = csv_readers let stores = csv_readers
.into_par_iter() .into_par_iter()
.enumerate() .enumerate()
.map(|(i, rdr)| index_csv(rdr, i, num_threads, max_nb_chunks, max_memory)) .map(|(i, rdr)| index_csv(rdr, i, num_threads, max_nb_chunks, max_memory))