Merge the whole list of generated MTBL in one go

This commit is contained in:
Kerollmops 2020-06-04 17:38:43 +02:00
parent 3a23dc242e
commit c42d3c19e2
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4

View File

@@ -265,7 +265,7 @@ fn main() -> anyhow::Result<()> {
let index = Index::new(&env)?; let index = Index::new(&env)?;
let mut stores: Vec<_> = opt.files_to_index let stores: Vec<_> = opt.files_to_index
.into_par_iter() .into_par_iter()
.map(|path| { .map(|path| {
let rdr = csv::Reader::from_path(path)?; let rdr = csv::Reader::from_path(path)?;
@@ -276,20 +276,6 @@ fn main() -> anyhow::Result<()> {
}) })
.collect::<Result<_, _>>()?; .collect::<Result<_, _>>()?;
while stores.len() > 3 {
let chunk_size = (stores.len() / rayon::current_num_threads()).max(2);
let s = std::mem::take(&mut stores);
stores = s.into_par_iter().chunks(chunk_size)
.map(|v| {
let outfile = tempfile::tempfile()?;
let mut out = Writer::new(outfile, None)?;
MtblKvStore::from_many(v, |k, v| Ok(out.add(k, v).unwrap()))?;
let out = out.into_inner()?;
Ok(MtblKvStore(Some(out))) as anyhow::Result<_>
})
.collect::<Result<_, _>>()?;
}
eprintln!("We are writing into LMDB..."); eprintln!("We are writing into LMDB...");
let mut wtxn = env.write_txn()?; let mut wtxn = env.write_txn()?;
MtblKvStore::from_many(stores, |k, v| writer(&mut wtxn, &index, k, v))?; MtblKvStore::from_many(stores, |k, v| writer(&mut wtxn, &index, k, v))?;