Merge pre-computed word attribute documents ids

This commit is contained in:
Kerollmops 2020-07-04 17:02:27 +02:00
parent fea7cac206
commit ac8353a64f
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4

View File

@ -174,14 +174,14 @@ fn index_csv<R: io::Read>(
index.put_fst(wtxn, &new_words_fst)?; index.put_fst(wtxn, &new_words_fst)?;
index.put_headers(wtxn, &headers)?; index.put_headers(wtxn, &headers)?;
let before = Instant::now();
compute_words_attributes_docids(wtxn, index)?;
eprintln!("Computing the attributes documents ids took {:.02?}.", before.elapsed());
Ok(()) Ok(())
} }
fn compute_words_attributes_docids(wtxn: &mut heed::RwTxn, index: &Index) -> anyhow::Result<()> { fn compute_words_attributes_docids(wtxn: &mut heed::RwTxn, index: &Index) -> anyhow::Result<()> {
eprintln!("Computing the attributes documents ids...");
let before = Instant::now();
let fst = match index.fst(&wtxn)? { let fst = match index.fst(&wtxn)? {
Some(fst) => fst.map_data(|s| s.to_vec())?, Some(fst) => fst.map_data(|s| s.to_vec())?,
None => return Ok(()), None => return Ok(()),
@ -216,8 +216,6 @@ fn compute_words_attributes_docids(wtxn: &mut heed::RwTxn, index: &Index) -> any
} }
} }
eprintln!("Computing the attributes documents ids took {:.02?}.", before.elapsed());
Ok(()) Ok(())
} }
@ -373,6 +371,32 @@ fn merge_databases(
eprintln!("Merging the word_position_docids database took {:.02?}.", before.elapsed()); eprintln!("Merging the word_position_docids database took {:.02?}.", before.elapsed());
drop(dest); drop(dest);
// merge the word attribute documents ids
let sources: Result<Vec<_>, _> = others.iter().zip(&rtxns).map(|((.., i), t)| i.word_attribute_docids.iter(t)).collect();
let sources = sources?;
let mut dest = index.word_attribute_docids.iter_mut(wtxn)?;
let before = Instant::now();
let mut current = None as Option<(&[u8], RoaringBitmap)>;
for result in MergeIter::new(sources) {
let (k, v) = result?;
match current.as_mut() {
Some((ck, cv)) if ck == &k => cv.union_with(&v),
Some((ck, cv)) => {
dest.append(&ck, &cv)?;
current = Some((k, v));
},
None => current = Some((k, v)),
};
}
if let Some((ck, cv)) = current.take() {
dest.append(&ck, &cv)?;
}
eprintln!("Merging the word_attribute_docids database took {:.02?}.", before.elapsed());
drop(dest);
// merge the documents // merge the documents
let sources: Result<Vec<_>, _> = others.iter().zip(&rtxns).map(|((.., i), t)| { let sources: Result<Vec<_>, _> = others.iter().zip(&rtxns).map(|((.., i), t)| {
i.documents.as_polymorph().iter::<_, ByteSlice, ByteSlice>(t) i.documents.as_polymorph().iter::<_, ByteSlice, ByteSlice>(t)
@ -461,7 +485,6 @@ fn main() -> anyhow::Result<()> {
let mut wtxn = env.write_txn()?; let mut wtxn = env.write_txn()?;
let parts = result?; let parts = result?;
merge_databases(parts, &mut wtxn, &index)?; merge_databases(parts, &mut wtxn, &index)?;
compute_words_attributes_docids(&mut wtxn, &index)?;
let count = index.documents.len(&wtxn)?; let count = index.documents.len(&wtxn)?;
wtxn.commit()?; wtxn.commit()?;