Enable the clear documents optimization that wasn't working due to a bug

This commit is contained in:
Clément Renault 2020-10-29 13:52:00 +01:00
parent 1228c2948d
commit 855a251489
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
3 changed files with 17 additions and 8 deletions

View File

@ -7,6 +7,7 @@ use std::str::FromStr;
use std::sync::Arc;
use std::time::Instant;
use anyhow::anyhow;
use askama_warp::Template;
use flate2::read::GzDecoder;
use futures::stream;
@ -258,7 +259,7 @@ pub fn run(opt: Opt) -> anyhow::Result<()> {
}
},
UpdateMeta::DocumentsAdditionFromPath { path: _ } => {
todo!()
Err(anyhow!("indexing from a file is not supported yet"))
}
};

View File

@ -5,6 +5,7 @@ use fst::{IntoStreamer, Streamer};
use roaring::RoaringBitmap;
use crate::{Index, BEU32};
use super::ClearDocuments;
pub struct DeleteDocuments<'t, 'u, 'i> {
wtxn: &'t mut heed::RwTxn<'u>,
@ -54,13 +55,15 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
return Ok(0);
}
let current_documents_ids_len = documents_ids.len();
documents_ids.intersect_with(&self.documents_ids);
self.index.put_documents_ids(self.wtxn, &documents_ids)?;
// TODO we should be able to execute a ClearDocuments operation when the number of documents
// to delete is exactly the number of documents in the database, however it seems that
// clearing a database in LMDB requires a commit for it to be effective, we can't clear
// and assume that the database is empty in the same wtxn or something.
// We can execute a ClearDocuments operation when the number of documents
// to delete is exactly the number of documents in the database.
if current_documents_ids_len == self.documents_ids.len() {
return ClearDocuments::new(self.wtxn, self.index).execute();
}
let fields_ids_map = self.index.fields_ids_map(self.wtxn)?;
let id_field = fields_ids_map.id("id").expect(r#"the field "id" to be present"#);

View File

@ -101,7 +101,9 @@ fn merge_into_lmdb_database(
WriteMethod::Append => {
let mut out_iter = database.iter_mut::<_, ByteSlice, ByteSlice>(wtxn)?;
while let Some((k, v)) = in_iter.next()? {
out_iter.append(k, v).with_context(|| format!("writing {:?} into LMDB", k.as_bstr()))?;
out_iter.append(k, v).with_context(|| {
format!("writing {:?} into LMDB", k.as_bstr())
})?;
}
},
WriteMethod::GetMergePut => {
@ -136,7 +138,9 @@ fn write_into_lmdb_database(
WriteMethod::Append => {
let mut out_iter = database.iter_mut::<_, ByteSlice, ByteSlice>(wtxn)?;
while let Some((k, v)) = reader.next()? {
out_iter.append(k, v).with_context(|| format!("writing {:?} into LMDB", k.as_bstr()))?;
out_iter.append(k, v).with_context(|| {
format!("writing {:?} into LMDB", k.as_bstr())
})?;
}
},
WriteMethod::GetMergePut => {
@ -408,6 +412,7 @@ impl<'t, 'u, 'i> IndexDocuments<'t, 'u, 'i> {
// We merge the new documents ids with the existing ones.
documents_ids.union_with(&new_documents_ids);
documents_ids.union_with(&replaced_documents_ids);
self.index.put_documents_ids(self.wtxn, &documents_ids)?;
debug!("Writing the docid word positions into LMDB on disk...");
@ -438,7 +443,7 @@ impl<'t, 'u, 'i> IndexDocuments<'t, 'u, 'i> {
self.index.main,
content,
main_merge,
write_method,
WriteMethod::GetMergePut,
)?;
},
DatabaseType::WordDocids => {