From 855a2514894bc9cba6b4227ab849f72f8f21a1a7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Thu, 29 Oct 2020 13:52:00 +0100 Subject: [PATCH] Enable the clear documents optimization that wasn't working due to a bug --- src/subcommand/serve.rs | 3 ++- src/update/delete_documents.rs | 11 +++++++---- src/update/index_documents/mod.rs | 11 ++++++++--- 3 files changed, 17 insertions(+), 8 deletions(-) diff --git a/src/subcommand/serve.rs b/src/subcommand/serve.rs index 471a1da52..7fa021de9 100644 --- a/src/subcommand/serve.rs +++ b/src/subcommand/serve.rs @@ -7,6 +7,7 @@ use std::str::FromStr; use std::sync::Arc; use std::time::Instant; +use anyhow::anyhow; use askama_warp::Template; use flate2::read::GzDecoder; use futures::stream; @@ -258,7 +259,7 @@ pub fn run(opt: Opt) -> anyhow::Result<()> { } }, UpdateMeta::DocumentsAdditionFromPath { path: _ } => { - todo!() + Err(anyhow!("indexing from a file is not supported yet")) } }; diff --git a/src/update/delete_documents.rs b/src/update/delete_documents.rs index b72140791..815403865 100644 --- a/src/update/delete_documents.rs +++ b/src/update/delete_documents.rs @@ -5,6 +5,7 @@ use fst::{IntoStreamer, Streamer}; use roaring::RoaringBitmap; use crate::{Index, BEU32}; +use super::ClearDocuments; pub struct DeleteDocuments<'t, 'u, 'i> { wtxn: &'t mut heed::RwTxn<'u>, @@ -54,13 +55,15 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { return Ok(0); } + let current_documents_ids_len = documents_ids.len(); documents_ids.intersect_with(&self.documents_ids); self.index.put_documents_ids(self.wtxn, &documents_ids)?; - // TODO we should be able to execute a ClearDocuments operation when the number of documents - // to delete is exactly the number of documents in the database, however it seems that - // clearing a database in LMDB requires a commit for it to be effective, we can't clear - // and assume that the database is empty in the same wtxn or something. + // We can execute a ClearDocuments operation when the number of documents + // to delete is exactly the number of documents in the database. + if current_documents_ids_len == self.documents_ids.len() { + return ClearDocuments::new(self.wtxn, self.index).execute(); + } let fields_ids_map = self.index.fields_ids_map(self.wtxn)?; let id_field = fields_ids_map.id("id").expect(r#"the field "id" to be present"#); diff --git a/src/update/index_documents/mod.rs b/src/update/index_documents/mod.rs index 1b3b1d36c..b9db4c72d 100644 --- a/src/update/index_documents/mod.rs +++ b/src/update/index_documents/mod.rs @@ -101,7 +101,9 @@ fn merge_into_lmdb_database( WriteMethod::Append => { let mut out_iter = database.iter_mut::<_, ByteSlice, ByteSlice>(wtxn)?; while let Some((k, v)) = in_iter.next()? { - out_iter.append(k, v).with_context(|| format!("writing {:?} into LMDB", k.as_bstr()))?; + out_iter.append(k, v).with_context(|| { + format!("writing {:?} into LMDB", k.as_bstr()) + })?; } }, WriteMethod::GetMergePut => { @@ -136,7 +138,9 @@ fn write_into_lmdb_database( WriteMethod::Append => { let mut out_iter = database.iter_mut::<_, ByteSlice, ByteSlice>(wtxn)?; while let Some((k, v)) = reader.next()? { - out_iter.append(k, v).with_context(|| format!("writing {:?} into LMDB", k.as_bstr()))?; + out_iter.append(k, v).with_context(|| { + format!("writing {:?} into LMDB", k.as_bstr()) + })?; } }, WriteMethod::GetMergePut => { @@ -408,6 +412,7 @@ impl<'t, 'u, 'i> IndexDocuments<'t, 'u, 'i> { // We merge the new documents ids with the existing ones. documents_ids.union_with(&new_documents_ids); + documents_ids.union_with(&replaced_documents_ids); self.index.put_documents_ids(self.wtxn, &documents_ids)?; debug!("Writing the docid word positions into LMDB on disk..."); @@ -438,7 +443,7 @@ impl<'t, 'u, 'i> IndexDocuments<'t, 'u, 'i> { self.index.main, content, main_merge, - write_method, + WriteMethod::GetMergePut, )?; }, DatabaseType::WordDocids => {