Remove soft deleted ids from ExternalDocumentIds during document import

If the document import replaces a document using hard deletion
This commit is contained in:
Loïc Lecrenier 2022-12-12 12:42:55 +01:00
parent bebd050961
commit e3ee553dcc
3 changed files with 42 additions and 10 deletions

View File

@ -2166,17 +2166,25 @@ pub(crate) mod tests {
db_snap!(index, external_documents_ids, 2, @r###"
soft:
hard:
3 0
4 3
5 2
"###);
db_snap!(index, soft_deleted_documents_ids, 2, @"[]");
// boom
index
.add_documents(documents!([
{ "primary_key": "3" },
]))
.unwrap();
db_snap!(index, documents_ids, @"[0, 2, 3, ]");
db_snap!(index, external_documents_ids, 2, @r###"
soft:
hard:
3 0
4 3
5 2
"###);
db_snap!(index, soft_deleted_documents_ids, 2, @"[]");
}
}

View File

@ -34,6 +34,12 @@ pub struct DocumentDeletionResult {
pub deleted_documents: u64,
pub remaining_documents: u64,
}
#[derive(Debug)]
pub struct DetailedDocumentDeletionResult {
pub deleted_documents: u64,
pub remaining_documents: u64,
pub used_soft_deletion: bool,
}
impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
pub fn new(
@ -68,8 +74,16 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
self.delete_document(docid);
Some(docid)
}
pub fn execute(self) -> Result<DocumentDeletionResult> {
let DetailedDocumentDeletionResult {
deleted_documents,
remaining_documents,
used_soft_deletion: _,
} = self.execute_inner()?;
pub fn execute(mut self) -> Result<DocumentDeletionResult> {
Ok(DocumentDeletionResult { deleted_documents, remaining_documents })
}
pub(crate) fn execute_inner(mut self) -> Result<DetailedDocumentDeletionResult> {
self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?;
// We retrieve the current documents ids that are in the database.
@ -83,7 +97,11 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
if !soft_deleted_docids.is_empty() {
ClearDocuments::new(self.wtxn, self.index).execute()?;
}
return Ok(DocumentDeletionResult { deleted_documents: 0, remaining_documents: 0 });
return Ok(DetailedDocumentDeletionResult {
deleted_documents: 0,
remaining_documents: 0,
used_soft_deletion: false,
});
}
// We remove the documents ids that we want to delete
@ -95,9 +113,10 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
// to delete is exactly the number of documents in the database.
if current_documents_ids_len == self.to_delete_docids.len() {
let remaining_documents = ClearDocuments::new(self.wtxn, self.index).execute()?;
return Ok(DocumentDeletionResult {
return Ok(DetailedDocumentDeletionResult {
deleted_documents: current_documents_ids_len,
remaining_documents,
used_soft_deletion: false,
});
}
@ -159,9 +178,10 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
&& percentage_used_by_soft_deleted_documents < 10
{
self.index.put_soft_deleted_documents_ids(self.wtxn, &soft_deleted_docids)?;
return Ok(DocumentDeletionResult {
return Ok(DetailedDocumentDeletionResult {
deleted_documents: self.to_delete_docids.len(),
remaining_documents: documents_ids.len(),
used_soft_deletion: true,
});
}
@ -488,9 +508,10 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
&self.to_delete_docids,
)?;
Ok(DocumentDeletionResult {
Ok(DetailedDocumentDeletionResult {
deleted_documents: self.to_delete_docids.len(),
remaining_documents: documents_ids.len(),
used_soft_deletion: false,
})
}
}

View File

@ -210,7 +210,7 @@ where
primary_key,
fields_ids_map,
field_distribution,
external_documents_ids,
mut external_documents_ids,
new_documents_ids,
replaced_documents_ids,
documents_count,
@ -335,8 +335,11 @@ where
deletion_builder.disable_soft_deletion(self.config.disable_soft_deletion);
debug!("documents to delete {:?}", replaced_documents_ids);
deletion_builder.delete_documents(&replaced_documents_ids);
let deleted_documents_count = deletion_builder.execute()?;
debug!("{} documents actually deleted", deleted_documents_count.deleted_documents);
let deleted_documents_result = deletion_builder.execute_inner()?;
debug!("{} documents actually deleted", deleted_documents_result.deleted_documents);
if !deleted_documents_result.used_soft_deletion {
external_documents_ids.delete_soft_deleted_documents_ids_from_fsts()?;
}
}
let index_documents_ids = self.index.documents_ids(self.wtxn)?;