Hard or soft delete according to the deletion strategy

This commit is contained in:
Louis Dureuil 2022-12-19 09:38:59 +01:00
parent fc7618d49b
commit e2ae3b24aa
No known key found for this signature in database

View File

@ -186,33 +186,39 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
soft_deleted_docids |= &self.to_delete_docids; soft_deleted_docids |= &self.to_delete_docids;
// if we have less documents to delete than the threshold we simply save them in // decide for a hard or soft deletion depending on the strategy
// the `soft_deleted_documents_ids` bitmap and early exit. let soft_deletion = match self.strategy {
let size_used = self.index.used_size()?; DeletionStrategy::Dynamic => {
let map_size = self.index.env.map_size()? as u64; // if we have less documents to delete than the threshold we simply save them in
let nb_documents = self.index.number_of_documents(self.wtxn)?; // the `soft_deleted_documents_ids` bitmap and early exit.
let nb_soft_deleted = soft_deleted_docids.len(); let size_used = self.index.used_size()?;
let map_size = self.index.env.map_size()? as u64;
let nb_documents = self.index.number_of_documents(self.wtxn)?;
let nb_soft_deleted = soft_deleted_docids.len();
let percentage_available = 100 - (size_used * 100 / map_size); let percentage_available = 100 - (size_used * 100 / map_size);
let estimated_document_size = size_used / (nb_documents + nb_soft_deleted); let estimated_document_size = size_used / (nb_documents + nb_soft_deleted);
let estimated_size_used_by_soft_deleted = estimated_document_size * nb_soft_deleted; let estimated_size_used_by_soft_deleted = estimated_document_size * nb_soft_deleted;
let percentage_used_by_soft_deleted_documents = let percentage_used_by_soft_deleted_documents =
estimated_size_used_by_soft_deleted * 100 / map_size; estimated_size_used_by_soft_deleted * 100 / map_size;
// if we have more than 10% of disk space available and the soft deleted // if we have more than 10% of disk space available and the soft deleted
// documents use less than 10% of the total space available, // documents use less than 10% of the total space available,
// we skip the deletion. Eg. // we skip the deletion. Eg.
// - With 100Go of disk and 20Go used including 5Go of soft-deleted documents // - With 100Go of disk and 20Go used including 5Go of soft-deleted documents
// We don't delete anything. // We don't delete anything.
// - With 100Go of disk and 95Go used including 1mo of soft-deleted documents // - With 100Go of disk and 95Go used including 1mo of soft-deleted documents
// We run the deletion. // We run the deletion.
// - With 100Go of disk and 50Go used including 15Go of soft-deleted documents // - With 100Go of disk and 50Go used including 15Go of soft-deleted documents
// We run the deletion. // We run the deletion.
percentage_available > 10 && percentage_used_by_soft_deleted_documents < 10
}
DeletionStrategy::AlwaysSoft => true,
DeletionStrategy::AlwaysHard => false,
};
if !self.disable_soft_deletion if soft_deletion {
&& percentage_available > 10 // Keep the soft-deleted in the DB
&& percentage_used_by_soft_deleted_documents < 10
{
self.index.put_soft_deleted_documents_ids(self.wtxn, &soft_deleted_docids)?; self.index.put_soft_deleted_documents_ids(self.wtxn, &soft_deleted_docids)?;
return Ok(DetailedDocumentDeletionResult { return Ok(DetailedDocumentDeletionResult {
deleted_documents: self.to_delete_docids.len(), deleted_documents: self.to_delete_docids.len(),