From bebd050961f5ecd61404dbd59aac2bfee820a0c3 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Thu, 8 Dec 2022 19:18:25 +0100 Subject: [PATCH 1/3] Add new test for bug 3021 --- milli/src/index.rs | 68 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 68 insertions(+) diff --git a/milli/src/index.rs b/milli/src/index.rs index e9d66a3ae..8855d51cd 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -2111,4 +2111,72 @@ pub(crate) mod tests { "###); db_snap!(index, soft_deleted_documents_ids, 6, @"[]"); } + + #[test] + fn bug_3021_third() { + // https://github.com/meilisearch/meilisearch/issues/3021 + let mut index = TempIndex::new(); + index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments; + + index + .update_settings(|settings| { + settings.set_primary_key("primary_key".to_owned()); + }) + .unwrap(); + + index + .add_documents(documents!([ + { "primary_key": 3 }, + { "primary_key": 4 }, + { "primary_key": 5 } + ])) + .unwrap(); + + db_snap!(index, documents_ids, @"[0, 1, 2, ]"); + db_snap!(index, external_documents_ids, 1, @r###" + soft: + hard: + 3 0 + 4 1 + 5 2 + "###); + db_snap!(index, soft_deleted_documents_ids, 1, @"[]"); + + let mut wtxn = index.write_txn().unwrap(); + let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap(); + delete.delete_external_id("3"); + delete.execute().unwrap(); + wtxn.commit().unwrap(); + + db_snap!(index, documents_ids, @"[1, 2, ]"); + db_snap!(index, external_documents_ids, 2, @r###" + soft: + hard: + 3 0 + 4 1 + 5 2 + "###); + db_snap!(index, soft_deleted_documents_ids, 2, @"[0, ]"); + + index.index_documents_config.disable_soft_deletion = true; + + index.add_documents(documents!([{ "primary_key": "4", "a": 2 }])).unwrap(); + + db_snap!(index, documents_ids, @"[2, 3, ]"); + db_snap!(index, external_documents_ids, 2, @r###" + soft: + hard: + 3 0 + 4 3 + 5 2 + "###); + db_snap!(index, soft_deleted_documents_ids, 2, @"[]"); + + // boom + index + .add_documents(documents!([ + { "primary_key": "3" }, + ])) + .unwrap(); + } } From e3ee553dcca16e5ff5e688da6230acdab8eacc67 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Mon, 12 Dec 2022 12:42:55 +0100 Subject: [PATCH 2/3] Remove soft deleted ids from ExternalDocumentIds during document import If the document import replaces a document using hard deletion --- milli/src/index.rs | 12 ++++++++-- milli/src/update/delete_documents.rs | 31 +++++++++++++++++++++---- milli/src/update/index_documents/mod.rs | 9 ++++--- 3 files changed, 42 insertions(+), 10 deletions(-) diff --git a/milli/src/index.rs b/milli/src/index.rs index 8855d51cd..2d489fbd1 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -2166,17 +2166,25 @@ pub(crate) mod tests { db_snap!(index, external_documents_ids, 2, @r###" soft: hard: - 3 0 4 3 5 2 "###); db_snap!(index, soft_deleted_documents_ids, 2, @"[]"); - // boom index .add_documents(documents!([ { "primary_key": "3" }, ])) .unwrap(); + + db_snap!(index, documents_ids, @"[0, 2, 3, ]"); + db_snap!(index, external_documents_ids, 2, @r###" + soft: + hard: + 3 0 + 4 3 + 5 2 + "###); + db_snap!(index, soft_deleted_documents_ids, 2, @"[]"); } } diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index 88ec78420..0f77e2b13 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -34,6 +34,12 @@ pub struct DocumentDeletionResult { pub deleted_documents: u64, pub remaining_documents: u64, } +#[derive(Debug)] +pub struct DetailedDocumentDeletionResult { + pub deleted_documents: u64, + pub remaining_documents: u64, + pub used_soft_deletion: bool, +} impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { pub fn new( @@ -68,8 +74,16 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { self.delete_document(docid); Some(docid) } + pub fn execute(self) -> Result { + let DetailedDocumentDeletionResult { + deleted_documents, + remaining_documents, + used_soft_deletion: _, + } = self.execute_inner()?; - pub fn execute(mut self) -> Result { + Ok(DocumentDeletionResult { deleted_documents, remaining_documents }) + } + pub(crate) fn execute_inner(mut self) -> Result { self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?; // We retrieve the current documents ids that are in the database. @@ -83,7 +97,11 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { if !soft_deleted_docids.is_empty() { ClearDocuments::new(self.wtxn, self.index).execute()?; } - return Ok(DocumentDeletionResult { deleted_documents: 0, remaining_documents: 0 }); + return Ok(DetailedDocumentDeletionResult { + deleted_documents: 0, + remaining_documents: 0, + used_soft_deletion: false, + }); } // We remove the documents ids that we want to delete @@ -95,9 +113,10 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { // to delete is exactly the number of documents in the database. if current_documents_ids_len == self.to_delete_docids.len() { let remaining_documents = ClearDocuments::new(self.wtxn, self.index).execute()?; - return Ok(DocumentDeletionResult { + return Ok(DetailedDocumentDeletionResult { deleted_documents: current_documents_ids_len, remaining_documents, + used_soft_deletion: false, }); } @@ -159,9 +178,10 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { && percentage_used_by_soft_deleted_documents < 10 { self.index.put_soft_deleted_documents_ids(self.wtxn, &soft_deleted_docids)?; - return Ok(DocumentDeletionResult { + return Ok(DetailedDocumentDeletionResult { deleted_documents: self.to_delete_docids.len(), remaining_documents: documents_ids.len(), + used_soft_deletion: true, }); } @@ -488,9 +508,10 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { &self.to_delete_docids, )?; - Ok(DocumentDeletionResult { + Ok(DetailedDocumentDeletionResult { deleted_documents: self.to_delete_docids.len(), remaining_documents: documents_ids.len(), + used_soft_deletion: false, }) } } diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index db6ffedc1..478a74065 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -210,7 +210,7 @@ where primary_key, fields_ids_map, field_distribution, - external_documents_ids, + mut external_documents_ids, new_documents_ids, replaced_documents_ids, documents_count, @@ -335,8 +335,11 @@ where deletion_builder.disable_soft_deletion(self.config.disable_soft_deletion); debug!("documents to delete {:?}", replaced_documents_ids); deletion_builder.delete_documents(&replaced_documents_ids); - let deleted_documents_count = deletion_builder.execute()?; - debug!("{} documents actually deleted", deleted_documents_count.deleted_documents); + let deleted_documents_result = deletion_builder.execute_inner()?; + debug!("{} documents actually deleted", deleted_documents_result.deleted_documents); + if !deleted_documents_result.used_soft_deletion { + external_documents_ids.delete_soft_deleted_documents_ids_from_fsts()?; + } } let index_documents_ids = self.index.documents_ids(self.wtxn)?; From be3b00350c9fed3c6d1bd2e94e78b8af5e81897a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Lo=C3=AFc=20Lecrenier?= Date: Tue, 13 Dec 2022 10:15:22 +0100 Subject: [PATCH 3/3] Apply review suggestions: naming and documentation --- milli/src/update/delete_documents.rs | 20 +++++++++++++------- milli/src/update/index_documents/mod.rs | 2 +- 2 files changed, 14 insertions(+), 8 deletions(-) diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index 0f77e2b13..6c0f66685 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -29,16 +29,22 @@ pub struct DeleteDocuments<'t, 'u, 'i> { disable_soft_deletion: bool, } +/// Result of a [`DeleteDocuments`] operation. #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct DocumentDeletionResult { pub deleted_documents: u64, pub remaining_documents: u64, } + +/// Result of a [`DeleteDocuments`] operation, used for internal purposes. +/// +/// It is a superset of the [`DocumentDeletionResult`] structure, giving +/// additional information about the algorithm used to delete the documents. #[derive(Debug)] -pub struct DetailedDocumentDeletionResult { +pub(crate) struct DetailedDocumentDeletionResult { pub deleted_documents: u64, pub remaining_documents: u64, - pub used_soft_deletion: bool, + pub soft_deletion_used: bool, } impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { @@ -78,7 +84,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { let DetailedDocumentDeletionResult { deleted_documents, remaining_documents, - used_soft_deletion: _, + soft_deletion_used: _, } = self.execute_inner()?; Ok(DocumentDeletionResult { deleted_documents, remaining_documents }) @@ -100,7 +106,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { return Ok(DetailedDocumentDeletionResult { deleted_documents: 0, remaining_documents: 0, - used_soft_deletion: false, + soft_deletion_used: false, }); } @@ -116,7 +122,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { return Ok(DetailedDocumentDeletionResult { deleted_documents: current_documents_ids_len, remaining_documents, - used_soft_deletion: false, + soft_deletion_used: false, }); } @@ -181,7 +187,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { return Ok(DetailedDocumentDeletionResult { deleted_documents: self.to_delete_docids.len(), remaining_documents: documents_ids.len(), - used_soft_deletion: true, + soft_deletion_used: true, }); } @@ -511,7 +517,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { Ok(DetailedDocumentDeletionResult { deleted_documents: self.to_delete_docids.len(), remaining_documents: documents_ids.len(), - used_soft_deletion: false, + soft_deletion_used: false, }) } } diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 478a74065..74a8d2779 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -337,7 +337,7 @@ where deletion_builder.delete_documents(&replaced_documents_ids); let deleted_documents_result = deletion_builder.execute_inner()?; debug!("{} documents actually deleted", deleted_documents_result.deleted_documents); - if !deleted_documents_result.used_soft_deletion { + if !deleted_documents_result.soft_deletion_used { external_documents_ids.delete_soft_deleted_documents_ids_from_fsts()?; } }