diff --git a/milli/src/index.rs b/milli/src/index.rs index e9d66a3ae..2d489fbd1 100644 --- a/milli/src/index.rs +++ b/milli/src/index.rs @@ -2111,4 +2111,80 @@ pub(crate) mod tests { "###); db_snap!(index, soft_deleted_documents_ids, 6, @"[]"); } + + #[test] + fn bug_3021_third() { + // https://github.com/meilisearch/meilisearch/issues/3021 + let mut index = TempIndex::new(); + index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments; + + index + .update_settings(|settings| { + settings.set_primary_key("primary_key".to_owned()); + }) + .unwrap(); + + index + .add_documents(documents!([ + { "primary_key": 3 }, + { "primary_key": 4 }, + { "primary_key": 5 } + ])) + .unwrap(); + + db_snap!(index, documents_ids, @"[0, 1, 2, ]"); + db_snap!(index, external_documents_ids, 1, @r###" + soft: + hard: + 3 0 + 4 1 + 5 2 + "###); + db_snap!(index, soft_deleted_documents_ids, 1, @"[]"); + + let mut wtxn = index.write_txn().unwrap(); + let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap(); + delete.delete_external_id("3"); + delete.execute().unwrap(); + wtxn.commit().unwrap(); + + db_snap!(index, documents_ids, @"[1, 2, ]"); + db_snap!(index, external_documents_ids, 2, @r###" + soft: + hard: + 3 0 + 4 1 + 5 2 + "###); + db_snap!(index, soft_deleted_documents_ids, 2, @"[0, ]"); + + index.index_documents_config.disable_soft_deletion = true; + + index.add_documents(documents!([{ "primary_key": "4", "a": 2 }])).unwrap(); + + db_snap!(index, documents_ids, @"[2, 3, ]"); + db_snap!(index, external_documents_ids, 2, @r###" + soft: + hard: + 4 3 + 5 2 + "###); + db_snap!(index, soft_deleted_documents_ids, 2, @"[]"); + + index + .add_documents(documents!([ + { "primary_key": "3" }, + ])) + .unwrap(); + + db_snap!(index, documents_ids, @"[0, 2, 3, ]"); + db_snap!(index, external_documents_ids, 2, @r###" + soft: + hard: + 3 0 + 4 3 + 5 2 + "###); + db_snap!(index, soft_deleted_documents_ids, 2, @"[]"); + } } diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index 88ec78420..6c0f66685 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -29,12 +29,24 @@ pub struct DeleteDocuments<'t, 'u, 'i> { disable_soft_deletion: bool, } +/// Result of a [`DeleteDocuments`] operation. #[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)] pub struct DocumentDeletionResult { pub deleted_documents: u64, pub remaining_documents: u64, } +/// Result of a [`DeleteDocuments`] operation, used for internal purposes. +/// +/// It is a superset of the [`DocumentDeletionResult`] structure, giving +/// additional information about the algorithm used to delete the documents. +#[derive(Debug)] +pub(crate) struct DetailedDocumentDeletionResult { + pub deleted_documents: u64, + pub remaining_documents: u64, + pub soft_deletion_used: bool, +} + impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { pub fn new( wtxn: &'t mut heed::RwTxn<'i, 'u>, @@ -68,8 +80,16 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { self.delete_document(docid); Some(docid) } + pub fn execute(self) -> Result { + let DetailedDocumentDeletionResult { + deleted_documents, + remaining_documents, + soft_deletion_used: _, + } = self.execute_inner()?; - pub fn execute(mut self) -> Result { + Ok(DocumentDeletionResult { deleted_documents, remaining_documents }) + } + pub(crate) fn execute_inner(mut self) -> Result { self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?; // We retrieve the current documents ids that are in the database. @@ -83,7 +103,11 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { if !soft_deleted_docids.is_empty() { ClearDocuments::new(self.wtxn, self.index).execute()?; } - return Ok(DocumentDeletionResult { deleted_documents: 0, remaining_documents: 0 }); + return Ok(DetailedDocumentDeletionResult { + deleted_documents: 0, + remaining_documents: 0, + soft_deletion_used: false, + }); } // We remove the documents ids that we want to delete @@ -95,9 +119,10 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { // to delete is exactly the number of documents in the database. if current_documents_ids_len == self.to_delete_docids.len() { let remaining_documents = ClearDocuments::new(self.wtxn, self.index).execute()?; - return Ok(DocumentDeletionResult { + return Ok(DetailedDocumentDeletionResult { deleted_documents: current_documents_ids_len, remaining_documents, + soft_deletion_used: false, }); } @@ -159,9 +184,10 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { && percentage_used_by_soft_deleted_documents < 10 { self.index.put_soft_deleted_documents_ids(self.wtxn, &soft_deleted_docids)?; - return Ok(DocumentDeletionResult { + return Ok(DetailedDocumentDeletionResult { deleted_documents: self.to_delete_docids.len(), remaining_documents: documents_ids.len(), + soft_deletion_used: true, }); } @@ -488,9 +514,10 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { &self.to_delete_docids, )?; - Ok(DocumentDeletionResult { + Ok(DetailedDocumentDeletionResult { deleted_documents: self.to_delete_docids.len(), remaining_documents: documents_ids.len(), + soft_deletion_used: false, }) } } diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index db6ffedc1..74a8d2779 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -210,7 +210,7 @@ where primary_key, fields_ids_map, field_distribution, - external_documents_ids, + mut external_documents_ids, new_documents_ids, replaced_documents_ids, documents_count, @@ -335,8 +335,11 @@ where deletion_builder.disable_soft_deletion(self.config.disable_soft_deletion); debug!("documents to delete {:?}", replaced_documents_ids); deletion_builder.delete_documents(&replaced_documents_ids); - let deleted_documents_count = deletion_builder.execute()?; - debug!("{} documents actually deleted", deleted_documents_count.deleted_documents); + let deleted_documents_result = deletion_builder.execute_inner()?; + debug!("{} documents actually deleted", deleted_documents_result.deleted_documents); + if !deleted_documents_result.soft_deletion_used { + external_documents_ids.delete_soft_deleted_documents_ids_from_fsts()?; + } } let index_documents_ids = self.index.documents_ids(self.wtxn)?;