mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-11 22:14:32 +01:00
Merge #734
734: Fix bug 2945/3021 (missing key in documents database) r=Kerollmops a=loiclec # Pull Request ## Related issue Fixes (partially, until merged into meilisearch) https://github.com/meilisearch/meilisearch/issues/2945 (until we integrate the new milli bump into meilisearch). **Note that a dump will not be sufficient to upgrade from meilisearch v0.30.2 to meilisearch v0.30.3 due to this fix** because the bug could have caused the `documents` database to be corrupted. Instead, a full manual reimport of the documents will be necessary. ## What does this PR do? There was a bug happening when: 1. A few documents are added to the index 2. Some of these documents are soft-deleted 3. New documents are added, replacing existing ones and triggering a hard-deletion The `IndexDocuments::execute` method would then perform the hard-deletion but forget to change the `external_document_ids` structure appropriately. As a result, the `external_document_ids` would contain keys corresponding to documents that do no exist anymore. To fix this bug, I split the `DeleteDocuments::execute` method into two: `execute_inner` and `execute`. - `execute_inner` returns a `DetailedDocumentDeletionResult` which says whether soft-deletion was used or not - `execute` keeps the exact same signature and behaviour Then, when deleting replaced documents inside `IndexDocuments::execute`, we call `DeleteDocuments::execute_inner` instead of `DeleteDocuments::execute`. If soft-deletion was used, nothing more is done. But if hard-deletion was used, we remove every reference to soft-deleted documents in the new `external_documents_ids` structure. ## Correctness - Every other test still passes - The reproduction test case now passes - In a different branch ([`update-fuzz-test`](https://github.com/meilisearch/milli/pull/735)), I created a fuzz-test that reproduces the past two bugs. This fuzz test cannot find this bug through any combination of some hand-selected `DocumentAddition / DocumentDeletion / DocumentClear / SettingsUpdate` operations. In that test, each relevant operations can be executed with or without soft-deletion, and document additions can be done in batches, replacing or updating existing documents. Co-authored-by: Loïc Lecrenier <loic.lecrenier@me.com>
This commit is contained in:
commit
e0a8f8cb5a
@ -2111,4 +2111,80 @@ pub(crate) mod tests {
|
||||
"###);
|
||||
db_snap!(index, soft_deleted_documents_ids, 6, @"[]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bug_3021_third() {
|
||||
// https://github.com/meilisearch/meilisearch/issues/3021
|
||||
let mut index = TempIndex::new();
|
||||
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
|
||||
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_primary_key("primary_key".to_owned());
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
index
|
||||
.add_documents(documents!([
|
||||
{ "primary_key": 3 },
|
||||
{ "primary_key": 4 },
|
||||
{ "primary_key": 5 }
|
||||
]))
|
||||
.unwrap();
|
||||
|
||||
db_snap!(index, documents_ids, @"[0, 1, 2, ]");
|
||||
db_snap!(index, external_documents_ids, 1, @r###"
|
||||
soft:
|
||||
hard:
|
||||
3 0
|
||||
4 1
|
||||
5 2
|
||||
"###);
|
||||
db_snap!(index, soft_deleted_documents_ids, 1, @"[]");
|
||||
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
|
||||
delete.delete_external_id("3");
|
||||
delete.execute().unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
db_snap!(index, documents_ids, @"[1, 2, ]");
|
||||
db_snap!(index, external_documents_ids, 2, @r###"
|
||||
soft:
|
||||
hard:
|
||||
3 0
|
||||
4 1
|
||||
5 2
|
||||
"###);
|
||||
db_snap!(index, soft_deleted_documents_ids, 2, @"[0, ]");
|
||||
|
||||
index.index_documents_config.disable_soft_deletion = true;
|
||||
|
||||
index.add_documents(documents!([{ "primary_key": "4", "a": 2 }])).unwrap();
|
||||
|
||||
db_snap!(index, documents_ids, @"[2, 3, ]");
|
||||
db_snap!(index, external_documents_ids, 2, @r###"
|
||||
soft:
|
||||
hard:
|
||||
4 3
|
||||
5 2
|
||||
"###);
|
||||
db_snap!(index, soft_deleted_documents_ids, 2, @"[]");
|
||||
|
||||
index
|
||||
.add_documents(documents!([
|
||||
{ "primary_key": "3" },
|
||||
]))
|
||||
.unwrap();
|
||||
|
||||
db_snap!(index, documents_ids, @"[0, 2, 3, ]");
|
||||
db_snap!(index, external_documents_ids, 2, @r###"
|
||||
soft:
|
||||
hard:
|
||||
3 0
|
||||
4 3
|
||||
5 2
|
||||
"###);
|
||||
db_snap!(index, soft_deleted_documents_ids, 2, @"[]");
|
||||
}
|
||||
}
|
||||
|
@ -29,12 +29,24 @@ pub struct DeleteDocuments<'t, 'u, 'i> {
|
||||
disable_soft_deletion: bool,
|
||||
}
|
||||
|
||||
/// Result of a [`DeleteDocuments`] operation.
|
||||
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize)]
|
||||
pub struct DocumentDeletionResult {
|
||||
pub deleted_documents: u64,
|
||||
pub remaining_documents: u64,
|
||||
}
|
||||
|
||||
/// Result of a [`DeleteDocuments`] operation, used for internal purposes.
|
||||
///
|
||||
/// It is a superset of the [`DocumentDeletionResult`] structure, giving
|
||||
/// additional information about the algorithm used to delete the documents.
|
||||
#[derive(Debug)]
|
||||
pub(crate) struct DetailedDocumentDeletionResult {
|
||||
pub deleted_documents: u64,
|
||||
pub remaining_documents: u64,
|
||||
pub soft_deletion_used: bool,
|
||||
}
|
||||
|
||||
impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
pub fn new(
|
||||
wtxn: &'t mut heed::RwTxn<'i, 'u>,
|
||||
@ -68,8 +80,16 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
self.delete_document(docid);
|
||||
Some(docid)
|
||||
}
|
||||
pub fn execute(self) -> Result<DocumentDeletionResult> {
|
||||
let DetailedDocumentDeletionResult {
|
||||
deleted_documents,
|
||||
remaining_documents,
|
||||
soft_deletion_used: _,
|
||||
} = self.execute_inner()?;
|
||||
|
||||
pub fn execute(mut self) -> Result<DocumentDeletionResult> {
|
||||
Ok(DocumentDeletionResult { deleted_documents, remaining_documents })
|
||||
}
|
||||
pub(crate) fn execute_inner(mut self) -> Result<DetailedDocumentDeletionResult> {
|
||||
self.index.set_updated_at(self.wtxn, &OffsetDateTime::now_utc())?;
|
||||
|
||||
// We retrieve the current documents ids that are in the database.
|
||||
@ -83,7 +103,11 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
if !soft_deleted_docids.is_empty() {
|
||||
ClearDocuments::new(self.wtxn, self.index).execute()?;
|
||||
}
|
||||
return Ok(DocumentDeletionResult { deleted_documents: 0, remaining_documents: 0 });
|
||||
return Ok(DetailedDocumentDeletionResult {
|
||||
deleted_documents: 0,
|
||||
remaining_documents: 0,
|
||||
soft_deletion_used: false,
|
||||
});
|
||||
}
|
||||
|
||||
// We remove the documents ids that we want to delete
|
||||
@ -95,9 +119,10 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
// to delete is exactly the number of documents in the database.
|
||||
if current_documents_ids_len == self.to_delete_docids.len() {
|
||||
let remaining_documents = ClearDocuments::new(self.wtxn, self.index).execute()?;
|
||||
return Ok(DocumentDeletionResult {
|
||||
return Ok(DetailedDocumentDeletionResult {
|
||||
deleted_documents: current_documents_ids_len,
|
||||
remaining_documents,
|
||||
soft_deletion_used: false,
|
||||
});
|
||||
}
|
||||
|
||||
@ -159,9 +184,10 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
&& percentage_used_by_soft_deleted_documents < 10
|
||||
{
|
||||
self.index.put_soft_deleted_documents_ids(self.wtxn, &soft_deleted_docids)?;
|
||||
return Ok(DocumentDeletionResult {
|
||||
return Ok(DetailedDocumentDeletionResult {
|
||||
deleted_documents: self.to_delete_docids.len(),
|
||||
remaining_documents: documents_ids.len(),
|
||||
soft_deletion_used: true,
|
||||
});
|
||||
}
|
||||
|
||||
@ -488,9 +514,10 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
||||
&self.to_delete_docids,
|
||||
)?;
|
||||
|
||||
Ok(DocumentDeletionResult {
|
||||
Ok(DetailedDocumentDeletionResult {
|
||||
deleted_documents: self.to_delete_docids.len(),
|
||||
remaining_documents: documents_ids.len(),
|
||||
soft_deletion_used: false,
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -210,7 +210,7 @@ where
|
||||
primary_key,
|
||||
fields_ids_map,
|
||||
field_distribution,
|
||||
external_documents_ids,
|
||||
mut external_documents_ids,
|
||||
new_documents_ids,
|
||||
replaced_documents_ids,
|
||||
documents_count,
|
||||
@ -335,8 +335,11 @@ where
|
||||
deletion_builder.disable_soft_deletion(self.config.disable_soft_deletion);
|
||||
debug!("documents to delete {:?}", replaced_documents_ids);
|
||||
deletion_builder.delete_documents(&replaced_documents_ids);
|
||||
let deleted_documents_count = deletion_builder.execute()?;
|
||||
debug!("{} documents actually deleted", deleted_documents_count.deleted_documents);
|
||||
let deleted_documents_result = deletion_builder.execute_inner()?;
|
||||
debug!("{} documents actually deleted", deleted_documents_result.deleted_documents);
|
||||
if !deleted_documents_result.soft_deletion_used {
|
||||
external_documents_ids.delete_soft_deleted_documents_ids_from_fsts()?;
|
||||
}
|
||||
}
|
||||
|
||||
let index_documents_ids = self.index.documents_ids(self.wtxn)?;
|
||||
|
Loading…
x
Reference in New Issue
Block a user