Fix bug in handling of soft deleted documents when updating settings

This commit is contained in:
Loïc Lecrenier 2022-12-06 11:38:15 +01:00
parent d6eacb2aac
commit 67d8cec209
4 changed files with 310 additions and 182 deletions

View file

@ -1185,13 +1185,15 @@ pub(crate) mod tests {
use big_s::S;
use heed::{EnvOpenOptions, RwTxn};
use maplit::hashset;
use tempfile::TempDir;
use crate::documents::DocumentsBatchReader;
use crate::error::{Error, InternalError};
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
use crate::update::{
self, DeleteDocuments, IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings,
self, DeleteDocuments, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod,
IndexerConfig, Settings,
};
use crate::{db_snap, obkv_to_json, Index};
@ -1485,7 +1487,7 @@ pub(crate) mod tests {
use big_s::S;
use maplit::hashset;
let mut index = TempIndex::new();
let index = TempIndex::new();
index
.update_settings(|settings| {
@ -1544,7 +1546,6 @@ pub(crate) mod tests {
1 0 3 1 [3, 6, ]
"###);
index.index_documents_config.disable_soft_deletion = false;
index
.add_documents(documents!([{ "id": 3, "doggo": 4 }, { "id": 3, "doggo": 5 },{ "id": 3, "doggo": 4 }]))
.unwrap();
@ -1568,7 +1569,6 @@ pub(crate) mod tests {
1 0 4 1 [7, ]
"###);
index.index_documents_config.disable_soft_deletion = false;
index
.update_settings(|settings| {
settings.set_distinct_field("id".to_owned());
@ -1576,37 +1576,13 @@ pub(crate) mod tests {
.unwrap();
db_snap!(index, documents_ids, @"[4, 5, 6, 7, ]");
db_snap!(index, external_documents_ids, 3, @r###"
soft:
3 7
hard:
0 4
1 5
2 6
3 3
"###);
db_snap!(index, soft_deleted_documents_ids, 3, @"[]");
db_snap!(index, facet_id_f64_docids, 3, @r###"
0 0 0 1 [4, ]
0 0 1 1 [5, ]
0 0 2 1 [6, ]
0 0 3 1 [7, ]
1 0 1 1 [4, ]
1 0 2 1 [5, ]
1 0 3 1 [6, ]
1 0 4 1 [7, ]
"###);
index.index_documents_config.disable_soft_deletion = true;
index.add_documents(documents!([{ "id": 3, "doggo": 4 }])).unwrap();
db_snap!(index, external_documents_ids, 3, @r###"
soft:
3 7
hard:
0 4
1 5
2 6
3 3
3 7
"###);
db_snap!(index, soft_deleted_documents_ids, 3, @"[]");
db_snap!(index, facet_id_f64_docids, 3, @r###"
@ -1619,140 +1595,6 @@ pub(crate) mod tests {
1 0 3 1 [6, ]
1 0 4 1 [7, ]
"###);
let mut wtxn = index.write_txn().unwrap();
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
let docid = delete.delete_external_id("3").unwrap();
insta::assert_snapshot!(format!("{docid}"), @"7");
delete.execute().unwrap();
wtxn.commit().unwrap();
db_snap!(index, documents_ids, @"[4, 5, 6, ]");
db_snap!(index, external_documents_ids, 4, @r###"
soft:
3 7
hard:
0 4
1 5
2 6
3 3
"###);
db_snap!(index, soft_deleted_documents_ids, 4, @"[7, ]");
db_snap!(index, facet_id_f64_docids, 4, @r###"
0 0 0 1 [4, ]
0 0 1 1 [5, ]
0 0 2 1 [6, ]
0 0 3 1 [7, ]
1 0 1 1 [4, ]
1 0 2 1 [5, ]
1 0 3 1 [6, ]
1 0 4 1 [7, ]
"###);
index.index_documents_config.disable_soft_deletion = false;
index.add_documents(documents!([{ "id": 3, "doggo": 4 }])).unwrap();
db_snap!(index, external_documents_ids, 4, @r###"
soft:
3 0
hard:
0 4
1 5
2 6
3 3
"###);
db_snap!(index, soft_deleted_documents_ids, 4, @"[7, ]");
db_snap!(index, facet_id_f64_docids, 4, @r###"
0 0 0 1 [4, ]
0 0 1 1 [5, ]
0 0 2 1 [6, ]
0 0 3 1 [0, 7, ]
1 0 1 1 [4, ]
1 0 2 1 [5, ]
1 0 3 1 [6, ]
1 0 4 1 [0, 7, ]
"###);
index.index_documents_config.disable_soft_deletion = false;
index.add_documents(documents!([{ "id": 3, "doggo": 5 }])).unwrap();
db_snap!(index, external_documents_ids, 4, @r###"
soft:
3 1
hard:
0 4
1 5
2 6
3 3
"###);
db_snap!(index, soft_deleted_documents_ids, 4, @"[0, 7, ]");
db_snap!(index, facet_id_f64_docids, 4, @r###"
0 0 0 1 [4, ]
0 0 1 1 [5, ]
0 0 2 1 [6, ]
0 0 3 1 [0, 1, 7, ]
1 0 1 1 [4, ]
1 0 2 1 [5, ]
1 0 3 1 [6, ]
1 0 4 1 [0, 7, ]
1 0 5 1 [1, ]
"###);
index.index_documents_config.disable_soft_deletion = false;
index.add_documents(documents!([{ "id": 3, "doggo": 5, "id": 2, "doggo": 4 }])).unwrap();
db_snap!(index, external_documents_ids, 4, @r###"
soft:
hard:
0 4
1 5
2 2
3 1
"###);
db_snap!(index, soft_deleted_documents_ids, 4, @"[0, 6, 7, ]");
db_snap!(index, facet_id_f64_docids, 4, @r###"
0 0 0 1 [4, ]
0 0 1 1 [5, ]
0 0 2 1 [2, 6, ]
0 0 3 1 [0, 1, 7, ]
1 0 1 1 [4, ]
1 0 2 1 [5, ]
1 0 3 1 [6, ]
1 0 4 1 [0, 2, 7, ]
1 0 5 1 [1, ]
"###);
index.index_documents_config.disable_soft_deletion = false;
index
.add_documents(documents!([{ "id": 4, "doggo": 5 }, { "id": 3, "doggo": 5 }]))
.unwrap();
db_snap!(index, external_documents_ids, 4, @r###"
soft:
4 3
hard:
0 4
1 5
2 2
3 1
"###);
db_snap!(index, soft_deleted_documents_ids, 4, @"[0, 6, 7, ]");
db_snap!(index, facet_id_f64_docids, 4, @r###"
0 0 0 1 [4, ]
0 0 1 1 [5, ]
0 0 2 1 [2, 6, ]
0 0 3 1 [0, 1, 7, ]
0 0 4 1 [3, ]
1 0 1 1 [4, ]
1 0 2 1 [5, ]
1 0 3 1 [6, ]
1 0 4 1 [0, 2, 7, ]
1 0 5 1 [1, 3, ]
"###);
}
#[test]
@ -2020,4 +1862,253 @@ pub(crate) mod tests {
drop(rtxn);
}
}
#[test]
fn bug_3021_first() {
// https://github.com/meilisearch/meilisearch/issues/3021
let mut index = TempIndex::new();
index.index_documents_config.update_method = IndexDocumentsMethod::ReplaceDocuments;
index
.update_settings(|settings| {
settings.set_primary_key("primary_key".to_owned());
})
.unwrap();
index
.add_documents(documents!([
{ "primary_key": 38 },
{ "primary_key": 34 }
]))
.unwrap();
db_snap!(index, documents_ids, @"[0, 1, ]");
db_snap!(index, external_documents_ids, 1, @r###"
soft:
hard:
34 1
38 0
"###);
db_snap!(index, soft_deleted_documents_ids, 1, @"[]");
let mut wtxn = index.write_txn().unwrap();
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
delete.delete_external_id("34");
delete.execute().unwrap();
wtxn.commit().unwrap();
db_snap!(index, documents_ids, @"[0, ]");
db_snap!(index, external_documents_ids, 2, @r###"
soft:
hard:
34 1
38 0
"###);
db_snap!(index, soft_deleted_documents_ids, 2, @"[1, ]");
index
.update_settings(|s| {
s.set_searchable_fields(vec![]);
})
.unwrap();
// The key point of the test is to verify that the external documents ids
// do not contain any entry for previously soft-deleted document ids
db_snap!(index, documents_ids, @"[0, ]");
db_snap!(index, external_documents_ids, 3, @r###"
soft:
hard:
38 0
"###);
db_snap!(index, soft_deleted_documents_ids, 3, @"[]");
// So that this document addition works correctly now.
// It would be wrongly interpreted as a replacement before
index.add_documents(documents!({ "primary_key": 34 })).unwrap();
db_snap!(index, documents_ids, @"[0, 1, ]");
db_snap!(index, external_documents_ids, 4, @r###"
soft:
hard:
34 1
38 0
"###);
db_snap!(index, soft_deleted_documents_ids, 4, @"[]");
// We do the test again, but deleting the document with id 0 instead of id 1 now
let mut wtxn = index.write_txn().unwrap();
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
delete.delete_external_id("38");
delete.execute().unwrap();
wtxn.commit().unwrap();
db_snap!(index, documents_ids, @"[1, ]");
db_snap!(index, external_documents_ids, 5, @r###"
soft:
hard:
34 1
38 0
"###);
db_snap!(index, soft_deleted_documents_ids, 5, @"[0, ]");
index
.update_settings(|s| {
s.set_searchable_fields(vec!["primary_key".to_owned()]);
})
.unwrap();
db_snap!(index, documents_ids, @"[1, ]");
db_snap!(index, external_documents_ids, 6, @r###"
soft:
hard:
34 1
"###);
db_snap!(index, soft_deleted_documents_ids, 6, @"[]");
// And adding lots of documents afterwards instead of just one.
// These extra subtests don't add much, but it's better than nothing.
index.add_documents(documents!([{ "primary_key": 38 }, { "primary_key": 39 }, { "primary_key": 41 }, { "primary_key": 40 }, { "primary_key": 41 }, { "primary_key": 42 }])).unwrap();
db_snap!(index, documents_ids, @"[0, 1, 2, 3, 4, 5, ]");
db_snap!(index, external_documents_ids, 7, @r###"
soft:
hard:
34 1
38 0
39 2
40 4
41 3
42 5
"###);
db_snap!(index, soft_deleted_documents_ids, 7, @"[]");
}
#[test]
fn bug_3021_second() {
// https://github.com/meilisearch/meilisearch/issues/3021
let mut index = TempIndex::new();
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
index
.update_settings(|settings| {
settings.set_primary_key("primary_key".to_owned());
})
.unwrap();
index
.add_documents(documents!([
{ "primary_key": 30 },
{ "primary_key": 34 }
]))
.unwrap();
db_snap!(index, documents_ids, @"[0, 1, ]");
db_snap!(index, external_documents_ids, 1, @r###"
soft:
hard:
30 0
34 1
"###);
db_snap!(index, soft_deleted_documents_ids, 1, @"[]");
let mut wtxn = index.write_txn().unwrap();
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
delete.delete_external_id("34");
delete.execute().unwrap();
wtxn.commit().unwrap();
db_snap!(index, documents_ids, @"[0, ]");
db_snap!(index, external_documents_ids, 2, @r###"
soft:
hard:
30 0
34 1
"###);
db_snap!(index, soft_deleted_documents_ids, 2, @"[1, ]");
index
.update_settings(|s| {
s.set_searchable_fields(vec![]);
})
.unwrap();
// The key point of the test is to verify that the external documents ids
// do not contain any entry for previously soft-deleted document ids
db_snap!(index, documents_ids, @"[0, ]");
db_snap!(index, external_documents_ids, 3, @r###"
soft:
hard:
30 0
"###);
db_snap!(index, soft_deleted_documents_ids, 3, @"[]");
// So that when we add a new document
index.add_documents(documents!({ "primary_key": 35, "b": 2 })).unwrap();
db_snap!(index, documents_ids, @"[0, 1, ]");
// The external documents ids don't have several external ids pointing to the same
// internal document id
db_snap!(index, external_documents_ids, 4, @r###"
soft:
hard:
30 0
35 1
"###);
db_snap!(index, soft_deleted_documents_ids, 4, @"[]");
// And when we add 34 again, we don't replace document 35
index.add_documents(documents!({ "primary_key": 34, "a": 1 })).unwrap();
// And document 35 still exists, is not deleted
db_snap!(index, documents_ids, @"[0, 1, 2, ]");
db_snap!(index, external_documents_ids, 5, @r###"
soft:
hard:
30 0
34 2
35 1
"###);
db_snap!(index, soft_deleted_documents_ids, 5, @"[]");
let rtxn = index.read_txn().unwrap();
let (_docid, obkv) = index.documents(&rtxn, [0]).unwrap()[0];
let json = obkv_to_json(&[0, 1, 2], &index.fields_ids_map(&rtxn).unwrap(), obkv).unwrap();
insta::assert_debug_snapshot!(json, @r###"
{
"primary_key": Number(30),
}
"###);
// Furthermore, when we retrieve document 34, it is not the result of merging 35 with 34
let (_docid, obkv) = index.documents(&rtxn, [2]).unwrap()[0];
let json = obkv_to_json(&[0, 1, 2], &index.fields_ids_map(&rtxn).unwrap(), obkv).unwrap();
insta::assert_debug_snapshot!(json, @r###"
{
"primary_key": Number(34),
"a": Number(1),
}
"###);
drop(rtxn);
// Add new documents again
index
.add_documents(
documents!([{ "primary_key": 37 }, { "primary_key": 38 }, { "primary_key": 39 }]),
)
.unwrap();
db_snap!(index, documents_ids, @"[0, 1, 2, 3, 4, 5, ]");
db_snap!(index, external_documents_ids, 6, @r###"
soft:
hard:
30 0
34 2
35 1
37 3
38 4
39 5
"###);
db_snap!(index, soft_deleted_documents_ids, 6, @"[]");
}
}