mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-04 20:37:15 +02:00
Fix bug in handling of soft deleted documents when updating settings
This commit is contained in:
parent
d6eacb2aac
commit
67d8cec209
4 changed files with 310 additions and 182 deletions
|
@ -1185,13 +1185,15 @@ pub(crate) mod tests {
|
|||
|
||||
use big_s::S;
|
||||
use heed::{EnvOpenOptions, RwTxn};
|
||||
use maplit::hashset;
|
||||
use tempfile::TempDir;
|
||||
|
||||
use crate::documents::DocumentsBatchReader;
|
||||
use crate::error::{Error, InternalError};
|
||||
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
|
||||
use crate::update::{
|
||||
self, DeleteDocuments, IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings,
|
||||
self, DeleteDocuments, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod,
|
||||
IndexerConfig, Settings,
|
||||
};
|
||||
use crate::{db_snap, obkv_to_json, Index};
|
||||
|
||||
|
@ -1485,7 +1487,7 @@ pub(crate) mod tests {
|
|||
use big_s::S;
|
||||
use maplit::hashset;
|
||||
|
||||
let mut index = TempIndex::new();
|
||||
let index = TempIndex::new();
|
||||
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
|
@ -1544,7 +1546,6 @@ pub(crate) mod tests {
|
|||
1 0 3 1 [3, 6, ]
|
||||
"###);
|
||||
|
||||
index.index_documents_config.disable_soft_deletion = false;
|
||||
index
|
||||
.add_documents(documents!([{ "id": 3, "doggo": 4 }, { "id": 3, "doggo": 5 },{ "id": 3, "doggo": 4 }]))
|
||||
.unwrap();
|
||||
|
@ -1568,7 +1569,6 @@ pub(crate) mod tests {
|
|||
1 0 4 1 [7, ]
|
||||
"###);
|
||||
|
||||
index.index_documents_config.disable_soft_deletion = false;
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_distinct_field("id".to_owned());
|
||||
|
@ -1576,37 +1576,13 @@ pub(crate) mod tests {
|
|||
.unwrap();
|
||||
|
||||
db_snap!(index, documents_ids, @"[4, 5, 6, 7, ]");
|
||||
db_snap!(index, external_documents_ids, 3, @r###"
|
||||
soft:
|
||||
3 7
|
||||
hard:
|
||||
0 4
|
||||
1 5
|
||||
2 6
|
||||
3 3
|
||||
"###);
|
||||
db_snap!(index, soft_deleted_documents_ids, 3, @"[]");
|
||||
db_snap!(index, facet_id_f64_docids, 3, @r###"
|
||||
0 0 0 1 [4, ]
|
||||
0 0 1 1 [5, ]
|
||||
0 0 2 1 [6, ]
|
||||
0 0 3 1 [7, ]
|
||||
1 0 1 1 [4, ]
|
||||
1 0 2 1 [5, ]
|
||||
1 0 3 1 [6, ]
|
||||
1 0 4 1 [7, ]
|
||||
"###);
|
||||
|
||||
index.index_documents_config.disable_soft_deletion = true;
|
||||
index.add_documents(documents!([{ "id": 3, "doggo": 4 }])).unwrap();
|
||||
db_snap!(index, external_documents_ids, 3, @r###"
|
||||
soft:
|
||||
3 7
|
||||
hard:
|
||||
0 4
|
||||
1 5
|
||||
2 6
|
||||
3 3
|
||||
3 7
|
||||
"###);
|
||||
db_snap!(index, soft_deleted_documents_ids, 3, @"[]");
|
||||
db_snap!(index, facet_id_f64_docids, 3, @r###"
|
||||
|
@ -1619,140 +1595,6 @@ pub(crate) mod tests {
|
|||
1 0 3 1 [6, ]
|
||||
1 0 4 1 [7, ]
|
||||
"###);
|
||||
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
|
||||
let docid = delete.delete_external_id("3").unwrap();
|
||||
insta::assert_snapshot!(format!("{docid}"), @"7");
|
||||
delete.execute().unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
db_snap!(index, documents_ids, @"[4, 5, 6, ]");
|
||||
db_snap!(index, external_documents_ids, 4, @r###"
|
||||
soft:
|
||||
3 7
|
||||
hard:
|
||||
0 4
|
||||
1 5
|
||||
2 6
|
||||
3 3
|
||||
"###);
|
||||
|
||||
db_snap!(index, soft_deleted_documents_ids, 4, @"[7, ]");
|
||||
db_snap!(index, facet_id_f64_docids, 4, @r###"
|
||||
0 0 0 1 [4, ]
|
||||
0 0 1 1 [5, ]
|
||||
0 0 2 1 [6, ]
|
||||
0 0 3 1 [7, ]
|
||||
1 0 1 1 [4, ]
|
||||
1 0 2 1 [5, ]
|
||||
1 0 3 1 [6, ]
|
||||
1 0 4 1 [7, ]
|
||||
"###);
|
||||
|
||||
index.index_documents_config.disable_soft_deletion = false;
|
||||
index.add_documents(documents!([{ "id": 3, "doggo": 4 }])).unwrap();
|
||||
|
||||
db_snap!(index, external_documents_ids, 4, @r###"
|
||||
soft:
|
||||
3 0
|
||||
hard:
|
||||
0 4
|
||||
1 5
|
||||
2 6
|
||||
3 3
|
||||
"###);
|
||||
|
||||
db_snap!(index, soft_deleted_documents_ids, 4, @"[7, ]");
|
||||
db_snap!(index, facet_id_f64_docids, 4, @r###"
|
||||
0 0 0 1 [4, ]
|
||||
0 0 1 1 [5, ]
|
||||
0 0 2 1 [6, ]
|
||||
0 0 3 1 [0, 7, ]
|
||||
1 0 1 1 [4, ]
|
||||
1 0 2 1 [5, ]
|
||||
1 0 3 1 [6, ]
|
||||
1 0 4 1 [0, 7, ]
|
||||
"###);
|
||||
|
||||
index.index_documents_config.disable_soft_deletion = false;
|
||||
index.add_documents(documents!([{ "id": 3, "doggo": 5 }])).unwrap();
|
||||
|
||||
db_snap!(index, external_documents_ids, 4, @r###"
|
||||
soft:
|
||||
3 1
|
||||
hard:
|
||||
0 4
|
||||
1 5
|
||||
2 6
|
||||
3 3
|
||||
"###);
|
||||
|
||||
db_snap!(index, soft_deleted_documents_ids, 4, @"[0, 7, ]");
|
||||
db_snap!(index, facet_id_f64_docids, 4, @r###"
|
||||
0 0 0 1 [4, ]
|
||||
0 0 1 1 [5, ]
|
||||
0 0 2 1 [6, ]
|
||||
0 0 3 1 [0, 1, 7, ]
|
||||
1 0 1 1 [4, ]
|
||||
1 0 2 1 [5, ]
|
||||
1 0 3 1 [6, ]
|
||||
1 0 4 1 [0, 7, ]
|
||||
1 0 5 1 [1, ]
|
||||
"###);
|
||||
|
||||
index.index_documents_config.disable_soft_deletion = false;
|
||||
index.add_documents(documents!([{ "id": 3, "doggo": 5, "id": 2, "doggo": 4 }])).unwrap();
|
||||
db_snap!(index, external_documents_ids, 4, @r###"
|
||||
soft:
|
||||
hard:
|
||||
0 4
|
||||
1 5
|
||||
2 2
|
||||
3 1
|
||||
"###);
|
||||
|
||||
db_snap!(index, soft_deleted_documents_ids, 4, @"[0, 6, 7, ]");
|
||||
db_snap!(index, facet_id_f64_docids, 4, @r###"
|
||||
0 0 0 1 [4, ]
|
||||
0 0 1 1 [5, ]
|
||||
0 0 2 1 [2, 6, ]
|
||||
0 0 3 1 [0, 1, 7, ]
|
||||
1 0 1 1 [4, ]
|
||||
1 0 2 1 [5, ]
|
||||
1 0 3 1 [6, ]
|
||||
1 0 4 1 [0, 2, 7, ]
|
||||
1 0 5 1 [1, ]
|
||||
"###);
|
||||
|
||||
index.index_documents_config.disable_soft_deletion = false;
|
||||
index
|
||||
.add_documents(documents!([{ "id": 4, "doggo": 5 }, { "id": 3, "doggo": 5 }]))
|
||||
.unwrap();
|
||||
|
||||
db_snap!(index, external_documents_ids, 4, @r###"
|
||||
soft:
|
||||
4 3
|
||||
hard:
|
||||
0 4
|
||||
1 5
|
||||
2 2
|
||||
3 1
|
||||
"###);
|
||||
|
||||
db_snap!(index, soft_deleted_documents_ids, 4, @"[0, 6, 7, ]");
|
||||
db_snap!(index, facet_id_f64_docids, 4, @r###"
|
||||
0 0 0 1 [4, ]
|
||||
0 0 1 1 [5, ]
|
||||
0 0 2 1 [2, 6, ]
|
||||
0 0 3 1 [0, 1, 7, ]
|
||||
0 0 4 1 [3, ]
|
||||
1 0 1 1 [4, ]
|
||||
1 0 2 1 [5, ]
|
||||
1 0 3 1 [6, ]
|
||||
1 0 4 1 [0, 2, 7, ]
|
||||
1 0 5 1 [1, 3, ]
|
||||
"###);
|
||||
}
|
||||
|
||||
#[test]
|
||||
|
@ -2020,4 +1862,253 @@ pub(crate) mod tests {
|
|||
drop(rtxn);
|
||||
}
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bug_3021_first() {
|
||||
// https://github.com/meilisearch/meilisearch/issues/3021
|
||||
let mut index = TempIndex::new();
|
||||
index.index_documents_config.update_method = IndexDocumentsMethod::ReplaceDocuments;
|
||||
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_primary_key("primary_key".to_owned());
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
index
|
||||
.add_documents(documents!([
|
||||
{ "primary_key": 38 },
|
||||
{ "primary_key": 34 }
|
||||
]))
|
||||
.unwrap();
|
||||
|
||||
db_snap!(index, documents_ids, @"[0, 1, ]");
|
||||
db_snap!(index, external_documents_ids, 1, @r###"
|
||||
soft:
|
||||
hard:
|
||||
34 1
|
||||
38 0
|
||||
"###);
|
||||
db_snap!(index, soft_deleted_documents_ids, 1, @"[]");
|
||||
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
|
||||
delete.delete_external_id("34");
|
||||
delete.execute().unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
db_snap!(index, documents_ids, @"[0, ]");
|
||||
db_snap!(index, external_documents_ids, 2, @r###"
|
||||
soft:
|
||||
hard:
|
||||
34 1
|
||||
38 0
|
||||
"###);
|
||||
db_snap!(index, soft_deleted_documents_ids, 2, @"[1, ]");
|
||||
|
||||
index
|
||||
.update_settings(|s| {
|
||||
s.set_searchable_fields(vec![]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
// The key point of the test is to verify that the external documents ids
|
||||
// do not contain any entry for previously soft-deleted document ids
|
||||
db_snap!(index, documents_ids, @"[0, ]");
|
||||
db_snap!(index, external_documents_ids, 3, @r###"
|
||||
soft:
|
||||
hard:
|
||||
38 0
|
||||
"###);
|
||||
db_snap!(index, soft_deleted_documents_ids, 3, @"[]");
|
||||
|
||||
// So that this document addition works correctly now.
|
||||
// It would be wrongly interpreted as a replacement before
|
||||
index.add_documents(documents!({ "primary_key": 34 })).unwrap();
|
||||
|
||||
db_snap!(index, documents_ids, @"[0, 1, ]");
|
||||
db_snap!(index, external_documents_ids, 4, @r###"
|
||||
soft:
|
||||
hard:
|
||||
34 1
|
||||
38 0
|
||||
"###);
|
||||
db_snap!(index, soft_deleted_documents_ids, 4, @"[]");
|
||||
|
||||
// We do the test again, but deleting the document with id 0 instead of id 1 now
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
|
||||
delete.delete_external_id("38");
|
||||
delete.execute().unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
db_snap!(index, documents_ids, @"[1, ]");
|
||||
db_snap!(index, external_documents_ids, 5, @r###"
|
||||
soft:
|
||||
hard:
|
||||
34 1
|
||||
38 0
|
||||
"###);
|
||||
db_snap!(index, soft_deleted_documents_ids, 5, @"[0, ]");
|
||||
|
||||
index
|
||||
.update_settings(|s| {
|
||||
s.set_searchable_fields(vec!["primary_key".to_owned()]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
db_snap!(index, documents_ids, @"[1, ]");
|
||||
db_snap!(index, external_documents_ids, 6, @r###"
|
||||
soft:
|
||||
hard:
|
||||
34 1
|
||||
"###);
|
||||
db_snap!(index, soft_deleted_documents_ids, 6, @"[]");
|
||||
|
||||
// And adding lots of documents afterwards instead of just one.
|
||||
// These extra subtests don't add much, but it's better than nothing.
|
||||
index.add_documents(documents!([{ "primary_key": 38 }, { "primary_key": 39 }, { "primary_key": 41 }, { "primary_key": 40 }, { "primary_key": 41 }, { "primary_key": 42 }])).unwrap();
|
||||
|
||||
db_snap!(index, documents_ids, @"[0, 1, 2, 3, 4, 5, ]");
|
||||
db_snap!(index, external_documents_ids, 7, @r###"
|
||||
soft:
|
||||
hard:
|
||||
34 1
|
||||
38 0
|
||||
39 2
|
||||
40 4
|
||||
41 3
|
||||
42 5
|
||||
"###);
|
||||
db_snap!(index, soft_deleted_documents_ids, 7, @"[]");
|
||||
}
|
||||
|
||||
#[test]
|
||||
fn bug_3021_second() {
|
||||
// https://github.com/meilisearch/meilisearch/issues/3021
|
||||
let mut index = TempIndex::new();
|
||||
index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;
|
||||
|
||||
index
|
||||
.update_settings(|settings| {
|
||||
settings.set_primary_key("primary_key".to_owned());
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
index
|
||||
.add_documents(documents!([
|
||||
{ "primary_key": 30 },
|
||||
{ "primary_key": 34 }
|
||||
]))
|
||||
.unwrap();
|
||||
|
||||
db_snap!(index, documents_ids, @"[0, 1, ]");
|
||||
db_snap!(index, external_documents_ids, 1, @r###"
|
||||
soft:
|
||||
hard:
|
||||
30 0
|
||||
34 1
|
||||
"###);
|
||||
db_snap!(index, soft_deleted_documents_ids, 1, @"[]");
|
||||
|
||||
let mut wtxn = index.write_txn().unwrap();
|
||||
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
|
||||
delete.delete_external_id("34");
|
||||
delete.execute().unwrap();
|
||||
wtxn.commit().unwrap();
|
||||
|
||||
db_snap!(index, documents_ids, @"[0, ]");
|
||||
db_snap!(index, external_documents_ids, 2, @r###"
|
||||
soft:
|
||||
hard:
|
||||
30 0
|
||||
34 1
|
||||
"###);
|
||||
db_snap!(index, soft_deleted_documents_ids, 2, @"[1, ]");
|
||||
|
||||
index
|
||||
.update_settings(|s| {
|
||||
s.set_searchable_fields(vec![]);
|
||||
})
|
||||
.unwrap();
|
||||
|
||||
// The key point of the test is to verify that the external documents ids
|
||||
// do not contain any entry for previously soft-deleted document ids
|
||||
db_snap!(index, documents_ids, @"[0, ]");
|
||||
db_snap!(index, external_documents_ids, 3, @r###"
|
||||
soft:
|
||||
hard:
|
||||
30 0
|
||||
"###);
|
||||
db_snap!(index, soft_deleted_documents_ids, 3, @"[]");
|
||||
|
||||
// So that when we add a new document
|
||||
index.add_documents(documents!({ "primary_key": 35, "b": 2 })).unwrap();
|
||||
|
||||
db_snap!(index, documents_ids, @"[0, 1, ]");
|
||||
// The external documents ids don't have several external ids pointing to the same
|
||||
// internal document id
|
||||
db_snap!(index, external_documents_ids, 4, @r###"
|
||||
soft:
|
||||
hard:
|
||||
30 0
|
||||
35 1
|
||||
"###);
|
||||
db_snap!(index, soft_deleted_documents_ids, 4, @"[]");
|
||||
|
||||
// And when we add 34 again, we don't replace document 35
|
||||
index.add_documents(documents!({ "primary_key": 34, "a": 1 })).unwrap();
|
||||
|
||||
// And document 35 still exists, is not deleted
|
||||
db_snap!(index, documents_ids, @"[0, 1, 2, ]");
|
||||
db_snap!(index, external_documents_ids, 5, @r###"
|
||||
soft:
|
||||
hard:
|
||||
30 0
|
||||
34 2
|
||||
35 1
|
||||
"###);
|
||||
db_snap!(index, soft_deleted_documents_ids, 5, @"[]");
|
||||
|
||||
let rtxn = index.read_txn().unwrap();
|
||||
let (_docid, obkv) = index.documents(&rtxn, [0]).unwrap()[0];
|
||||
let json = obkv_to_json(&[0, 1, 2], &index.fields_ids_map(&rtxn).unwrap(), obkv).unwrap();
|
||||
insta::assert_debug_snapshot!(json, @r###"
|
||||
{
|
||||
"primary_key": Number(30),
|
||||
}
|
||||
"###);
|
||||
|
||||
// Furthermore, when we retrieve document 34, it is not the result of merging 35 with 34
|
||||
let (_docid, obkv) = index.documents(&rtxn, [2]).unwrap()[0];
|
||||
let json = obkv_to_json(&[0, 1, 2], &index.fields_ids_map(&rtxn).unwrap(), obkv).unwrap();
|
||||
insta::assert_debug_snapshot!(json, @r###"
|
||||
{
|
||||
"primary_key": Number(34),
|
||||
"a": Number(1),
|
||||
}
|
||||
"###);
|
||||
|
||||
drop(rtxn);
|
||||
|
||||
// Add new documents again
|
||||
index
|
||||
.add_documents(
|
||||
documents!([{ "primary_key": 37 }, { "primary_key": 38 }, { "primary_key": 39 }]),
|
||||
)
|
||||
.unwrap();
|
||||
|
||||
db_snap!(index, documents_ids, @"[0, 1, 2, 3, 4, 5, ]");
|
||||
db_snap!(index, external_documents_ids, 6, @r###"
|
||||
soft:
|
||||
hard:
|
||||
30 0
|
||||
34 2
|
||||
35 1
|
||||
37 3
|
||||
38 4
|
||||
39 5
|
||||
"###);
|
||||
db_snap!(index, soft_deleted_documents_ids, 6, @"[]");
|
||||
}
|
||||
}
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue