mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-30 00:34:26 +01:00
Fix bug in handling of soft deleted documents when updating settings
This commit is contained in:
parent
d6eacb2aac
commit
67d8cec209
@ -71,6 +71,30 @@ impl<'a> ExternalDocumentsIds<'a> {
|
|||||||
self.merge_soft_into_hard()
|
self.merge_soft_into_hard()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/// Rebuild the internal FSTs in the ExternalDocumentsIds structure such that they
|
||||||
|
/// don't contain any soft deleted document id.
|
||||||
|
pub fn delete_soft_deleted_documents_ids_from_fsts(&mut self) -> fst::Result<()> {
|
||||||
|
let mut new_hard_builder = fst::MapBuilder::memory();
|
||||||
|
|
||||||
|
let union_op = self.hard.op().add(&self.soft).r#union();
|
||||||
|
let mut iter = union_op.into_stream();
|
||||||
|
while let Some((external_id, docids)) = iter.next() {
|
||||||
|
// prefer selecting the ids from soft, always
|
||||||
|
let id = indexed_last_value(docids).unwrap();
|
||||||
|
if id != DELETED_ID && !self.soft_deleted_docids.contains(id as u32) {
|
||||||
|
new_hard_builder.insert(external_id, id)?;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
drop(iter);
|
||||||
|
|
||||||
|
// Delete soft map completely
|
||||||
|
self.soft = fst::Map::default().map_data(Cow::Owned)?;
|
||||||
|
// We save the new map as the new hard map.
|
||||||
|
self.hard = new_hard_builder.into_map().map_data(Cow::Owned)?;
|
||||||
|
|
||||||
|
Ok(())
|
||||||
|
}
|
||||||
|
|
||||||
pub fn insert_ids<A: AsRef<[u8]>>(&mut self, other: &fst::Map<A>) -> fst::Result<()> {
|
pub fn insert_ids<A: AsRef<[u8]>>(&mut self, other: &fst::Map<A>) -> fst::Result<()> {
|
||||||
let union_op = self.soft.op().add(other).r#union();
|
let union_op = self.soft.op().add(other).r#union();
|
||||||
|
|
||||||
|
@ -1185,13 +1185,15 @@ pub(crate) mod tests {
|
|||||||
|
|
||||||
use big_s::S;
|
use big_s::S;
|
||||||
use heed::{EnvOpenOptions, RwTxn};
|
use heed::{EnvOpenOptions, RwTxn};
|
||||||
|
use maplit::hashset;
|
||||||
use tempfile::TempDir;
|
use tempfile::TempDir;
|
||||||
|
|
||||||
use crate::documents::DocumentsBatchReader;
|
use crate::documents::DocumentsBatchReader;
|
||||||
use crate::error::{Error, InternalError};
|
use crate::error::{Error, InternalError};
|
||||||
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
|
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
|
||||||
use crate::update::{
|
use crate::update::{
|
||||||
self, DeleteDocuments, IndexDocuments, IndexDocumentsConfig, IndexerConfig, Settings,
|
self, DeleteDocuments, IndexDocuments, IndexDocumentsConfig, IndexDocumentsMethod,
|
||||||
|
IndexerConfig, Settings,
|
||||||
};
|
};
|
||||||
use crate::{db_snap, obkv_to_json, Index};
|
use crate::{db_snap, obkv_to_json, Index};
|
||||||
|
|
||||||
@ -1485,7 +1487,7 @@ pub(crate) mod tests {
|
|||||||
use big_s::S;
|
use big_s::S;
|
||||||
use maplit::hashset;
|
use maplit::hashset;
|
||||||
|
|
||||||
let mut index = TempIndex::new();
|
let index = TempIndex::new();
|
||||||
|
|
||||||
index
|
index
|
||||||
.update_settings(|settings| {
|
.update_settings(|settings| {
|
||||||
@ -1544,7 +1546,6 @@ pub(crate) mod tests {
|
|||||||
1 0 3 1 [3, 6, ]
|
1 0 3 1 [3, 6, ]
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
index.index_documents_config.disable_soft_deletion = false;
|
|
||||||
index
|
index
|
||||||
.add_documents(documents!([{ "id": 3, "doggo": 4 }, { "id": 3, "doggo": 5 },{ "id": 3, "doggo": 4 }]))
|
.add_documents(documents!([{ "id": 3, "doggo": 4 }, { "id": 3, "doggo": 5 },{ "id": 3, "doggo": 4 }]))
|
||||||
.unwrap();
|
.unwrap();
|
||||||
@ -1568,7 +1569,6 @@ pub(crate) mod tests {
|
|||||||
1 0 4 1 [7, ]
|
1 0 4 1 [7, ]
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
index.index_documents_config.disable_soft_deletion = false;
|
|
||||||
index
|
index
|
||||||
.update_settings(|settings| {
|
.update_settings(|settings| {
|
||||||
settings.set_distinct_field("id".to_owned());
|
settings.set_distinct_field("id".to_owned());
|
||||||
@ -1578,12 +1578,11 @@ pub(crate) mod tests {
|
|||||||
db_snap!(index, documents_ids, @"[4, 5, 6, 7, ]");
|
db_snap!(index, documents_ids, @"[4, 5, 6, 7, ]");
|
||||||
db_snap!(index, external_documents_ids, 3, @r###"
|
db_snap!(index, external_documents_ids, 3, @r###"
|
||||||
soft:
|
soft:
|
||||||
3 7
|
|
||||||
hard:
|
hard:
|
||||||
0 4
|
0 4
|
||||||
1 5
|
1 5
|
||||||
2 6
|
2 6
|
||||||
3 3
|
3 7
|
||||||
"###);
|
"###);
|
||||||
db_snap!(index, soft_deleted_documents_ids, 3, @"[]");
|
db_snap!(index, soft_deleted_documents_ids, 3, @"[]");
|
||||||
db_snap!(index, facet_id_f64_docids, 3, @r###"
|
db_snap!(index, facet_id_f64_docids, 3, @r###"
|
||||||
@ -1596,163 +1595,6 @@ pub(crate) mod tests {
|
|||||||
1 0 3 1 [6, ]
|
1 0 3 1 [6, ]
|
||||||
1 0 4 1 [7, ]
|
1 0 4 1 [7, ]
|
||||||
"###);
|
"###);
|
||||||
|
|
||||||
index.index_documents_config.disable_soft_deletion = true;
|
|
||||||
index.add_documents(documents!([{ "id": 3, "doggo": 4 }])).unwrap();
|
|
||||||
db_snap!(index, external_documents_ids, 3, @r###"
|
|
||||||
soft:
|
|
||||||
3 7
|
|
||||||
hard:
|
|
||||||
0 4
|
|
||||||
1 5
|
|
||||||
2 6
|
|
||||||
3 3
|
|
||||||
"###);
|
|
||||||
db_snap!(index, soft_deleted_documents_ids, 3, @"[]");
|
|
||||||
db_snap!(index, facet_id_f64_docids, 3, @r###"
|
|
||||||
0 0 0 1 [4, ]
|
|
||||||
0 0 1 1 [5, ]
|
|
||||||
0 0 2 1 [6, ]
|
|
||||||
0 0 3 1 [7, ]
|
|
||||||
1 0 1 1 [4, ]
|
|
||||||
1 0 2 1 [5, ]
|
|
||||||
1 0 3 1 [6, ]
|
|
||||||
1 0 4 1 [7, ]
|
|
||||||
"###);
|
|
||||||
|
|
||||||
let mut wtxn = index.write_txn().unwrap();
|
|
||||||
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
|
|
||||||
let docid = delete.delete_external_id("3").unwrap();
|
|
||||||
insta::assert_snapshot!(format!("{docid}"), @"7");
|
|
||||||
delete.execute().unwrap();
|
|
||||||
wtxn.commit().unwrap();
|
|
||||||
|
|
||||||
db_snap!(index, documents_ids, @"[4, 5, 6, ]");
|
|
||||||
db_snap!(index, external_documents_ids, 4, @r###"
|
|
||||||
soft:
|
|
||||||
3 7
|
|
||||||
hard:
|
|
||||||
0 4
|
|
||||||
1 5
|
|
||||||
2 6
|
|
||||||
3 3
|
|
||||||
"###);
|
|
||||||
|
|
||||||
db_snap!(index, soft_deleted_documents_ids, 4, @"[7, ]");
|
|
||||||
db_snap!(index, facet_id_f64_docids, 4, @r###"
|
|
||||||
0 0 0 1 [4, ]
|
|
||||||
0 0 1 1 [5, ]
|
|
||||||
0 0 2 1 [6, ]
|
|
||||||
0 0 3 1 [7, ]
|
|
||||||
1 0 1 1 [4, ]
|
|
||||||
1 0 2 1 [5, ]
|
|
||||||
1 0 3 1 [6, ]
|
|
||||||
1 0 4 1 [7, ]
|
|
||||||
"###);
|
|
||||||
|
|
||||||
index.index_documents_config.disable_soft_deletion = false;
|
|
||||||
index.add_documents(documents!([{ "id": 3, "doggo": 4 }])).unwrap();
|
|
||||||
|
|
||||||
db_snap!(index, external_documents_ids, 4, @r###"
|
|
||||||
soft:
|
|
||||||
3 0
|
|
||||||
hard:
|
|
||||||
0 4
|
|
||||||
1 5
|
|
||||||
2 6
|
|
||||||
3 3
|
|
||||||
"###);
|
|
||||||
|
|
||||||
db_snap!(index, soft_deleted_documents_ids, 4, @"[7, ]");
|
|
||||||
db_snap!(index, facet_id_f64_docids, 4, @r###"
|
|
||||||
0 0 0 1 [4, ]
|
|
||||||
0 0 1 1 [5, ]
|
|
||||||
0 0 2 1 [6, ]
|
|
||||||
0 0 3 1 [0, 7, ]
|
|
||||||
1 0 1 1 [4, ]
|
|
||||||
1 0 2 1 [5, ]
|
|
||||||
1 0 3 1 [6, ]
|
|
||||||
1 0 4 1 [0, 7, ]
|
|
||||||
"###);
|
|
||||||
|
|
||||||
index.index_documents_config.disable_soft_deletion = false;
|
|
||||||
index.add_documents(documents!([{ "id": 3, "doggo": 5 }])).unwrap();
|
|
||||||
|
|
||||||
db_snap!(index, external_documents_ids, 4, @r###"
|
|
||||||
soft:
|
|
||||||
3 1
|
|
||||||
hard:
|
|
||||||
0 4
|
|
||||||
1 5
|
|
||||||
2 6
|
|
||||||
3 3
|
|
||||||
"###);
|
|
||||||
|
|
||||||
db_snap!(index, soft_deleted_documents_ids, 4, @"[0, 7, ]");
|
|
||||||
db_snap!(index, facet_id_f64_docids, 4, @r###"
|
|
||||||
0 0 0 1 [4, ]
|
|
||||||
0 0 1 1 [5, ]
|
|
||||||
0 0 2 1 [6, ]
|
|
||||||
0 0 3 1 [0, 1, 7, ]
|
|
||||||
1 0 1 1 [4, ]
|
|
||||||
1 0 2 1 [5, ]
|
|
||||||
1 0 3 1 [6, ]
|
|
||||||
1 0 4 1 [0, 7, ]
|
|
||||||
1 0 5 1 [1, ]
|
|
||||||
"###);
|
|
||||||
|
|
||||||
index.index_documents_config.disable_soft_deletion = false;
|
|
||||||
index.add_documents(documents!([{ "id": 3, "doggo": 5, "id": 2, "doggo": 4 }])).unwrap();
|
|
||||||
db_snap!(index, external_documents_ids, 4, @r###"
|
|
||||||
soft:
|
|
||||||
hard:
|
|
||||||
0 4
|
|
||||||
1 5
|
|
||||||
2 2
|
|
||||||
3 1
|
|
||||||
"###);
|
|
||||||
|
|
||||||
db_snap!(index, soft_deleted_documents_ids, 4, @"[0, 6, 7, ]");
|
|
||||||
db_snap!(index, facet_id_f64_docids, 4, @r###"
|
|
||||||
0 0 0 1 [4, ]
|
|
||||||
0 0 1 1 [5, ]
|
|
||||||
0 0 2 1 [2, 6, ]
|
|
||||||
0 0 3 1 [0, 1, 7, ]
|
|
||||||
1 0 1 1 [4, ]
|
|
||||||
1 0 2 1 [5, ]
|
|
||||||
1 0 3 1 [6, ]
|
|
||||||
1 0 4 1 [0, 2, 7, ]
|
|
||||||
1 0 5 1 [1, ]
|
|
||||||
"###);
|
|
||||||
|
|
||||||
index.index_documents_config.disable_soft_deletion = false;
|
|
||||||
index
|
|
||||||
.add_documents(documents!([{ "id": 4, "doggo": 5 }, { "id": 3, "doggo": 5 }]))
|
|
||||||
.unwrap();
|
|
||||||
|
|
||||||
db_snap!(index, external_documents_ids, 4, @r###"
|
|
||||||
soft:
|
|
||||||
4 3
|
|
||||||
hard:
|
|
||||||
0 4
|
|
||||||
1 5
|
|
||||||
2 2
|
|
||||||
3 1
|
|
||||||
"###);
|
|
||||||
|
|
||||||
db_snap!(index, soft_deleted_documents_ids, 4, @"[0, 6, 7, ]");
|
|
||||||
db_snap!(index, facet_id_f64_docids, 4, @r###"
|
|
||||||
0 0 0 1 [4, ]
|
|
||||||
0 0 1 1 [5, ]
|
|
||||||
0 0 2 1 [2, 6, ]
|
|
||||||
0 0 3 1 [0, 1, 7, ]
|
|
||||||
0 0 4 1 [3, ]
|
|
||||||
1 0 1 1 [4, ]
|
|
||||||
1 0 2 1 [5, ]
|
|
||||||
1 0 3 1 [6, ]
|
|
||||||
1 0 4 1 [0, 2, 7, ]
|
|
||||||
1 0 5 1 [1, 3, ]
|
|
||||||
"###);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
#[test]
|
#[test]
|
||||||
@ -2020,4 +1862,253 @@ pub(crate) mod tests {
|
|||||||
drop(rtxn);
|
drop(rtxn);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[test]
fn bug_3021_first() {
    // Regression test for https://github.com/meilisearch/meilisearch/issues/3021
    // using the `ReplaceDocuments` indexing method.
    //
    // NOTE(review): the inline snapshot text below is reproduced exactly as it appears
    // in this view; confirm its whitespace against the checked-in file.
    let mut index = TempIndex::new();
    index.index_documents_config.update_method = IndexDocumentsMethod::ReplaceDocuments;

    index
        .update_settings(|s| {
            s.set_primary_key(String::from("primary_key"));
        })
        .unwrap();

    index
        .add_documents(documents!([
            { "primary_key": 38 },
            { "primary_key": 34 }
        ]))
        .unwrap();

    db_snap!(index, documents_ids, @"[0, 1, ]");
    db_snap!(index, external_documents_ids, 1, @r###"
    soft:
    hard:
    34 1
    38 0
    "###);
    db_snap!(index, soft_deleted_documents_ids, 1, @"[]");

    // Delete external id 34 (internal id 1).
    {
        let mut txn = index.write_txn().unwrap();
        let mut deletion = DeleteDocuments::new(&mut txn, &index).unwrap();
        deletion.delete_external_id("34");
        deletion.execute().unwrap();
        txn.commit().unwrap();
    }

    db_snap!(index, documents_ids, @"[0, ]");
    db_snap!(index, external_documents_ids, 2, @r###"
    soft:
    hard:
    34 1
    38 0
    "###);
    db_snap!(index, soft_deleted_documents_ids, 2, @"[1, ]");

    index
        .update_settings(|settings| {
            settings.set_searchable_fields(Vec::new());
        })
        .unwrap();

    // Core assertion: once the settings have been updated, the external documents ids
    // must hold no entry for a document id that was soft deleted before.
    db_snap!(index, documents_ids, @"[0, ]");
    db_snap!(index, external_documents_ids, 3, @r###"
    soft:
    hard:
    38 0
    "###);
    db_snap!(index, soft_deleted_documents_ids, 3, @"[]");

    // This addition must now be handled as a new document;
    // before the fix it was wrongly interpreted as a replacement.
    index.add_documents(documents!({ "primary_key": 34 })).unwrap();

    db_snap!(index, documents_ids, @"[0, 1, ]");
    db_snap!(index, external_documents_ids, 4, @r###"
    soft:
    hard:
    34 1
    38 0
    "###);
    db_snap!(index, soft_deleted_documents_ids, 4, @"[]");

    // Same scenario again, this time removing the document with internal id 0
    // rather than internal id 1.
    {
        let mut txn = index.write_txn().unwrap();
        let mut deletion = DeleteDocuments::new(&mut txn, &index).unwrap();
        deletion.delete_external_id("38");
        deletion.execute().unwrap();
        txn.commit().unwrap();
    }

    db_snap!(index, documents_ids, @"[1, ]");
    db_snap!(index, external_documents_ids, 5, @r###"
    soft:
    hard:
    34 1
    38 0
    "###);
    db_snap!(index, soft_deleted_documents_ids, 5, @"[0, ]");

    index
        .update_settings(|settings| {
            settings.set_searchable_fields(vec![String::from("primary_key")]);
        })
        .unwrap();

    db_snap!(index, documents_ids, @"[1, ]");
    db_snap!(index, external_documents_ids, 6, @r###"
    soft:
    hard:
    34 1
    "###);
    db_snap!(index, soft_deleted_documents_ids, 6, @"[]");

    // Finally, add a whole batch of documents instead of a single one.
    // These extra checks are a modest addition, but better than nothing.
    index
        .add_documents(documents!([
            { "primary_key": 38 },
            { "primary_key": 39 },
            { "primary_key": 41 },
            { "primary_key": 40 },
            { "primary_key": 41 },
            { "primary_key": 42 }
        ]))
        .unwrap();

    db_snap!(index, documents_ids, @"[0, 1, 2, 3, 4, 5, ]");
    db_snap!(index, external_documents_ids, 7, @r###"
    soft:
    hard:
    34 1
    38 0
    39 2
    40 4
    41 3
    42 5
    "###);
    db_snap!(index, soft_deleted_documents_ids, 7, @"[]");
}
|
||||||
|
|
||||||
|
#[test]
fn bug_3021_second() {
    // Regression test for https://github.com/meilisearch/meilisearch/issues/3021
    // using the `UpdateDocuments` indexing method.
    //
    // NOTE(review): the inline snapshot text below is reproduced exactly as it appears
    // in this view; confirm its whitespace against the checked-in file.
    let mut index = TempIndex::new();
    index.index_documents_config.update_method = IndexDocumentsMethod::UpdateDocuments;

    index
        .update_settings(|s| {
            s.set_primary_key(String::from("primary_key"));
        })
        .unwrap();

    index
        .add_documents(documents!([
            { "primary_key": 30 },
            { "primary_key": 34 }
        ]))
        .unwrap();

    db_snap!(index, documents_ids, @"[0, 1, ]");
    db_snap!(index, external_documents_ids, 1, @r###"
    soft:
    hard:
    30 0
    34 1
    "###);
    db_snap!(index, soft_deleted_documents_ids, 1, @"[]");

    // Delete external id 34 (internal id 1).
    {
        let mut txn = index.write_txn().unwrap();
        let mut deletion = DeleteDocuments::new(&mut txn, &index).unwrap();
        deletion.delete_external_id("34");
        deletion.execute().unwrap();
        txn.commit().unwrap();
    }

    db_snap!(index, documents_ids, @"[0, ]");
    db_snap!(index, external_documents_ids, 2, @r###"
    soft:
    hard:
    30 0
    34 1
    "###);
    db_snap!(index, soft_deleted_documents_ids, 2, @"[1, ]");

    index
        .update_settings(|settings| {
            settings.set_searchable_fields(Vec::new());
        })
        .unwrap();

    // Core assertion: once the settings have been updated, the external documents ids
    // must hold no entry for a document id that was soft deleted before.
    db_snap!(index, documents_ids, @"[0, ]");
    db_snap!(index, external_documents_ids, 3, @r###"
    soft:
    hard:
    30 0
    "###);
    db_snap!(index, soft_deleted_documents_ids, 3, @"[]");

    // Consequently, adding a brand new document...
    index.add_documents(documents!({ "primary_key": 35, "b": 2 })).unwrap();

    db_snap!(index, documents_ids, @"[0, 1, ]");
    // ...does not leave two external ids pointing at the same internal document id.
    db_snap!(index, external_documents_ids, 4, @r###"
    soft:
    hard:
    30 0
    35 1
    "###);
    db_snap!(index, soft_deleted_documents_ids, 4, @"[]");

    // Re-adding 34 must not replace document 35.
    index.add_documents(documents!({ "primary_key": 34, "a": 1 })).unwrap();

    // Document 35 is still present and has not been deleted.
    db_snap!(index, documents_ids, @"[0, 1, 2, ]");
    db_snap!(index, external_documents_ids, 5, @r###"
    soft:
    hard:
    30 0
    34 2
    35 1
    "###);
    db_snap!(index, soft_deleted_documents_ids, 5, @"[]");

    {
        let rtxn = index.read_txn().unwrap();
        let fields = index.fields_ids_map(&rtxn).unwrap();

        let (_docid, obkv) = index.documents(&rtxn, [0]).unwrap()[0];
        let json = obkv_to_json(&[0, 1, 2], &fields, obkv).unwrap();
        insta::assert_debug_snapshot!(json, @r###"
        {
        "primary_key": Number(30),
        }
        "###);

        // Moreover, document 34 as stored is not a merge of documents 35 and 34.
        let (_docid, obkv) = index.documents(&rtxn, [2]).unwrap()[0];
        let json = obkv_to_json(&[0, 1, 2], &fields, obkv).unwrap();
        insta::assert_debug_snapshot!(json, @r###"
        {
        "primary_key": Number(34),
        "a": Number(1),
        }
        "###);
    }

    // Add yet another batch of new documents.
    index
        .add_documents(documents!([
            { "primary_key": 37 },
            { "primary_key": 38 },
            { "primary_key": 39 }
        ]))
        .unwrap();

    db_snap!(index, documents_ids, @"[0, 1, 2, 3, 4, 5, ]");
    db_snap!(index, external_documents_ids, 6, @r###"
    soft:
    hard:
    30 0
    34 2
    35 1
    37 3
    38 4
    39 5
    "###);
    db_snap!(index, soft_deleted_documents_ids, 6, @"[]");
}
|
||||||
}
|
}
|
||||||
|
@ -17,7 +17,7 @@ use super::{IndexDocumentsMethod, IndexerConfig};
|
|||||||
use crate::documents::{DocumentsBatchIndex, EnrichedDocument, EnrichedDocumentsBatchReader};
|
use crate::documents::{DocumentsBatchIndex, EnrichedDocument, EnrichedDocumentsBatchReader};
|
||||||
use crate::error::{Error, InternalError, UserError};
|
use crate::error::{Error, InternalError, UserError};
|
||||||
use crate::index::db_name;
|
use crate::index::db_name;
|
||||||
use crate::update::{AvailableDocumentsIds, UpdateIndexingStep};
|
use crate::update::{AvailableDocumentsIds, ClearDocuments, UpdateIndexingStep};
|
||||||
use crate::{
|
use crate::{
|
||||||
ExternalDocumentsIds, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index,
|
ExternalDocumentsIds, FieldDistribution, FieldId, FieldIdMapMissingEntry, FieldsIdsMap, Index,
|
||||||
Result, BEU32,
|
Result, BEU32,
|
||||||
@ -546,12 +546,13 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
})
|
})
|
||||||
}
|
}
|
||||||
|
|
||||||
/// Returns a `TransformOutput` with a file that contains the documents of the index
|
/// Clear all databases. Returns a `TransformOutput` with a file that contains the documents
|
||||||
/// with the attributes reordered accordingly to the `FieldsIdsMap` given as argument.
|
/// of the index with the attributes reordered according to the `FieldsIdsMap` given as argument.
|
||||||
|
///
|
||||||
// TODO this can be done in parallel by using the rayon `ThreadPool`.
|
// TODO this can be done in parallel by using the rayon `ThreadPool`.
|
||||||
pub fn remap_index_documents(
|
pub fn prepare_for_documents_reindexing(
|
||||||
self,
|
self,
|
||||||
wtxn: &mut heed::RwTxn,
|
wtxn: &mut heed::RwTxn<'i, '_>,
|
||||||
old_fields_ids_map: FieldsIdsMap,
|
old_fields_ids_map: FieldsIdsMap,
|
||||||
mut new_fields_ids_map: FieldsIdsMap,
|
mut new_fields_ids_map: FieldsIdsMap,
|
||||||
) -> Result<TransformOutput> {
|
) -> Result<TransformOutput> {
|
||||||
@ -559,7 +560,14 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
let primary_key =
|
let primary_key =
|
||||||
self.index.primary_key(wtxn)?.ok_or(UserError::MissingPrimaryKey)?.to_string();
|
self.index.primary_key(wtxn)?.ok_or(UserError::MissingPrimaryKey)?.to_string();
|
||||||
let field_distribution = self.index.field_distribution(wtxn)?;
|
let field_distribution = self.index.field_distribution(wtxn)?;
|
||||||
let external_documents_ids = self.index.external_documents_ids(wtxn)?;
|
|
||||||
|
// Delete the soft deleted document ids from the maps inside the external_document_ids structure
|
||||||
|
let new_external_documents_ids = {
|
||||||
|
let mut external_documents_ids = self.index.external_documents_ids(wtxn)?;
|
||||||
|
external_documents_ids.delete_soft_deleted_documents_ids_from_fsts()?;
|
||||||
|
external_documents_ids
|
||||||
|
};
|
||||||
|
|
||||||
let documents_ids = self.index.documents_ids(wtxn)?;
|
let documents_ids = self.index.documents_ids(wtxn)?;
|
||||||
let documents_count = documents_ids.len() as usize;
|
let documents_count = documents_ids.len() as usize;
|
||||||
|
|
||||||
@ -638,17 +646,25 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
let mut flattened_documents = flattened_writer.into_inner()?;
|
let mut flattened_documents = flattened_writer.into_inner()?;
|
||||||
flattened_documents.seek(SeekFrom::Start(0))?;
|
flattened_documents.seek(SeekFrom::Start(0))?;
|
||||||
|
|
||||||
Ok(TransformOutput {
|
let output = TransformOutput {
|
||||||
primary_key,
|
primary_key,
|
||||||
fields_ids_map: new_fields_ids_map,
|
fields_ids_map: new_fields_ids_map,
|
||||||
field_distribution,
|
field_distribution,
|
||||||
external_documents_ids: external_documents_ids.into_static(),
|
external_documents_ids: new_external_documents_ids.into_static(),
|
||||||
new_documents_ids: documents_ids,
|
new_documents_ids: documents_ids,
|
||||||
replaced_documents_ids: RoaringBitmap::default(),
|
replaced_documents_ids: RoaringBitmap::default(),
|
||||||
documents_count,
|
documents_count,
|
||||||
original_documents,
|
original_documents,
|
||||||
flattened_documents,
|
flattened_documents,
|
||||||
})
|
};
|
||||||
|
|
||||||
|
let new_facets = output.compute_real_facets(wtxn, self.index)?;
|
||||||
|
self.index.put_faceted_fields(wtxn, &new_facets)?;
|
||||||
|
|
||||||
|
// We clear the full database (words-fst, documents ids and documents content).
|
||||||
|
ClearDocuments::new(wtxn, self.index).execute()?;
|
||||||
|
|
||||||
|
Ok(output)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -12,7 +12,7 @@ use crate::criterion::Criterion;
|
|||||||
use crate::error::UserError;
|
use crate::error::UserError;
|
||||||
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
|
use crate::index::{DEFAULT_MIN_WORD_LEN_ONE_TYPO, DEFAULT_MIN_WORD_LEN_TWO_TYPOS};
|
||||||
use crate::update::index_documents::IndexDocumentsMethod;
|
use crate::update::index_documents::IndexDocumentsMethod;
|
||||||
use crate::update::{ClearDocuments, IndexDocuments, UpdateIndexingStep};
|
use crate::update::{IndexDocuments, UpdateIndexingStep};
|
||||||
use crate::{FieldsIdsMap, Index, Result};
|
use crate::{FieldsIdsMap, Index, Result};
|
||||||
|
|
||||||
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
|
#[derive(Debug, Clone, PartialEq, Eq, Copy)]
|
||||||
@ -291,15 +291,12 @@ impl<'a, 't, 'u, 'i> Settings<'a, 't, 'u, 'i> {
|
|||||||
false,
|
false,
|
||||||
)?;
|
)?;
|
||||||
|
|
||||||
// We remap the documents fields based on the new `FieldsIdsMap`.
|
// We clear the databases and remap the documents fields based on the new `FieldsIdsMap`.
|
||||||
let output =
|
let output = transform.prepare_for_documents_reindexing(
|
||||||
transform.remap_index_documents(self.wtxn, old_fields_ids_map, fields_ids_map)?;
|
self.wtxn,
|
||||||
|
old_fields_ids_map,
|
||||||
let new_facets = output.compute_real_facets(self.wtxn, self.index)?;
|
fields_ids_map,
|
||||||
self.index.put_faceted_fields(self.wtxn, &new_facets)?;
|
)?;
|
||||||
|
|
||||||
// We clear the full database (words-fst, documents ids and documents content).
|
|
||||||
ClearDocuments::new(self.wtxn, self.index).execute()?;
|
|
||||||
|
|
||||||
// We index the generated `TransformOutput` which must contain
|
// We index the generated `TransformOutput` which must contain
|
||||||
// all the documents with fields in the newly defined searchable order.
|
// all the documents with fields in the newly defined searchable order.
|
||||||
|
Loading…
Reference in New Issue
Block a user