Remove soft-deleted related methods from Index

This commit is contained in:
Louis Dureuil 2023-10-25 14:14:15 +02:00
parent c534a1b687
commit 113527f466
No known key found for this signature in database
11 changed files with 9 additions and 124 deletions

View File

@ -324,7 +324,6 @@ impl ErrorCode for milli::Error {
UserError::SerdeJson(_)
| UserError::InvalidLmdbOpenOptions
| UserError::DocumentLimitReached
| UserError::AccessingSoftDeletedDocument { .. }
| UserError::UnknownInternalDocumentId { .. } => Code::Internal,
UserError::InvalidStoreFile => Code::InvalidStoreFile,
UserError::NoSpaceLeftOnDevice => Code::NoSpaceLeftOnDevice,

View File

@ -89,8 +89,6 @@ pub enum FieldIdMapMissingEntry {
#[derive(Error, Debug)]
pub enum UserError {
#[error("A soft deleted internal document id have been used: `{document_id}`.")]
AccessingSoftDeletedDocument { document_id: DocumentId },
#[error("A document cannot contain more than 65,535 fields.")]
AttributeLimitReached,
#[error(transparent)]

View File

@ -40,7 +40,6 @@ pub mod main_key {
pub const DISPLAYED_FIELDS_KEY: &str = "displayed-fields";
pub const DISTINCT_FIELD_KEY: &str = "distinct-field-key";
pub const DOCUMENTS_IDS_KEY: &str = "documents-ids";
pub const SOFT_DELETED_DOCUMENTS_IDS_KEY: &str = "soft-deleted-documents-ids";
pub const HIDDEN_FACETED_FIELDS_KEY: &str = "hidden-faceted-fields";
pub const FILTERABLE_FIELDS_KEY: &str = "filterable-fields";
pub const SORTABLE_FIELDS_KEY: &str = "sortable-fields";
@ -367,29 +366,6 @@ impl Index {
Ok(count.unwrap_or_default())
}
/* deleted documents ids */
/// Writes the soft deleted documents ids.
pub(crate) fn put_soft_deleted_documents_ids(
&self,
wtxn: &mut RwTxn,
docids: &RoaringBitmap,
) -> heed::Result<()> {
self.main.put::<_, Str, RoaringBitmapCodec>(
wtxn,
main_key::SOFT_DELETED_DOCUMENTS_IDS_KEY,
docids,
)
}
/// Returns the soft deleted documents ids.
pub(crate) fn soft_deleted_documents_ids(&self, rtxn: &RoTxn) -> heed::Result<RoaringBitmap> {
Ok(self
.main
.get::<_, Str, RoaringBitmapCodec>(rtxn, main_key::SOFT_DELETED_DOCUMENTS_IDS_KEY)?
.unwrap_or_default())
}
/* primary key */
/// Writes the documents primary key, this is the field name that is used to store the id.
@ -1187,12 +1163,7 @@ impl Index {
rtxn: &'t RoTxn,
ids: impl IntoIterator<Item = DocumentId> + 'a,
) -> Result<impl Iterator<Item = Result<(DocumentId, obkv::KvReaderU16<'t>)>> + 'a> {
let soft_deleted_documents = self.soft_deleted_documents_ids(rtxn)?;
Ok(ids.into_iter().map(move |id| {
if soft_deleted_documents.contains(id) {
return Err(UserError::AccessingSoftDeletedDocument { document_id: id })?;
}
let kv = self
.documents
.get(rtxn, &BEU32::new(id))?
@ -1418,14 +1389,10 @@ impl Index {
rtxn: &RoTxn,
key: &(Script, Language),
) -> heed::Result<Option<RoaringBitmap>> {
let soft_deleted_documents = self.soft_deleted_documents_ids(rtxn)?;
let doc_ids = self.script_language_docids.get(rtxn, key)?;
Ok(doc_ids.map(|ids| ids - soft_deleted_documents))
Ok(self.script_language_docids.get(rtxn, key)?)
}
pub fn script_language(&self, rtxn: &RoTxn) -> heed::Result<HashMap<Script, Vec<Language>>> {
let soft_deleted_documents = self.soft_deleted_documents_ids(rtxn)?;
let mut script_language: HashMap<Script, Vec<Language>> = HashMap::new();
let mut script_language_doc_count: Vec<(Script, Language, u64)> = Vec::new();
let mut total = 0;
@ -1433,7 +1400,7 @@ impl Index {
let ((script, language), docids) = sl?;
// keep only Languages that contains at least 1 document.
let remaining_documents_count = (docids - &soft_deleted_documents).len();
let remaining_documents_count = docids.len();
total += remaining_documents_count;
if remaining_documents_count > 0 {
script_language_doc_count.push((script, language, remaining_documents_count));
@ -1918,7 +1885,6 @@ pub(crate) mod tests {
2 2
3 3
"###);
db_snap!(index, soft_deleted_documents_ids, 1, @"[]");
db_snap!(index, facet_id_f64_docids, 1, @r###"
1 0 0 1 [0, ]
1 0 1 1 [1, ]
@ -1943,7 +1909,6 @@ pub(crate) mod tests {
2 6
3 3
"###);
db_snap!(index, soft_deleted_documents_ids, 2, @"[0, 1, 2, ]");
db_snap!(index, facet_id_f64_docids, 2, @r###"
1 0 0 1 [0, ]
1 0 1 1 [1, 4, ]
@ -1965,7 +1930,6 @@ pub(crate) mod tests {
2 6
3 3
"###);
db_snap!(index, soft_deleted_documents_ids, 3, @"[0, 1, 2, 3, ]");
db_snap!(index, facet_id_f64_docids, 3, @r###"
1 0 0 1 [0, ]
1 0 1 1 [1, 4, ]
@ -1989,7 +1953,6 @@ pub(crate) mod tests {
2 6
3 7
"###);
db_snap!(index, soft_deleted_documents_ids, 3, @"[]");
db_snap!(index, facet_id_f64_docids, 3, @r###"
0 0 0 1 [4, ]
0 0 1 1 [5, ]
@ -2052,7 +2015,6 @@ pub(crate) mod tests {
2 2
3 3
"###);
db_snap!(index, soft_deleted_documents_ids, 1, @"[]");
db_snap!(index, facet_id_f64_docids, 1, @r###"
1 0 0 1 [0, ]
1 0 1 1 [1, ]
@ -2085,7 +2047,6 @@ pub(crate) mod tests {
2 6
3 3
"###);
db_snap!(index, soft_deleted_documents_ids, 1, @"[0, 1, 2, ]");
db_snap!(index, facet_id_f64_docids, 1, @r###"
1 0 0 1 [0, 4, ]
1 0 1 1 [1, 5, ]
@ -2153,7 +2114,6 @@ pub(crate) mod tests {
2 9
3 3
"###);
db_snap!(index, soft_deleted_documents_ids, 1, @"[0, 1, 2, 4, 5, 6, ]");
db_snap!(index, facet_id_f64_docids, 1, @r###"
1 0 0 1 [0, 4, 7, ]
1 0 1 1 [1, 5, 8, ]
@ -2221,7 +2181,7 @@ pub(crate) mod tests {
2 12
3 3
"###);
db_snap!(index, soft_deleted_documents_ids, 1, @"[]");
db_snap!(index, facet_id_f64_docids, 1, @r###"
1 0 0 1 [10, ]
1 0 3 1 [3, 11, ]
@ -2291,7 +2251,6 @@ pub(crate) mod tests {
34 1
38 0
"###);
db_snap!(index, soft_deleted_documents_ids, 1, @"[]");
index.delete_document("34");
@ -2302,7 +2261,6 @@ pub(crate) mod tests {
34 1
38 0
"###);
db_snap!(index, soft_deleted_documents_ids, 2, @"[1, ]");
index
.update_settings(|s| {
@ -2318,7 +2276,6 @@ pub(crate) mod tests {
hard:
38 0
"###);
db_snap!(index, soft_deleted_documents_ids, 3, @"[]");
// So that this document addition works correctly now.
// It would be wrongly interpreted as a replacement before
@ -2331,7 +2288,6 @@ pub(crate) mod tests {
34 1
38 0
"###);
db_snap!(index, soft_deleted_documents_ids, 4, @"[]");
// We do the test again, but deleting the document with id 0 instead of id 1 now
index.delete_document("38");
@ -2343,7 +2299,6 @@ pub(crate) mod tests {
34 1
38 0
"###);
db_snap!(index, soft_deleted_documents_ids, 5, @"[0, ]");
index
.update_settings(|s| {
@ -2357,7 +2312,6 @@ pub(crate) mod tests {
hard:
34 1
"###);
db_snap!(index, soft_deleted_documents_ids, 6, @"[]");
// And adding lots of documents afterwards instead of just one.
// These extra subtests don't add much, but it's better than nothing.
@ -2374,7 +2328,6 @@ pub(crate) mod tests {
41 3
42 5
"###);
db_snap!(index, soft_deleted_documents_ids, 7, @"[]");
}
#[test]
@ -2403,7 +2356,6 @@ pub(crate) mod tests {
30 0
34 1
"###);
db_snap!(index, soft_deleted_documents_ids, 1, @"[]");
index.delete_document("34");
@ -2414,7 +2366,6 @@ pub(crate) mod tests {
30 0
34 1
"###);
db_snap!(index, soft_deleted_documents_ids, 2, @"[1, ]");
index
.update_settings(|s| {
@ -2430,7 +2381,6 @@ pub(crate) mod tests {
hard:
30 0
"###);
db_snap!(index, soft_deleted_documents_ids, 3, @"[]");
// So that when we add a new document
index.add_documents(documents!({ "primary_key": 35, "b": 2 })).unwrap();
@ -2444,7 +2394,6 @@ pub(crate) mod tests {
30 0
35 1
"###);
db_snap!(index, soft_deleted_documents_ids, 4, @"[]");
// And when we add 34 again, we don't replace document 35
index.add_documents(documents!({ "primary_key": 34, "a": 1 })).unwrap();
@ -2458,7 +2407,6 @@ pub(crate) mod tests {
34 2
35 1
"###);
db_snap!(index, soft_deleted_documents_ids, 5, @"[]");
let rtxn = index.read_txn().unwrap();
let (_docid, obkv) = index.documents(&rtxn, [0]).unwrap()[0];
@ -2499,7 +2447,6 @@ pub(crate) mod tests {
38 4
39 5
"###);
db_snap!(index, soft_deleted_documents_ids, 6, @"[]");
}
#[test]
@ -2530,7 +2477,6 @@ pub(crate) mod tests {
4 1
5 2
"###);
db_snap!(index, soft_deleted_documents_ids, 1, @"[]");
index.delete_document("3");
@ -2542,7 +2488,6 @@ pub(crate) mod tests {
4 1
5 2
"###);
db_snap!(index, soft_deleted_documents_ids, 2, @"[0, ]");
index.add_documents(documents!([{ "primary_key": "4", "a": 2 }])).unwrap();
@ -2553,7 +2498,6 @@ pub(crate) mod tests {
4 3
5 2
"###);
db_snap!(index, soft_deleted_documents_ids, 2, @"[]");
index
.add_documents(documents!([
@ -2569,7 +2513,6 @@ pub(crate) mod tests {
4 3
5 2
"###);
db_snap!(index, soft_deleted_documents_ids, 2, @"[]");
}
#[test]
@ -2598,7 +2541,6 @@ pub(crate) mod tests {
11 0
4 1
"###);
db_snap!(index, soft_deleted_documents_ids, @"[]");
index
.add_documents(documents!([
@ -2615,7 +2557,6 @@ pub(crate) mod tests {
11 0
4 2
"###);
db_snap!(index, soft_deleted_documents_ids, @"[1, ]");
let mut wtxn = index.write_txn().unwrap();
let mut delete = DeleteDocuments::new(&mut wtxn, &index).unwrap();
@ -2630,7 +2571,6 @@ pub(crate) mod tests {
11 0
4 2
"###);
db_snap!(index, soft_deleted_documents_ids, @"[]");
index
.add_documents(documents!([
@ -2647,7 +2587,6 @@ pub(crate) mod tests {
11 0
4 1
"###);
db_snap!(index, soft_deleted_documents_ids, @"[2, 3, ]");
let rtxn = index.read_txn().unwrap();
let search = Search::new(&rtxn, &index);

View File

@ -223,12 +223,9 @@ impl<'a> Filter<'a> {
impl<'a> Filter<'a> {
pub fn evaluate(&self, rtxn: &heed::RoTxn, index: &Index) -> Result<RoaringBitmap> {
// to avoid doing this for each recursive call we're going to do it ONCE ahead of time
let soft_deleted_documents = index.soft_deleted_documents_ids(rtxn)?;
let filterable_fields = index.filterable_fields(rtxn)?;
// and finally we delete all the soft_deleted_documents, again, only once at the very end
self.inner_evaluate(rtxn, index, &filterable_fields)
.map(|result| result - soft_deleted_documents)
}
fn evaluate_operator(

View File

@ -98,7 +98,6 @@ Create a snapshot test of the given database.
- `facet_id_string_docids`
- `documents_ids`
- `stop_words`
- `soft_deleted_documents_ids`
- `field_distribution`
- `fields_ids_map`
- `geo_faceted_documents_ids`
@ -308,12 +307,6 @@ pub fn snap_stop_words(index: &Index) -> String {
let snap = format!("{stop_words:?}");
snap
}
pub fn snap_soft_deleted_documents_ids(index: &Index) -> String {
let rtxn = index.read_txn().unwrap();
let soft_deleted_documents_ids = index.soft_deleted_documents_ids(&rtxn).unwrap();
display_bitmap(&soft_deleted_documents_ids)
}
pub fn snap_field_distributions(index: &Index) -> String {
let rtxn = index.read_txn().unwrap();
let mut snap = String::new();
@ -484,9 +477,6 @@ macro_rules! full_snap_of_db {
($index:ident, stop_words) => {{
$crate::snapshot_tests::snap_stop_words(&$index)
}};
($index:ident, soft_deleted_documents_ids) => {{
$crate::snapshot_tests::snap_soft_deleted_documents_ids(&$index)
}};
($index:ident, field_distribution) => {{
$crate::snapshot_tests::snap_field_distributions(&$index)
}};

View File

@ -8,16 +8,11 @@ pub struct AvailableDocumentsIds {
}
impl AvailableDocumentsIds {
pub fn from_documents_ids(
docids: &RoaringBitmap,
soft_deleted_docids: &RoaringBitmap,
) -> AvailableDocumentsIds {
let used_docids = docids | soft_deleted_docids;
match used_docids.max() {
pub fn from_documents_ids(docids: &RoaringBitmap) -> AvailableDocumentsIds {
match docids.max() {
Some(last_id) => {
let mut available = RoaringBitmap::from_iter(0..last_id);
available -= used_docids;
available -= docids;
let iter = match last_id.checked_add(1) {
Some(id) => id..=u32::max_value(),
@ -50,7 +45,7 @@ mod tests {
#[test]
fn empty() {
let base = RoaringBitmap::new();
let left = AvailableDocumentsIds::from_documents_ids(&base, &RoaringBitmap::new());
let left = AvailableDocumentsIds::from_documents_ids(&base);
let right = 0..=u32::max_value();
left.zip(right).take(500).for_each(|(l, r)| assert_eq!(l, r));
}
@ -63,28 +58,8 @@ mod tests {
base.insert(100);
base.insert(405);
let left = AvailableDocumentsIds::from_documents_ids(&base, &RoaringBitmap::new());
let left = AvailableDocumentsIds::from_documents_ids(&base);
let right = (0..=u32::max_value()).filter(|&n| n != 0 && n != 10 && n != 100 && n != 405);
left.zip(right).take(500).for_each(|(l, r)| assert_eq!(l, r));
}
#[test]
fn soft_deleted() {
let mut base = RoaringBitmap::new();
base.insert(0);
base.insert(10);
base.insert(100);
base.insert(405);
let mut soft_deleted = RoaringBitmap::new();
soft_deleted.insert(1);
soft_deleted.insert(11);
soft_deleted.insert(101);
soft_deleted.insert(406);
let left = AvailableDocumentsIds::from_documents_ids(&base, &soft_deleted);
let right =
(0..=u32::max_value()).filter(|&n| ![0, 1, 10, 11, 100, 101, 405, 406].contains(&n));
left.zip(right).take(500).for_each(|(l, r)| assert_eq!(l, r));
}
}

View File

@ -56,7 +56,6 @@ impl<'t, 'u, 'i> ClearDocuments<'t, 'u, 'i> {
self.index.put_words_prefixes_fst(self.wtxn, &fst::Set::default())?;
self.index.put_external_documents_ids(self.wtxn, &ExternalDocumentsIds::default())?;
self.index.put_documents_ids(self.wtxn, &empty_roaring)?;
self.index.put_soft_deleted_documents_ids(self.wtxn, &empty_roaring)?;
self.index.put_field_distribution(self.wtxn, &FieldDistribution::default())?;
self.index.delete_geo_rtree(self.wtxn)?;
self.index.delete_geo_faceted_documents_ids(self.wtxn)?;

View File

@ -594,7 +594,6 @@ mod tests {
index.add_documents(documents).unwrap();
db_snap!(index, facet_id_f64_docids, "initial", @"777e0e221d778764b472c512617eeb3b");
db_snap!(index, soft_deleted_documents_ids, "initial", @"[]");
let mut documents = vec![];
for i in 0..999 {
@ -616,7 +615,6 @@ mod tests {
index.add_documents(documents).unwrap();
db_snap!(index, facet_id_f64_docids, "replaced_1_soft", @"abba175d7bed727d0efadaef85a4388f");
db_snap!(index, soft_deleted_documents_ids, "replaced_1_soft", @"6c975deb900f286d2f6456d2d5c3a123");
// Then replace the last document while disabling soft_deletion
let mut documents = vec![];
@ -639,7 +637,6 @@ mod tests {
index.add_documents(documents).unwrap();
db_snap!(index, facet_id_f64_docids, "replaced_2_hard", @"029e27a46d09c574ae949aa4289b45e6");
db_snap!(index, soft_deleted_documents_ids, "replaced_2_hard", @"[]");
}
}

View File

@ -999,7 +999,6 @@ mod tests {
assert_eq!(count, 6);
db_snap!(index, word_docids, "updated");
db_snap!(index, soft_deleted_documents_ids, "updated", @"[0, 1, 4, ]");
drop(rtxn);
}
@ -2649,8 +2648,6 @@ mod tests {
0 1
"###);
db_snap!(index, soft_deleted_documents_ids, @"[]");
// BATCH 3
println!("--- ENTERING BATCH 3");

View File

@ -132,17 +132,13 @@ impl<'a, 'i> Transform<'a, 'i> {
indexer_settings.max_memory.map(|mem| mem / 2),
);
let documents_ids = index.documents_ids(wtxn)?;
let soft_deleted_documents_ids = index.soft_deleted_documents_ids(wtxn)?;
Ok(Transform {
index,
fields_ids_map: index.fields_ids_map(wtxn)?,
indexer_settings,
autogenerate_docids,
available_documents_ids: AvailableDocumentsIds::from_documents_ids(
&documents_ids,
&soft_deleted_documents_ids,
),
available_documents_ids: AvailableDocumentsIds::from_documents_ids(&documents_ids),
original_sorter,
flattened_sorter,
index_documents_method,

View File

@ -508,7 +508,6 @@ mod tests {
db_snap!(index, word_docids, "replaced");
db_snap!(index, word_prefix_pair_proximity_docids, "replaced");
db_snap!(index, prefix_word_pair_proximity_docids, "replaced");
db_snap!(index, soft_deleted_documents_ids, "replaced", @"[0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35, 36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, ]");
}
#[test]
@ -568,6 +567,5 @@ mod tests {
db_snap!(index, word_docids, "replaced");
db_snap!(index, word_prefix_pair_proximity_docids, "replaced");
db_snap!(index, prefix_word_pair_proximity_docids, "replaced");
db_snap!(index, soft_deleted_documents_ids, "replaced", @"[]");
}
}