Drastically speed up documents deletion updates

2025-06-18 12:47:35 +02:00 · 2020-11-19 11:17:53 +01:00 · 2020-11-19 11:17:53 +01:00 · 0694cc4916
commit 0694cc4916
parent 38c76754ef
1 changed files with 10 additions and 6 deletions
--- a/src/update/delete_documents.rs
+++ b/src/update/delete_documents.rs
@ -1,4 +1,5 @@
 use fst::IntoStreamer;
+use heed::types::ByteSlice;
 use roaring::RoaringBitmap;

 use crate::{Index, BEU32, SmallString32, ExternalDocumentsIds};
@ -132,11 +133,12 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
            let mut iter = word_docids.prefix_iter_mut(self.wtxn, &word)?;
            if let Some((key, mut docids)) = iter.next().transpose()? {
                if key == word.as_ref() {
+                    let previous_len = docids.len();
                    docids.difference_with(&self.documents_ids);
                    if docids.is_empty() {
                        iter.del_current()?;
                        *must_remove = true;
-                    } else {
+                    } else if docids.len() != previous_len {
                        iter.put_current(key, &docids)?;
                    }
                }
@ -168,14 +170,15 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
        // We delete the documents ids that are under the pairs of words,
        // it is faster and use no memory to iterate over all the words pairs than
        // to compute the cartesian product of every words of the deleted documents.
-        let mut iter = word_pair_proximity_docids.iter_mut(self.wtxn)?;
+        let mut iter = word_pair_proximity_docids.remap_key_type::<ByteSlice>().iter_mut(self.wtxn)?;
        while let Some(result) = iter.next() {
-            let ((w1, w2, prox), mut docids) = result?;
+            let (bytes, mut docids) = result?;
+            let previous_len = docids.len();
            docids.difference_with(&self.documents_ids);
            if docids.is_empty() {
                iter.del_current()?;
-            } else {
-                iter.put_current(&(w1, w2, prox), &docids)?;
+            } else if docids.len() != previous_len {
+                iter.put_current(bytes, &docids)?;
            }
        }

@ -185,10 +188,11 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
        let mut iter = facet_field_id_value_docids.iter_mut(self.wtxn)?;
        while let Some(result) = iter.next() {
            let (bytes, mut docids) = result?;
+            let previous_len = docids.len();
            docids.difference_with(&self.documents_ids);
            if docids.is_empty() {
                iter.del_current()?;
-            } else {
+            } else if docids.len() != previous_len {
                iter.put_current(bytes, &docids)?;
            }
        }