mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-23 13:24:27 +01:00
Drastically speed up documents deletion updates
This commit is contained in:
parent
38c76754ef
commit
0694cc4916
@ -1,4 +1,5 @@
|
|||||||
use fst::IntoStreamer;
|
use fst::IntoStreamer;
|
||||||
|
use heed::types::ByteSlice;
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::{Index, BEU32, SmallString32, ExternalDocumentsIds};
|
use crate::{Index, BEU32, SmallString32, ExternalDocumentsIds};
|
||||||
@ -132,11 +133,12 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
let mut iter = word_docids.prefix_iter_mut(self.wtxn, &word)?;
|
let mut iter = word_docids.prefix_iter_mut(self.wtxn, &word)?;
|
||||||
if let Some((key, mut docids)) = iter.next().transpose()? {
|
if let Some((key, mut docids)) = iter.next().transpose()? {
|
||||||
if key == word.as_ref() {
|
if key == word.as_ref() {
|
||||||
|
let previous_len = docids.len();
|
||||||
docids.difference_with(&self.documents_ids);
|
docids.difference_with(&self.documents_ids);
|
||||||
if docids.is_empty() {
|
if docids.is_empty() {
|
||||||
iter.del_current()?;
|
iter.del_current()?;
|
||||||
*must_remove = true;
|
*must_remove = true;
|
||||||
} else {
|
} else if docids.len() != previous_len {
|
||||||
iter.put_current(key, &docids)?;
|
iter.put_current(key, &docids)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@ -168,14 +170,15 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
// We delete the documents ids that are under the pairs of words,
|
// We delete the documents ids that are under the pairs of words,
|
||||||
// it is faster and use no memory to iterate over all the words pairs than
|
// it is faster and use no memory to iterate over all the words pairs than
|
||||||
// to compute the cartesian product of every words of the deleted documents.
|
// to compute the cartesian product of every words of the deleted documents.
|
||||||
let mut iter = word_pair_proximity_docids.iter_mut(self.wtxn)?;
|
let mut iter = word_pair_proximity_docids.remap_key_type::<ByteSlice>().iter_mut(self.wtxn)?;
|
||||||
while let Some(result) = iter.next() {
|
while let Some(result) = iter.next() {
|
||||||
let ((w1, w2, prox), mut docids) = result?;
|
let (bytes, mut docids) = result?;
|
||||||
|
let previous_len = docids.len();
|
||||||
docids.difference_with(&self.documents_ids);
|
docids.difference_with(&self.documents_ids);
|
||||||
if docids.is_empty() {
|
if docids.is_empty() {
|
||||||
iter.del_current()?;
|
iter.del_current()?;
|
||||||
} else {
|
} else if docids.len() != previous_len {
|
||||||
iter.put_current(&(w1, w2, prox), &docids)?;
|
iter.put_current(bytes, &docids)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@ -185,10 +188,11 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
let mut iter = facet_field_id_value_docids.iter_mut(self.wtxn)?;
|
let mut iter = facet_field_id_value_docids.iter_mut(self.wtxn)?;
|
||||||
while let Some(result) = iter.next() {
|
while let Some(result) = iter.next() {
|
||||||
let (bytes, mut docids) = result?;
|
let (bytes, mut docids) = result?;
|
||||||
|
let previous_len = docids.len();
|
||||||
docids.difference_with(&self.documents_ids);
|
docids.difference_with(&self.documents_ids);
|
||||||
if docids.is_empty() {
|
if docids.is_empty() {
|
||||||
iter.del_current()?;
|
iter.del_current()?;
|
||||||
} else {
|
} else if docids.len() != previous_len {
|
||||||
iter.put_current(bytes, &docids)?;
|
iter.put_current(bytes, &docids)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user