mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-23 13:24:27 +01:00
Improve documents deletion by iterating over all the word pair positions
This commit is contained in:
parent
3889d956d9
commit
d6338af766
@ -2,7 +2,6 @@ use std::borrow::Cow;
|
|||||||
use std::convert::TryFrom;
|
use std::convert::TryFrom;
|
||||||
|
|
||||||
use fst::{IntoStreamer, Streamer};
|
use fst::{IntoStreamer, Streamer};
|
||||||
use itertools::Itertools;
|
|
||||||
use roaring::RoaringBitmap;
|
use roaring::RoaringBitmap;
|
||||||
|
|
||||||
use crate::{Index, BEU32};
|
use crate::{Index, BEU32};
|
||||||
@ -168,13 +167,10 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
// We write the new words FST into the main database.
|
// We write the new words FST into the main database.
|
||||||
self.index.put_words_fst(self.wtxn, &new_words_fst)?;
|
self.index.put_words_fst(self.wtxn, &new_words_fst)?;
|
||||||
|
|
||||||
// We delete the documents ids that are under the pairs of words we found.
|
// We delete the documents ids that are under the pairs of words,
|
||||||
// TODO We can maybe improve this by using the `compute_words_pair_proximities`
|
// it is faster and uses no memory to iterate over all the word pairs than
|
||||||
// function instead of iterating over all the possible word pairs.
|
// to compute the cartesian product of every word of the deleted documents.
|
||||||
for ((w1, _), (w2, _)) in words.iter().cartesian_product(&words) {
|
let mut iter = word_pair_proximity_docids.iter_mut(self.wtxn)?;
|
||||||
let start = &(w1.as_str(), w2.as_str(), 0);
|
|
||||||
let end = &(w1.as_str(), w2.as_str(), 7);
|
|
||||||
let mut iter = word_pair_proximity_docids.range_mut(self.wtxn, &(start..=end))?;
|
|
||||||
while let Some(result) = iter.next() {
|
while let Some(result) = iter.next() {
|
||||||
let ((w1, w2, prox), mut docids) = result?;
|
let ((w1, w2, prox), mut docids) = result?;
|
||||||
docids.difference_with(&documents_ids);
|
docids.difference_with(&documents_ids);
|
||||||
@ -184,7 +180,6 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
iter.put_current(&(w1, w2, prox), &docids)?;
|
iter.put_current(&(w1, w2, prox), &docids)?;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
Ok(documents_ids.len() as usize)
|
Ok(documents_ids.len() as usize)
|
||||||
}
|
}
|
||||||
|
Loading…
Reference in New Issue
Block a user