mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-27 07:14:26 +01:00
Fix a documents indexing bug and add a test
This commit is contained in:
parent
99da69c85f
commit
7cc1a358f5
@ -47,7 +47,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
}
|
}
|
||||||
|
|
||||||
pub fn execute(self) -> anyhow::Result<usize> {
|
pub fn execute(self) -> anyhow::Result<usize> {
|
||||||
// We retrieve remove the deleted documents ids and write them into the database.
|
// We retrieve the current documents ids that are in the database.
|
||||||
let mut documents_ids = self.index.documents_ids(self.wtxn)?;
|
let mut documents_ids = self.index.documents_ids(self.wtxn)?;
|
||||||
|
|
||||||
// We can and must stop removing documents in a database that is empty.
|
// We can and must stop removing documents in a database that is empty.
|
||||||
@ -55,8 +55,10 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
return Ok(0);
|
return Ok(0);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We remove the documents ids that we want to delete
|
||||||
|
// from the documents in the database and write them back.
|
||||||
let current_documents_ids_len = documents_ids.len();
|
let current_documents_ids_len = documents_ids.len();
|
||||||
documents_ids.intersect_with(&self.documents_ids);
|
documents_ids.difference_with(&self.documents_ids);
|
||||||
self.index.put_documents_ids(self.wtxn, &documents_ids)?;
|
self.index.put_documents_ids(self.wtxn, &documents_ids)?;
|
||||||
|
|
||||||
// We can execute a ClearDocuments operation when the number of documents
|
// We can execute a ClearDocuments operation when the number of documents
|
||||||
@ -80,7 +82,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
// Retrieve the words and the users ids contained in the documents.
|
// Retrieve the words and the users ids contained in the documents.
|
||||||
let mut words = Vec::new();
|
let mut words = Vec::new();
|
||||||
let mut users_ids = Vec::new();
|
let mut users_ids = Vec::new();
|
||||||
for docid in &documents_ids {
|
for docid in &self.documents_ids {
|
||||||
// We create an iterator to be able to get the content and delete the document
|
// We create an iterator to be able to get the content and delete the document
|
||||||
// content itself. It's faster to acquire a cursor to get and delete,
|
// content itself. It's faster to acquire a cursor to get and delete,
|
||||||
// as we avoid traversing the LMDB B-Tree two times but only once.
|
// as we avoid traversing the LMDB B-Tree two times but only once.
|
||||||
@ -144,7 +146,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
let mut iter = word_docids.prefix_iter_mut(self.wtxn, &word)?;
|
let mut iter = word_docids.prefix_iter_mut(self.wtxn, &word)?;
|
||||||
if let Some((key, mut docids)) = iter.next().transpose()? {
|
if let Some((key, mut docids)) = iter.next().transpose()? {
|
||||||
if key == word.as_ref() {
|
if key == word.as_ref() {
|
||||||
docids.difference_with(&mut documents_ids);
|
docids.difference_with(&self.documents_ids);
|
||||||
if docids.is_empty() {
|
if docids.is_empty() {
|
||||||
iter.del_current()?;
|
iter.del_current()?;
|
||||||
*must_remove = true;
|
*must_remove = true;
|
||||||
@ -181,7 +183,7 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
let mut iter = word_pair_proximity_docids.iter_mut(self.wtxn)?;
|
let mut iter = word_pair_proximity_docids.iter_mut(self.wtxn)?;
|
||||||
while let Some(result) = iter.next() {
|
while let Some(result) = iter.next() {
|
||||||
let ((w1, w2, prox), mut docids) = result?;
|
let ((w1, w2, prox), mut docids) = result?;
|
||||||
docids.difference_with(&documents_ids);
|
docids.difference_with(&self.documents_ids);
|
||||||
if docids.is_empty() {
|
if docids.is_empty() {
|
||||||
iter.del_current()?;
|
iter.del_current()?;
|
||||||
} else {
|
} else {
|
||||||
@ -189,6 +191,6 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
Ok(documents_ids.len() as usize)
|
Ok(self.documents_ids.len() as usize)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -484,3 +484,54 @@ impl<'t, 'u, 'i> IndexDocuments<'t, 'u, 'i> {
|
|||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;
    use heed::EnvOpenOptions;

    /// Indexes three successive CSV batches that reuse the ids 1 to 3 and
    /// checks that the index reports exactly 3 documents after each batch.
    #[test]
    fn simple_replacement() {
        let path = tempfile::tempdir().unwrap();
        let mut options = EnvOpenOptions::new();
        options.map_size(10 * 1024 * 1024); // 10 MB

        let index = Index::new(options, &path).unwrap();

        let batches: [&[u8]; 3] = [
            // First batch: 3 documents with ids from 1 to 3.
            b"id,name\n1,kevin\n2,kevina\n3,benoit\n",
            // Second batch: a single document that reuses id 1.
            b"id,name\n1,updated kevin\n",
            // Third batch: 3 documents that reuse ids 1 to 3.
            b"id,name\n1,updated second kevin\n2,updated kevina\n3,updated benoit\n",
        ];

        for content in batches.iter().copied() {
            // Every batch is indexed and committed in its own write transaction.
            let mut wtxn = index.write_txn().unwrap();
            IndexDocuments::new(&mut wtxn, &index).execute(content, |_, _| ()).unwrap();
            wtxn.commit().unwrap();

            // Whatever the batch was, the index must still hold 3 documents.
            let rtxn = index.read_txn().unwrap();
            let count = index.number_of_documents(&rtxn).unwrap();
            assert_eq!(count, 3);
            drop(rtxn);
        }
    }
}
|
||||||
|
Loading…
Reference in New Issue
Block a user