From 9e5f9a8a1051b839b54572aac685bbde68538a8c Mon Sep 17 00:00:00 2001 From: Kerollmops Date: Wed, 23 Jun 2021 18:35:44 +0200 Subject: [PATCH 1/2] Add a test for the words level positions generation bug --- milli/src/update/index_documents/mod.rs | 41 +++++++++++++++++++++++++ 1 file changed, 41 insertions(+) diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index a25b0f3a7..316b0eb81 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -836,6 +836,8 @@ impl<'t, 'u, 'i, 'a> IndexDocuments<'t, 'u, 'i, 'a> { #[cfg(test)] mod tests { + use std::io::Cursor; + use heed::EnvOpenOptions; use super::*; @@ -1258,4 +1260,43 @@ mod tests { drop(rtxn); } + + #[test] + fn simple_documents_replace() { + let path = tempfile::tempdir().unwrap(); + let mut options = EnvOpenOptions::new(); + options.map_size(10 * 1024 * 1024); // 10 MB + let index = Index::new(options, &path).unwrap(); + + // First we send 6 documents, each with an explicit id. + let mut wtxn = index.write_txn().unwrap(); + let documents = &r#"[ + { "id": 2, "title": "Pride and Prejudice", "author": "Jane Austin", "genre": "romance", "price": 3.5 }, + { "id": 456, "title": "Le Petit Prince", "author": "Antoine de Saint-Exupéry", "genre": "adventure" , "price": 10.0 }, + { "id": 1, "title": "Alice In Wonderland", "author": "Lewis Carroll", "genre": "fantasy", "price": 25.99 }, + { "id": 1344, "title": "The Hobbit", "author": "J. R. R. Tolkien", "genre": "fantasy" }, + { "id": 4, "title": "Harry Potter and the Half-Blood Prince", "author": "J. K. 
Rowling", "genre": "fantasy" }, + { "id": 42, "title": "The Hitchhiker's Guide to the Galaxy", "author": "Douglas Adams" } + ]"#[..]; + let mut builder = IndexDocuments::new(&mut wtxn, &index, 0); + builder.update_format(UpdateFormat::Json); + builder.index_documents_method(IndexDocumentsMethod::ReplaceDocuments); + builder.execute(Cursor::new(documents), |_, _| ()).unwrap(); + wtxn.commit().unwrap(); + + let mut wtxn = index.write_txn().unwrap(); + let mut builder = IndexDocuments::new(&mut wtxn, &index, 1); + builder.update_format(UpdateFormat::Json); + builder.index_documents_method(IndexDocumentsMethod::UpdateDocuments); + let documents = &r#"[ + { + "id": 2, + "author": "J. Austen", + "date": "1813" + } + ]"#[..]; + + builder.execute(Cursor::new(documents), |_, _| ()).unwrap(); + wtxn.commit().unwrap(); + } } From 0013236e5db378c68d72dedeaf0594e6a851cdd0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Mon, 28 Jun 2021 16:19:02 +0200 Subject: [PATCH 2/2] Fix the LMDB and heed invalid interactions. It is undefined behavior to keep a reference to the database while modifying it; we were keeping references into the database and also feeding the heed put_current methods with keys referenced inside the database itself. 
https://github.com/Kerollmops/heed/pull/108 --- milli/src/update/delete_documents.rs | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/milli/src/update/delete_documents.rs b/milli/src/update/delete_documents.rs index e291eb106..30ae55e62 100644 --- a/milli/src/update/delete_documents.rs +++ b/milli/src/update/delete_documents.rs @@ -197,7 +197,8 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { iter.del_current()?; *must_remove = true; } else if docids.len() != previous_len { - iter.put_current(key, &docids)?; + let key = key.to_owned(); + iter.put_current(&key, &docids)?; } } } @@ -238,13 +239,14 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { let mut iter = word_prefix_docids.iter_mut(self.wtxn)?; while let Some(result) = iter.next() { let (prefix, mut docids) = result?; + let prefix = prefix.to_owned(); let previous_len = docids.len(); docids.difference_with(&self.documents_ids); if docids.is_empty() { iter.del_current()?; prefixes_to_delete.insert(prefix)?; } else if docids.len() != previous_len { - iter.put_current(prefix, &docids)?; + iter.put_current(&prefix, &docids)?; } } @@ -281,7 +283,8 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { if docids.is_empty() { iter.del_current()?; } else if docids.len() != previous_len { - iter.put_current(key, &docids)?; + let key = key.to_owned(); + iter.put_current(&key, &docids)?; } } @@ -299,7 +302,8 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { if docids.is_empty() { iter.del_current()?; } else if docids.len() != previous_len { - iter.put_current(bytes, &docids)?; + let bytes = bytes.to_owned(); + iter.put_current(&bytes, &docids)?; } } @@ -315,7 +319,8 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { if docids.is_empty() { iter.del_current()?; } else if docids.len() != previous_len { - iter.put_current(bytes, &docids)?; + let bytes = bytes.to_owned(); + iter.put_current(&bytes, &docids)?; } } @@ -331,7 +336,8 @@ impl<'t, 'u, 'i> DeleteDocuments<'t, 'u, 'i> { if 
docids.is_empty() { iter.del_current()?; } else if docids.len() != previous_len { - iter.put_current(bytes, &docids)?; + let bytes = bytes.to_owned(); + iter.put_current(&bytes, &docids)?; } } @@ -437,7 +443,8 @@ where if docids.is_empty() { iter.del_current()?; } else if docids.len() != previous_len { - iter.put_current(bytes, &docids)?; + let bytes = bytes.to_owned(); + iter.put_current(&bytes, &docids)?; } }