From 31776fdc3ffcec5448024bf725c2e9f108b5a76e Mon Sep 17 00:00:00 2001 From: ad hoc Date: Tue, 7 Jun 2022 12:24:06 +0200 Subject: [PATCH 1/2] add failing test --- milli/src/update/index_documents/mod.rs | 48 +++++++++++++++++++++++++ 1 file changed, 48 insertions(+) diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs index 117233611..5b6af12ae 100644 --- a/milli/src/update/index_documents/mod.rs +++ b/milli/src/update/index_documents/mod.rs @@ -1881,4 +1881,52 @@ mod tests { wtxn.commit().unwrap(); } + + #[test] + fn index_documents_in_multiple_transforms() { + let tmp = tempfile::tempdir().unwrap(); + let mut options = EnvOpenOptions::new(); + options.map_size(4096 * 100); + let index = Index::new(options, tmp).unwrap(); + let mut wtxn = index.write_txn().unwrap(); + let indexer_config = IndexerConfig::default(); + let mut builder = IndexDocuments::new( + &mut wtxn, + &index, + &indexer_config, + IndexDocumentsConfig::default(), + |_| (), + ) + .unwrap(); + + let doc1 = documents! {[{ + "id": 228142, + "title": "asdsad", + "state": "automated", + "priority": "normal", + "public_uid": "37ccf021", + "project_id": 78207, + "branch_id_number": 0 + }]}; + + let doc2 = documents! 
{[{ + "id": 228143, + "title": "something", + "state": "automated", + "priority": "normal", + "public_uid": "39c6499b", + "project_id": 78207, + "branch_id_number": 0 + }]}; + + builder.add_documents(doc1).unwrap(); + builder.add_documents(doc2).unwrap(); + + builder.execute().unwrap(); + + let map = index.external_documents_ids(&wtxn).unwrap().to_hash_map(); + let ids = map.values().collect::<HashSet<_>>(); + + assert_eq!(ids.len(), map.len()); + } } From d0aaa7ff0057204762006ee3263019d94e21abe1 Mon Sep 17 00:00:00 2001 From: Tamo Date: Tue, 7 Jun 2022 15:44:55 +0200 Subject: [PATCH 2/2] Fix wrong internal ids assignments --- milli/src/update/index_documents/transform.rs | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index 9238212fd..08d450578 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -51,6 +51,7 @@ pub struct Transform<'a, 'i> { indexer_settings: &'a IndexerConfig, pub autogenerate_docids: bool, pub index_documents_method: IndexDocumentsMethod, + available_documents_ids: AvailableDocumentsIds, original_sorter: grenad::Sorter, flattened_sorter: grenad::Sorter, @@ -128,12 +129,14 @@ impl<'a, 'i> Transform<'a, 'i> { indexer_settings.max_nb_chunks, indexer_settings.max_memory.map(|mem| mem / 2), ); + let documents_ids = index.documents_ids(wtxn)?; Ok(Transform { index, fields_ids_map: index.fields_ids_map(wtxn)?, indexer_settings, autogenerate_docids, + available_documents_ids: AvailableDocumentsIds::from_documents_ids(&documents_ids), original_sorter, flattened_sorter, index_documents_method, @@ -156,8 +159,6 @@ impl<'a, 'i> Transform<'a, 'i> { { let fields_index = reader.index(); let external_documents_ids = self.index.external_documents_ids(wtxn)?; - let documents_ids = self.index.documents_ids(wtxn)?; - let mut available_documents_ids = 
AvailableDocumentsIds::from_documents_ids(&documents_ids); let mapping = create_fields_mapping(&mut self.fields_ids_map, fields_index)?; @@ -261,7 +262,8 @@ impl<'a, 'i> Transform<'a, 'i> { // if the document has never been encountered we give it a new docid // and push this new docid to the external documents ids builder Entry::Vacant(entry) => { - let new_docid = available_documents_ids + let new_docid = self + .available_documents_ids .next() .ok_or(UserError::DocumentLimitReached)?; entry.insert(new_docid as u64);