From b15e8aacb662a9d76b288a56281f6787146e6a7f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Tue, 2 Jul 2024 15:50:22 +0200 Subject: [PATCH] Fix merging of documents to support compressed documents --- milli/src/update/index_documents/transform.rs | 27 ++++++++++--------- 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index cbd2a9006..4b4a501df 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -168,10 +168,12 @@ impl<'a, 'i> Transform<'a, 'i> { let external_documents_ids = self.index.external_documents_ids(); let mapping = create_fields_mapping(&mut self.fields_ids_map, &fields_index)?; + let dictionary = self.index.document_compression_dictionary(wtxn)?; let primary_key = cursor.primary_key().to_string(); let primary_key_id = self.fields_ids_map.insert(&primary_key).ok_or(UserError::AttributeLimitReached)?; + let mut decompression_buffer = Vec::new(); let mut obkv_buffer = Vec::new(); let mut document_sorter_value_buffer = Vec::new(); let mut document_sorter_key_buffer = Vec::new(); @@ -247,18 +249,20 @@ impl<'a, 'i> Transform<'a, 'i> { let mut skip_insertion = false; if let Some(original_docid) = original_docid { let original_key = original_docid; - let base_obkv = self - .index - .documents - .remap_data_type::() - .get(wtxn, &original_key)? - .ok_or(InternalError::DatabaseMissingEntry { - db_name: db_name::DOCUMENTS, - key: None, - })?; + let base_compressed_obkv = self.index.documents.get(wtxn, &original_key)?.ok_or( + InternalError::DatabaseMissingEntry { db_name: db_name::DOCUMENTS, key: None }, + )?; + + let base_obkv = match dictionary { + // TODO manage this unwrap correctly + Some(dict) => base_compressed_obkv + .decompress_with(&mut decompression_buffer, dict) + .unwrap(), + None => base_compressed_obkv.as_non_compressed(), + }; // we check if the two documents are exactly equal. If it's the case we can skip this document entirely - if base_obkv == obkv_buffer { + if base_obkv.as_bytes() == obkv_buffer { // we're not replacing anything self.replaced_documents_ids.remove(original_docid); // and we need to put back the original id as it was before @@ -278,13 +282,12 @@ impl<'a, 'i> Transform<'a, 'i> { document_sorter_value_buffer.clear(); document_sorter_value_buffer.push(Operation::Addition as u8); into_del_add_obkv( - KvReaderU16::new(base_obkv), + base_obkv, deladd_operation, &mut document_sorter_value_buffer, )?; self.original_sorter .insert(&document_sorter_key_buffer, &document_sorter_value_buffer)?; - let base_obkv = KvReader::new(base_obkv); if let Some(flattened_obkv) = Self::flatten_from_fields_ids_map(&base_obkv, &mut self.fields_ids_map)? {