From 696fcf4d185793f2ffaa2274dc45700128e06dd2 Mon Sep 17 00:00:00 2001 From: Louis Dureuil Date: Tue, 24 Oct 2023 11:03:35 +0200 Subject: [PATCH] Fix document insertion into LMDB --- .../src/update/index_documents/typed_chunk.rs | 21 ++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs index 9d4d63f90..6a2ea8486 100644 --- a/milli/src/update/index_documents/typed_chunk.rs +++ b/milli/src/update/index_documents/typed_chunk.rs @@ -8,6 +8,7 @@ use charabia::{Language, Script}; use grenad::MergerBuilder; use heed::types::ByteSlice; use heed::RwTxn; +use obkv::{KvReader, KvWriter}; use roaring::RoaringBitmap; use super::helpers::{self, merge_ignore_values, valid_lmdb_key, CursorClonableMmap}; @@ -19,7 +20,9 @@ use crate::index::Hnsw; use crate::update::del_add::{DelAdd, KvReaderDelAdd}; use crate::update::facet::FacetsUpdate; use crate::update::index_documents::helpers::{as_cloneable_grenad, try_split_array_at}; -use crate::{lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index, Result, BEU32}; +use crate::{ + lat_lng_to_xyz, CboRoaringBitmapCodec, DocumentId, FieldId, GeoPoint, Index, Result, BEU32, +}; pub(crate) enum TypedChunk { FieldIdDocidFacetStrings(grenad::Reader), @@ -120,8 +123,20 @@ pub(crate) fn write_typed_chunk_into_index( match typed_chunk { TypedChunk::Documents(obkv_documents_iter) => { let mut cursor = obkv_documents_iter.into_cursor()?; - while let Some((key, value)) = cursor.move_on_next()? { - index.documents.remap_types::().put(wtxn, key, value)?; + while let Some((docid, reader)) = cursor.move_on_next()? { + let mut writer: KvWriter<_, FieldId> = KvWriter::memory(); + let reader: KvReader = KvReader::new(reader); + for (field_id, value) in reader.iter() { + let Some(value) = KvReaderDelAdd::new(value).get(DelAdd::Addition) else { + continue; + }; + writer.insert(field_id, value)?; + } + index.documents.remap_types::().put( + wtxn, + docid, + &writer.into_inner().unwrap(), + )?; } } TypedChunk::FieldIdWordCountDocids(fid_word_count_docids_iter) => {