From ff8d7a810de935db3f35583e3c3dba34d1ca32a7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?=
Date: Wed, 16 Feb 2022 15:40:08 +0100
Subject: [PATCH] Change the behavior of the as_cloneable_grenad by taking a ref

---
 .../src/update/index_documents/extract/mod.rs | 10 ++---
 .../index_documents/helpers/grenad_helpers.rs |  8 ++--
 .../src/update/index_documents/helpers/mod.rs |  2 +-
 milli/src/update/index_documents/mod.rs       | 42 ++++---------------
 .../src/update/index_documents/typed_chunk.rs |  4 +-
 5 files changed, 21 insertions(+), 45 deletions(-)

diff --git a/milli/src/update/index_documents/extract/mod.rs b/milli/src/update/index_documents/extract/mod.rs
index 0f04418ed..4c81b9334 100644
--- a/milli/src/update/index_documents/extract/mod.rs
+++ b/milli/src/update/index_documents/extract/mod.rs
@@ -25,7 +25,7 @@ use self::extract_word_docids::extract_word_docids;
 use self::extract_word_pair_proximity_docids::extract_word_pair_proximity_docids;
 use self::extract_word_position_docids::extract_word_position_docids;
 use super::helpers::{
-    into_clonable_grenad, keep_first_prefix_value_merge_roaring_bitmaps, merge_cbo_roaring_bitmaps,
+    as_cloneable_grenad, keep_first_prefix_value_merge_roaring_bitmaps, merge_cbo_roaring_bitmaps,
     merge_readers, merge_roaring_bitmaps, CursorClonableMmap, GrenadParameters, MergeFn,
 };
 use super::{helpers, TypedChunk};
@@ -184,7 +184,7 @@ fn extract_documents_data(
     grenad::Reader<CursorClonableMmap>,
     (grenad::Reader<CursorClonableMmap>, grenad::Reader<CursorClonableMmap>),
 )> {
-    let documents_chunk = documents_chunk.and_then(|c| unsafe { into_clonable_grenad(c) })?;
+    let documents_chunk = documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;
 
     let _ = lmdb_writer_sx.send(Ok(TypedChunk::Documents(documents_chunk.clone())));
 
@@ -217,7 +217,7 @@ fn extract_documents_data(
 
                 // send docid_word_positions_chunk to DB writer
                 let docid_word_positions_chunk =
-                    unsafe { into_clonable_grenad(docid_word_positions_chunk)? };
+                    unsafe { as_cloneable_grenad(&docid_word_positions_chunk)? };
                 let _ = lmdb_writer_sx
                     .send(Ok(TypedChunk::DocidWordPositions(docid_word_positions_chunk.clone())));
 
@@ -233,7 +233,7 @@ fn extract_documents_data(
 
                 // send docid_fid_facet_numbers_chunk to DB writer
                 let docid_fid_facet_numbers_chunk =
-                    unsafe { into_clonable_grenad(docid_fid_facet_numbers_chunk)? };
+                    unsafe { as_cloneable_grenad(&docid_fid_facet_numbers_chunk)? };
 
                 let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdDocidFacetNumbers(
                     docid_fid_facet_numbers_chunk.clone(),
@@ -241,7 +241,7 @@ fn extract_documents_data(
 
                 // send docid_fid_facet_strings_chunk to DB writer
                 let docid_fid_facet_strings_chunk =
-                    unsafe { into_clonable_grenad(docid_fid_facet_strings_chunk)? };
+                    unsafe { as_cloneable_grenad(&docid_fid_facet_strings_chunk)? };
 
                 let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdDocidFacetStrings(
                     docid_fid_facet_strings_chunk.clone(),
diff --git a/milli/src/update/index_documents/helpers/grenad_helpers.rs b/milli/src/update/index_documents/helpers/grenad_helpers.rs
index ec4a32755..ded74b2af 100644
--- a/milli/src/update/index_documents/helpers/grenad_helpers.rs
+++ b/milli/src/update/index_documents/helpers/grenad_helpers.rs
@@ -68,11 +68,11 @@ pub fn writer_into_reader(writer: grenad::Writer<File>) -> Result<grenad::Reade
 /// # Safety
 ///
 /// We use memory mapping inside. So, according to the Rust community, it's unsafe.
-pub unsafe fn into_clonable_grenad(
-    reader: grenad::Reader<File>,
+pub unsafe fn as_cloneable_grenad(
+    reader: &grenad::Reader<File>,
 ) -> Result<grenad::Reader<CursorClonableMmap>> {
-    let file = reader.into_inner();
-    let mmap = memmap2::Mmap::map(&file)?;
+    let file = reader.get_ref();
+    let mmap = memmap2::Mmap::map(file)?;
     let cursor = io::Cursor::new(ClonableMmap::from(mmap));
     let reader = grenad::Reader::new(cursor)?;
     Ok(reader)
diff --git a/milli/src/update/index_documents/helpers/mod.rs b/milli/src/update/index_documents/helpers/mod.rs
index bbb2b9b95..22c1cfd6c 100644
--- a/milli/src/update/index_documents/helpers/mod.rs
+++ b/milli/src/update/index_documents/helpers/mod.rs
@@ -8,7 +8,7 @@ use std::convert::{TryFrom, TryInto};
 pub use clonable_mmap::{ClonableMmap, CursorClonableMmap};
 use fst::{IntoStreamer, Streamer};
 pub use grenad_helpers::{
-    create_sorter, create_writer, grenad_obkv_into_chunks, into_clonable_grenad, merge_readers,
+    as_cloneable_grenad, create_sorter, create_writer, grenad_obkv_into_chunks, merge_readers,
     sorter_into_lmdb_database, sorter_into_reader, write_into_lmdb_database, writer_into_reader,
     GrenadParameters,
 };
diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs
index a31d1875b..c69aae809 100644
--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@@ -16,9 +16,9 @@ use slice_group_by::GroupBy;
 use typed_chunk::{write_typed_chunk_into_index, TypedChunk};
 
 pub use self::helpers::{
-    create_sorter, create_writer, fst_stream_into_hashset, fst_stream_into_vec,
-    merge_cbo_roaring_bitmaps, merge_roaring_bitmaps, sorter_into_lmdb_database,
-    write_into_lmdb_database, writer_into_reader, ClonableMmap, MergeFn,
+    as_cloneable_grenad, create_sorter, create_writer, fst_stream_into_hashset,
+    fst_stream_into_vec, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps,
+    sorter_into_lmdb_database, write_into_lmdb_database, writer_into_reader, ClonableMmap, MergeFn,
 };
 use self::helpers::{grenad_obkv_into_chunks, GrenadParameters};
 pub use self::transform::{Transform, TransformOutput};
@@ -292,42 +292,18 @@ where
         for result in lmdb_writer_rx {
             let typed_chunk = match result? {
                 TypedChunk::WordDocids(chunk) => {
-                    // We extract and mmap our chunk file to be able to get it for next processes.
-                    let mut file = chunk.into_inner();
-                    let mmap = unsafe { memmap2::Mmap::map(&file)? };
-                    let cursor_mmap = CursorClonableMmap::new(ClonableMmap::from(mmap));
-                    let chunk = grenad::Reader::new(cursor_mmap)?;
-                    word_docids.push(chunk);
-
-                    // We reconstruct our typed-chunk back.
-                    file.rewind()?;
-                    let chunk = grenad::Reader::new(file)?;
+                    let cloneable_chunk = unsafe { as_cloneable_grenad(&chunk)? };
+                    word_docids.push(cloneable_chunk);
                     TypedChunk::WordDocids(chunk)
                 }
                 TypedChunk::WordPairProximityDocids(chunk) => {
-                    // We extract and mmap our chunk file to be able to get it for next processes.
-                    let mut file = chunk.into_inner();
-                    let mmap = unsafe { memmap2::Mmap::map(&file)? };
-                    let cursor_mmap = CursorClonableMmap::new(ClonableMmap::from(mmap));
-                    let chunk = grenad::Reader::new(cursor_mmap)?;
-                    word_pair_proximity_docids.push(chunk);
-
-                    // We reconstruct our typed-chunk back.
-                    file.rewind()?;
-                    let chunk = grenad::Reader::new(file)?;
+                    let cloneable_chunk = unsafe { as_cloneable_grenad(&chunk)? };
+                    word_pair_proximity_docids.push(cloneable_chunk);
                     TypedChunk::WordPairProximityDocids(chunk)
                 }
                 TypedChunk::WordPositionDocids(chunk) => {
-                    // We extract and mmap our chunk file to be able to get it for next processes.
-                    let mut file = chunk.into_inner();
-                    let mmap = unsafe { memmap2::Mmap::map(&file)? };
-                    let cursor_mmap = CursorClonableMmap::new(ClonableMmap::from(mmap));
-                    let chunk = grenad::Reader::new(cursor_mmap)?;
-                    word_position_docids.push(chunk);
-
-                    // We reconstruct our typed-chunk back.
-                    file.rewind()?;
-                    let chunk = grenad::Reader::new(file)?;
+                    let cloneable_chunk = unsafe { as_cloneable_grenad(&chunk)? };
+                    word_position_docids.push(cloneable_chunk);
                     TypedChunk::WordPositionDocids(chunk)
                 }
                 otherwise => otherwise,
diff --git a/milli/src/update/index_documents/typed_chunk.rs b/milli/src/update/index_documents/typed_chunk.rs
index 3c77de7a1..77ea31138 100644
--- a/milli/src/update/index_documents/typed_chunk.rs
+++ b/milli/src/update/index_documents/typed_chunk.rs
@@ -12,7 +12,7 @@ use super::helpers::{
     CursorClonableMmap,
 };
 use crate::heed_codec::facet::{decode_prefix_string, encode_prefix_string};
-use crate::update::index_documents::helpers::into_clonable_grenad;
+use crate::update::index_documents::helpers::as_cloneable_grenad;
 use crate::{
     lat_lng_to_xyz, BoRoaringBitmapCodec, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index,
     Result,
@@ -87,7 +87,7 @@ pub(crate) fn write_typed_chunk_into_index(
             return Ok((documents_ids, is_merged_database))
         }
         TypedChunk::WordDocids(word_docids_iter) => {
-            let word_docids_iter = unsafe { into_clonable_grenad(word_docids_iter) }?;
+            let word_docids_iter = unsafe { as_cloneable_grenad(&word_docids_iter) }?;
             append_entries_into_database(
                 word_docids_iter.clone(),
                 &index.word_docids,
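
A minimal, self-contained sketch of the pattern this patch relies on (the names CloneableMmap, as_cloneable_view, and the temp-file path below are illustrative stand-ins, not milli's actual ClonableMmap, CursorClonableMmap, or as_cloneable_grenad items): memory-mapping only needs to borrow the underlying File, so a helper that takes the reader by reference can hand out a cheaply cloneable, mmap-backed cursor while the original file-backed reader stays usable. That is what lets the indexing loop above keep `chunk` and push a `cloneable_chunk` without rewinding the file and rebuilding the reader.

    // Sketch only: borrow a File, map it, and wrap the mapping in a cloneable cursor.
    use std::fs::{File, OpenOptions};
    use std::io::{self, Cursor, Read, Write};
    use std::sync::Arc;

    use memmap2::Mmap;

    /// Stand-in for a cloneable mmap wrapper: cloning it only bumps an `Arc`.
    #[derive(Clone)]
    struct CloneableMmap(Arc<Mmap>);

    impl AsRef<[u8]> for CloneableMmap {
        fn as_ref(&self) -> &[u8] {
            &self.0
        }
    }

    /// Like the patched helper, this only borrows the file; the caller keeps
    /// ownership and can keep using the original handle afterwards.
    ///
    /// # Safety
    /// The mapping is only valid while the file is not truncated or rewritten.
    unsafe fn as_cloneable_view(file: &File) -> io::Result<Cursor<CloneableMmap>> {
        // SAFETY: the caller promises the file stays untouched while mapped.
        let mmap = unsafe { Mmap::map(file)? };
        Ok(Cursor::new(CloneableMmap(Arc::new(mmap))))
    }

    fn main() -> io::Result<()> {
        let path = std::env::temp_dir().join("as_cloneable_grenad_demo.bin");
        let mut file =
            OpenOptions::new().read(true).write(true).create(true).truncate(true).open(&path)?;
        file.write_all(b"hello grenad")?;

        // `file` stays usable: we can hand a cloneable, mmap-backed view to another
        // consumer while still holding the original file-backed handle.
        let view = unsafe { as_cloneable_view(&file)? };
        let mut copy = view.clone(); // cheap clone, shares the same mapping

        let mut contents = String::new();
        copy.read_to_string(&mut contents)?;
        assert_eq!(contents, "hello grenad");

        std::fs::remove_file(&path)?;
        Ok(())
    }

Cloning the cursor only bumps the Arc, so every consumer shares one mapping; the unsafe stays because the mapping is only sound while nothing truncates or rewrites the file underneath it.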