Change as_cloneable_grenad to take the reader by reference instead of consuming it

This commit is contained in:
Clément Renault 2022-02-16 15:40:08 +01:00
parent f367cc2e75
commit ff8d7a810d
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
5 changed files with 21 additions and 45 deletions

View File

@ -25,7 +25,7 @@ use self::extract_word_docids::extract_word_docids;
use self::extract_word_pair_proximity_docids::extract_word_pair_proximity_docids; use self::extract_word_pair_proximity_docids::extract_word_pair_proximity_docids;
use self::extract_word_position_docids::extract_word_position_docids; use self::extract_word_position_docids::extract_word_position_docids;
use super::helpers::{ use super::helpers::{
into_clonable_grenad, keep_first_prefix_value_merge_roaring_bitmaps, merge_cbo_roaring_bitmaps, as_cloneable_grenad, keep_first_prefix_value_merge_roaring_bitmaps, merge_cbo_roaring_bitmaps,
merge_readers, merge_roaring_bitmaps, CursorClonableMmap, GrenadParameters, MergeFn, merge_readers, merge_roaring_bitmaps, CursorClonableMmap, GrenadParameters, MergeFn,
}; };
use super::{helpers, TypedChunk}; use super::{helpers, TypedChunk};
@ -184,7 +184,7 @@ fn extract_documents_data(
grenad::Reader<CursorClonableMmap>, grenad::Reader<CursorClonableMmap>,
(grenad::Reader<CursorClonableMmap>, grenad::Reader<CursorClonableMmap>), (grenad::Reader<CursorClonableMmap>, grenad::Reader<CursorClonableMmap>),
)> { )> {
let documents_chunk = documents_chunk.and_then(|c| unsafe { into_clonable_grenad(c) })?; let documents_chunk = documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;
let _ = lmdb_writer_sx.send(Ok(TypedChunk::Documents(documents_chunk.clone()))); let _ = lmdb_writer_sx.send(Ok(TypedChunk::Documents(documents_chunk.clone())));
@ -217,7 +217,7 @@ fn extract_documents_data(
// send docid_word_positions_chunk to DB writer // send docid_word_positions_chunk to DB writer
let docid_word_positions_chunk = let docid_word_positions_chunk =
unsafe { into_clonable_grenad(docid_word_positions_chunk)? }; unsafe { as_cloneable_grenad(&docid_word_positions_chunk)? };
let _ = lmdb_writer_sx let _ = lmdb_writer_sx
.send(Ok(TypedChunk::DocidWordPositions(docid_word_positions_chunk.clone()))); .send(Ok(TypedChunk::DocidWordPositions(docid_word_positions_chunk.clone())));
@ -233,7 +233,7 @@ fn extract_documents_data(
// send docid_fid_facet_numbers_chunk to DB writer // send docid_fid_facet_numbers_chunk to DB writer
let docid_fid_facet_numbers_chunk = let docid_fid_facet_numbers_chunk =
unsafe { into_clonable_grenad(docid_fid_facet_numbers_chunk)? }; unsafe { as_cloneable_grenad(&docid_fid_facet_numbers_chunk)? };
let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdDocidFacetNumbers( let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdDocidFacetNumbers(
docid_fid_facet_numbers_chunk.clone(), docid_fid_facet_numbers_chunk.clone(),
@ -241,7 +241,7 @@ fn extract_documents_data(
// send docid_fid_facet_strings_chunk to DB writer // send docid_fid_facet_strings_chunk to DB writer
let docid_fid_facet_strings_chunk = let docid_fid_facet_strings_chunk =
unsafe { into_clonable_grenad(docid_fid_facet_strings_chunk)? }; unsafe { as_cloneable_grenad(&docid_fid_facet_strings_chunk)? };
let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdDocidFacetStrings( let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdDocidFacetStrings(
docid_fid_facet_strings_chunk.clone(), docid_fid_facet_strings_chunk.clone(),

View File

@ -68,11 +68,11 @@ pub fn writer_into_reader(writer: grenad::Writer<File>) -> Result<grenad::Reader
grenad::Reader::new(file).map_err(Into::into) grenad::Reader::new(file).map_err(Into::into)
} }
pub unsafe fn into_clonable_grenad( pub unsafe fn as_cloneable_grenad(
reader: grenad::Reader<File>, reader: &grenad::Reader<File>,
) -> Result<grenad::Reader<CursorClonableMmap>> { ) -> Result<grenad::Reader<CursorClonableMmap>> {
let file = reader.into_inner(); let file = reader.get_ref();
let mmap = memmap2::Mmap::map(&file)?; let mmap = memmap2::Mmap::map(file)?;
let cursor = io::Cursor::new(ClonableMmap::from(mmap)); let cursor = io::Cursor::new(ClonableMmap::from(mmap));
let reader = grenad::Reader::new(cursor)?; let reader = grenad::Reader::new(cursor)?;
Ok(reader) Ok(reader)

View File

@ -8,7 +8,7 @@ use std::convert::{TryFrom, TryInto};
pub use clonable_mmap::{ClonableMmap, CursorClonableMmap}; pub use clonable_mmap::{ClonableMmap, CursorClonableMmap};
use fst::{IntoStreamer, Streamer}; use fst::{IntoStreamer, Streamer};
pub use grenad_helpers::{ pub use grenad_helpers::{
create_sorter, create_writer, grenad_obkv_into_chunks, into_clonable_grenad, merge_readers, as_cloneable_grenad, create_sorter, create_writer, grenad_obkv_into_chunks, merge_readers,
sorter_into_lmdb_database, sorter_into_reader, write_into_lmdb_database, writer_into_reader, sorter_into_lmdb_database, sorter_into_reader, write_into_lmdb_database, writer_into_reader,
GrenadParameters, GrenadParameters,
}; };

View File

@ -16,9 +16,9 @@ use slice_group_by::GroupBy;
use typed_chunk::{write_typed_chunk_into_index, TypedChunk}; use typed_chunk::{write_typed_chunk_into_index, TypedChunk};
pub use self::helpers::{ pub use self::helpers::{
create_sorter, create_writer, fst_stream_into_hashset, fst_stream_into_vec, as_cloneable_grenad, create_sorter, create_writer, fst_stream_into_hashset,
merge_cbo_roaring_bitmaps, merge_roaring_bitmaps, sorter_into_lmdb_database, fst_stream_into_vec, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps,
write_into_lmdb_database, writer_into_reader, ClonableMmap, MergeFn, sorter_into_lmdb_database, write_into_lmdb_database, writer_into_reader, ClonableMmap, MergeFn,
}; };
use self::helpers::{grenad_obkv_into_chunks, GrenadParameters}; use self::helpers::{grenad_obkv_into_chunks, GrenadParameters};
pub use self::transform::{Transform, TransformOutput}; pub use self::transform::{Transform, TransformOutput};
@ -292,42 +292,18 @@ where
for result in lmdb_writer_rx { for result in lmdb_writer_rx {
let typed_chunk = match result? { let typed_chunk = match result? {
TypedChunk::WordDocids(chunk) => { TypedChunk::WordDocids(chunk) => {
// We extract and mmap our chunk file to be able to get it for next processes. let cloneable_chunk = unsafe { as_cloneable_grenad(&chunk)? };
let mut file = chunk.into_inner(); word_docids.push(cloneable_chunk);
let mmap = unsafe { memmap2::Mmap::map(&file)? };
let cursor_mmap = CursorClonableMmap::new(ClonableMmap::from(mmap));
let chunk = grenad::Reader::new(cursor_mmap)?;
word_docids.push(chunk);
// We reconstruct our typed-chunk back.
file.rewind()?;
let chunk = grenad::Reader::new(file)?;
TypedChunk::WordDocids(chunk) TypedChunk::WordDocids(chunk)
} }
TypedChunk::WordPairProximityDocids(chunk) => { TypedChunk::WordPairProximityDocids(chunk) => {
// We extract and mmap our chunk file to be able to get it for next processes. let cloneable_chunk = unsafe { as_cloneable_grenad(&chunk)? };
let mut file = chunk.into_inner(); word_pair_proximity_docids.push(cloneable_chunk);
let mmap = unsafe { memmap2::Mmap::map(&file)? };
let cursor_mmap = CursorClonableMmap::new(ClonableMmap::from(mmap));
let chunk = grenad::Reader::new(cursor_mmap)?;
word_pair_proximity_docids.push(chunk);
// We reconstruct our typed-chunk back.
file.rewind()?;
let chunk = grenad::Reader::new(file)?;
TypedChunk::WordPairProximityDocids(chunk) TypedChunk::WordPairProximityDocids(chunk)
} }
TypedChunk::WordPositionDocids(chunk) => { TypedChunk::WordPositionDocids(chunk) => {
// We extract and mmap our chunk file to be able to get it for next processes. let cloneable_chunk = unsafe { as_cloneable_grenad(&chunk)? };
let mut file = chunk.into_inner(); word_position_docids.push(cloneable_chunk);
let mmap = unsafe { memmap2::Mmap::map(&file)? };
let cursor_mmap = CursorClonableMmap::new(ClonableMmap::from(mmap));
let chunk = grenad::Reader::new(cursor_mmap)?;
word_position_docids.push(chunk);
// We reconstruct our typed-chunk back.
file.rewind()?;
let chunk = grenad::Reader::new(file)?;
TypedChunk::WordPositionDocids(chunk) TypedChunk::WordPositionDocids(chunk)
} }
otherwise => otherwise, otherwise => otherwise,

View File

@ -12,7 +12,7 @@ use super::helpers::{
CursorClonableMmap, CursorClonableMmap,
}; };
use crate::heed_codec::facet::{decode_prefix_string, encode_prefix_string}; use crate::heed_codec::facet::{decode_prefix_string, encode_prefix_string};
use crate::update::index_documents::helpers::into_clonable_grenad; use crate::update::index_documents::helpers::as_cloneable_grenad;
use crate::{ use crate::{
lat_lng_to_xyz, BoRoaringBitmapCodec, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index, lat_lng_to_xyz, BoRoaringBitmapCodec, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index,
Result, Result,
@ -87,7 +87,7 @@ pub(crate) fn write_typed_chunk_into_index(
return Ok((documents_ids, is_merged_database)) return Ok((documents_ids, is_merged_database))
} }
TypedChunk::WordDocids(word_docids_iter) => { TypedChunk::WordDocids(word_docids_iter) => {
let word_docids_iter = unsafe { into_clonable_grenad(word_docids_iter) }?; let word_docids_iter = unsafe { as_cloneable_grenad(&word_docids_iter) }?;
append_entries_into_database( append_entries_into_database(
word_docids_iter.clone(), word_docids_iter.clone(),
&index.word_docids, &index.word_docids,