Change as_cloneable_grenad (formerly into_clonable_grenad) to take the reader by reference instead of consuming it

This commit is contained in:
Clément Renault 2022-02-16 15:40:08 +01:00
parent f367cc2e75
commit ff8d7a810d
No known key found for this signature in database
GPG Key ID: 92ADA4E935E71FA4
5 changed files with 21 additions and 45 deletions

View File

@ -25,7 +25,7 @@ use self::extract_word_docids::extract_word_docids;
use self::extract_word_pair_proximity_docids::extract_word_pair_proximity_docids;
use self::extract_word_position_docids::extract_word_position_docids;
use super::helpers::{
into_clonable_grenad, keep_first_prefix_value_merge_roaring_bitmaps, merge_cbo_roaring_bitmaps,
as_cloneable_grenad, keep_first_prefix_value_merge_roaring_bitmaps, merge_cbo_roaring_bitmaps,
merge_readers, merge_roaring_bitmaps, CursorClonableMmap, GrenadParameters, MergeFn,
};
use super::{helpers, TypedChunk};
@ -184,7 +184,7 @@ fn extract_documents_data(
grenad::Reader<CursorClonableMmap>,
(grenad::Reader<CursorClonableMmap>, grenad::Reader<CursorClonableMmap>),
)> {
let documents_chunk = documents_chunk.and_then(|c| unsafe { into_clonable_grenad(c) })?;
let documents_chunk = documents_chunk.and_then(|c| unsafe { as_cloneable_grenad(&c) })?;
let _ = lmdb_writer_sx.send(Ok(TypedChunk::Documents(documents_chunk.clone())));
@ -217,7 +217,7 @@ fn extract_documents_data(
// send docid_word_positions_chunk to DB writer
let docid_word_positions_chunk =
unsafe { into_clonable_grenad(docid_word_positions_chunk)? };
unsafe { as_cloneable_grenad(&docid_word_positions_chunk)? };
let _ = lmdb_writer_sx
.send(Ok(TypedChunk::DocidWordPositions(docid_word_positions_chunk.clone())));
@ -233,7 +233,7 @@ fn extract_documents_data(
// send docid_fid_facet_numbers_chunk to DB writer
let docid_fid_facet_numbers_chunk =
unsafe { into_clonable_grenad(docid_fid_facet_numbers_chunk)? };
unsafe { as_cloneable_grenad(&docid_fid_facet_numbers_chunk)? };
let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdDocidFacetNumbers(
docid_fid_facet_numbers_chunk.clone(),
@ -241,7 +241,7 @@ fn extract_documents_data(
// send docid_fid_facet_strings_chunk to DB writer
let docid_fid_facet_strings_chunk =
unsafe { into_clonable_grenad(docid_fid_facet_strings_chunk)? };
unsafe { as_cloneable_grenad(&docid_fid_facet_strings_chunk)? };
let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdDocidFacetStrings(
docid_fid_facet_strings_chunk.clone(),

View File

@ -68,11 +68,11 @@ pub fn writer_into_reader(writer: grenad::Writer<File>) -> Result<grenad::Reader
grenad::Reader::new(file).map_err(Into::into)
}
pub unsafe fn into_clonable_grenad(
reader: grenad::Reader<File>,
pub unsafe fn as_cloneable_grenad(
reader: &grenad::Reader<File>,
) -> Result<grenad::Reader<CursorClonableMmap>> {
let file = reader.into_inner();
let mmap = memmap2::Mmap::map(&file)?;
let file = reader.get_ref();
let mmap = memmap2::Mmap::map(file)?;
let cursor = io::Cursor::new(ClonableMmap::from(mmap));
let reader = grenad::Reader::new(cursor)?;
Ok(reader)

View File

@ -8,7 +8,7 @@ use std::convert::{TryFrom, TryInto};
pub use clonable_mmap::{ClonableMmap, CursorClonableMmap};
use fst::{IntoStreamer, Streamer};
pub use grenad_helpers::{
create_sorter, create_writer, grenad_obkv_into_chunks, into_clonable_grenad, merge_readers,
as_cloneable_grenad, create_sorter, create_writer, grenad_obkv_into_chunks, merge_readers,
sorter_into_lmdb_database, sorter_into_reader, write_into_lmdb_database, writer_into_reader,
GrenadParameters,
};

View File

@ -16,9 +16,9 @@ use slice_group_by::GroupBy;
use typed_chunk::{write_typed_chunk_into_index, TypedChunk};
pub use self::helpers::{
create_sorter, create_writer, fst_stream_into_hashset, fst_stream_into_vec,
merge_cbo_roaring_bitmaps, merge_roaring_bitmaps, sorter_into_lmdb_database,
write_into_lmdb_database, writer_into_reader, ClonableMmap, MergeFn,
as_cloneable_grenad, create_sorter, create_writer, fst_stream_into_hashset,
fst_stream_into_vec, merge_cbo_roaring_bitmaps, merge_roaring_bitmaps,
sorter_into_lmdb_database, write_into_lmdb_database, writer_into_reader, ClonableMmap, MergeFn,
};
use self::helpers::{grenad_obkv_into_chunks, GrenadParameters};
pub use self::transform::{Transform, TransformOutput};
@ -292,42 +292,18 @@ where
for result in lmdb_writer_rx {
let typed_chunk = match result? {
TypedChunk::WordDocids(chunk) => {
// We extract and mmap our chunk file to be able to get it for next processes.
let mut file = chunk.into_inner();
let mmap = unsafe { memmap2::Mmap::map(&file)? };
let cursor_mmap = CursorClonableMmap::new(ClonableMmap::from(mmap));
let chunk = grenad::Reader::new(cursor_mmap)?;
word_docids.push(chunk);
// We reconstruct our typed-chunk back.
file.rewind()?;
let chunk = grenad::Reader::new(file)?;
let cloneable_chunk = unsafe { as_cloneable_grenad(&chunk)? };
word_docids.push(cloneable_chunk);
TypedChunk::WordDocids(chunk)
}
TypedChunk::WordPairProximityDocids(chunk) => {
// We extract and mmap our chunk file to be able to get it for next processes.
let mut file = chunk.into_inner();
let mmap = unsafe { memmap2::Mmap::map(&file)? };
let cursor_mmap = CursorClonableMmap::new(ClonableMmap::from(mmap));
let chunk = grenad::Reader::new(cursor_mmap)?;
word_pair_proximity_docids.push(chunk);
// We reconstruct our typed-chunk back.
file.rewind()?;
let chunk = grenad::Reader::new(file)?;
let cloneable_chunk = unsafe { as_cloneable_grenad(&chunk)? };
word_pair_proximity_docids.push(cloneable_chunk);
TypedChunk::WordPairProximityDocids(chunk)
}
TypedChunk::WordPositionDocids(chunk) => {
// We extract and mmap our chunk file to be able to get it for next processes.
let mut file = chunk.into_inner();
let mmap = unsafe { memmap2::Mmap::map(&file)? };
let cursor_mmap = CursorClonableMmap::new(ClonableMmap::from(mmap));
let chunk = grenad::Reader::new(cursor_mmap)?;
word_position_docids.push(chunk);
// We reconstruct our typed-chunk back.
file.rewind()?;
let chunk = grenad::Reader::new(file)?;
let cloneable_chunk = unsafe { as_cloneable_grenad(&chunk)? };
word_position_docids.push(cloneable_chunk);
TypedChunk::WordPositionDocids(chunk)
}
otherwise => otherwise,

View File

@ -12,7 +12,7 @@ use super::helpers::{
CursorClonableMmap,
};
use crate::heed_codec::facet::{decode_prefix_string, encode_prefix_string};
use crate::update::index_documents::helpers::into_clonable_grenad;
use crate::update::index_documents::helpers::as_cloneable_grenad;
use crate::{
lat_lng_to_xyz, BoRoaringBitmapCodec, CboRoaringBitmapCodec, DocumentId, GeoPoint, Index,
Result,
@ -87,7 +87,7 @@ pub(crate) fn write_typed_chunk_into_index(
return Ok((documents_ids, is_merged_database))
}
TypedChunk::WordDocids(word_docids_iter) => {
let word_docids_iter = unsafe { into_clonable_grenad(word_docids_iter) }?;
let word_docids_iter = unsafe { as_cloneable_grenad(&word_docids_iter) }?;
append_entries_into_database(
word_docids_iter.clone(),
&index.word_docids,