Rename docid_fid into fid_docid

This commit is contained in:
Clément Renault 2023-10-18 13:53:58 +02:00 committed by Louis Dureuil
parent bc45c1206d
commit a82dee21e0
No known key found for this signature in database
2 changed files with 22 additions and 26 deletions

View File

@ -29,9 +29,8 @@ const TRUNCATE_SIZE: usize = size_of::<FieldId>() + size_of::<DocumentId>();
/// The extracted facet values stored in grenad files by type. /// The extracted facet values stored in grenad files by type.
pub struct ExtractedFacetValues { pub struct ExtractedFacetValues {
// TOOD rename into `fid_docid_*` pub fid_docid_facet_numbers_chunk: grenad::Reader<BufReader<File>>,
pub docid_fid_facet_numbers_chunk: grenad::Reader<BufReader<File>>, pub fid_docid_facet_strings_chunk: grenad::Reader<BufReader<File>>,
pub docid_fid_facet_strings_chunk: grenad::Reader<BufReader<File>>,
pub fid_facet_is_null_docids_chunk: grenad::Reader<BufReader<File>>, pub fid_facet_is_null_docids_chunk: grenad::Reader<BufReader<File>>,
pub fid_facet_is_empty_docids_chunk: grenad::Reader<BufReader<File>>, pub fid_facet_is_empty_docids_chunk: grenad::Reader<BufReader<File>>,
pub fid_facet_exists_docids_chunk: grenad::Reader<BufReader<File>>, pub fid_facet_exists_docids_chunk: grenad::Reader<BufReader<File>>,
@ -44,7 +43,6 @@ pub struct ExtractedFacetValues {
/// We need the fid of the geofields to correctly parse them as numbers if they were sent as strings initially. /// We need the fid of the geofields to correctly parse them as numbers if they were sent as strings initially.
#[logging_timer::time] #[logging_timer::time]
pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>( pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
// TODO Reader<Obkv<FieldId, Obkv<DelAdd, serde_json::Value>>>
obkv_documents: grenad::Reader<R>, obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters, indexer: GrenadParameters,
faceted_fields: &HashSet<FieldId>, faceted_fields: &HashSet<FieldId>,
@ -83,7 +81,6 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
let mut cursor = obkv_documents.into_cursor()?; let mut cursor = obkv_documents.into_cursor()?;
while let Some((docid_bytes, value)) = cursor.move_on_next()? { while let Some((docid_bytes, value)) = cursor.move_on_next()? {
// TODO Obkv<FieldId, Obkv<DelAdd, serde_json::Value>>
let obkv = obkv::KvReader::new(value); let obkv = obkv::KvReader::new(value);
for (field_id, field_bytes) in obkv.iter() { for (field_id, field_bytes) in obkv.iter() {
@ -96,7 +93,6 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
numbers_key_buffer.extend_from_slice(&field_id.to_be_bytes()); numbers_key_buffer.extend_from_slice(&field_id.to_be_bytes());
strings_key_buffer.extend_from_slice(&field_id.to_be_bytes()); strings_key_buffer.extend_from_slice(&field_id.to_be_bytes());
// Here, we know already that the document must be added to the “field id exists” database
let document: [u8; 4] = docid_bytes[..4].try_into().ok().unwrap(); let document: [u8; 4] = docid_bytes[..4].try_into().ok().unwrap();
let document = BEU32::from(document).get(); let document = BEU32::from(document).get();
@ -260,8 +256,8 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
let facet_is_empty_docids_reader = writer_into_reader(facet_is_empty_docids_writer)?; let facet_is_empty_docids_reader = writer_into_reader(facet_is_empty_docids_writer)?;
Ok(ExtractedFacetValues { Ok(ExtractedFacetValues {
docid_fid_facet_numbers_chunk: sorter_into_reader(fid_docid_facet_numbers_sorter, indexer)?, fid_docid_facet_numbers_chunk: sorter_into_reader(fid_docid_facet_numbers_sorter, indexer)?,
docid_fid_facet_strings_chunk: sorter_into_reader(fid_docid_facet_strings_sorter, indexer)?, fid_docid_facet_strings_chunk: sorter_into_reader(fid_docid_facet_strings_sorter, indexer)?,
fid_facet_is_null_docids_chunk: facet_is_null_docids_reader, fid_facet_is_null_docids_chunk: facet_is_null_docids_reader,
fid_facet_is_empty_docids_chunk: facet_is_empty_docids_reader, fid_facet_is_empty_docids_chunk: facet_is_empty_docids_reader,
fid_facet_exists_docids_chunk: facet_exists_docids_reader, fid_facet_exists_docids_chunk: facet_exists_docids_reader,

View File

@ -92,9 +92,9 @@ pub(crate) fn data_from_obkv_documents(
let ( let (
docid_word_positions_chunks, docid_word_positions_chunks,
( (
docid_fid_facet_numbers_chunks, fid_docid_facet_numbers_chunks,
( (
docid_fid_facet_strings_chunks, fid_docid_facet_strings_chunks,
( (
facet_is_null_docids_chunks, facet_is_null_docids_chunks,
(facet_is_empty_docids_chunks, facet_exists_docids_chunks), (facet_is_empty_docids_chunks, facet_exists_docids_chunks),
@ -206,7 +206,7 @@ pub(crate) fn data_from_obkv_documents(
); );
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>( spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
docid_fid_facet_strings_chunks, fid_docid_facet_strings_chunks,
indexer, indexer,
lmdb_writer_sx.clone(), lmdb_writer_sx.clone(),
extract_facet_string_docids, extract_facet_string_docids,
@ -216,7 +216,7 @@ pub(crate) fn data_from_obkv_documents(
); );
spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>( spawn_extraction_task::<_, _, Vec<grenad::Reader<BufReader<File>>>>(
docid_fid_facet_numbers_chunks, fid_docid_facet_numbers_chunks,
indexer, indexer,
lmdb_writer_sx, lmdb_writer_sx,
extract_facet_number_docids, extract_facet_number_docids,
@ -352,7 +352,7 @@ fn send_and_extract_flattened_documents_data(
}); });
} }
let (docid_word_positions_chunk, docid_fid_facet_values_chunks): (Result<_>, Result<_>) = let (docid_word_positions_chunk, fid_docid_facet_values_chunks): (Result<_>, Result<_>) =
rayon::join( rayon::join(
|| { || {
let (documents_ids, docid_word_positions_chunk, script_language_pair) = let (documents_ids, docid_word_positions_chunk, script_language_pair) =
@ -380,8 +380,8 @@ fn send_and_extract_flattened_documents_data(
}, },
|| { || {
let ExtractedFacetValues { let ExtractedFacetValues {
docid_fid_facet_numbers_chunk, fid_docid_facet_numbers_chunk,
docid_fid_facet_strings_chunk, fid_docid_facet_strings_chunk,
fid_facet_is_null_docids_chunk, fid_facet_is_null_docids_chunk,
fid_facet_is_empty_docids_chunk, fid_facet_is_empty_docids_chunk,
fid_facet_exists_docids_chunk, fid_facet_exists_docids_chunk,
@ -392,26 +392,26 @@ fn send_and_extract_flattened_documents_data(
geo_fields_ids, geo_fields_ids,
)?; )?;
// send docid_fid_facet_numbers_chunk to DB writer // send fid_docid_facet_numbers_chunk to DB writer
let docid_fid_facet_numbers_chunk = let fid_docid_facet_numbers_chunk =
unsafe { as_cloneable_grenad(&docid_fid_facet_numbers_chunk)? }; unsafe { as_cloneable_grenad(&fid_docid_facet_numbers_chunk)? };
let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdDocidFacetNumbers( let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdDocidFacetNumbers(
docid_fid_facet_numbers_chunk.clone(), fid_docid_facet_numbers_chunk.clone(),
))); )));
// send docid_fid_facet_strings_chunk to DB writer // send fid_docid_facet_strings_chunk to DB writer
let docid_fid_facet_strings_chunk = let fid_docid_facet_strings_chunk =
unsafe { as_cloneable_grenad(&docid_fid_facet_strings_chunk)? }; unsafe { as_cloneable_grenad(&fid_docid_facet_strings_chunk)? };
let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdDocidFacetStrings( let _ = lmdb_writer_sx.send(Ok(TypedChunk::FieldIdDocidFacetStrings(
docid_fid_facet_strings_chunk.clone(), fid_docid_facet_strings_chunk.clone(),
))); )));
Ok(( Ok((
docid_fid_facet_numbers_chunk, fid_docid_facet_numbers_chunk,
( (
docid_fid_facet_strings_chunk, fid_docid_facet_strings_chunk,
( (
fid_facet_is_null_docids_chunk, fid_facet_is_null_docids_chunk,
(fid_facet_is_empty_docids_chunk, fid_facet_exists_docids_chunk), (fid_facet_is_empty_docids_chunk, fid_facet_exists_docids_chunk),
@ -421,5 +421,5 @@ fn send_and_extract_flattened_documents_data(
}, },
); );
Ok((docid_word_positions_chunk?, docid_fid_facet_values_chunks?)) Ok((docid_word_positions_chunk?, fid_docid_facet_values_chunks?))
} }