only call the extractor if needed

This commit is contained in:
Tamo 2021-08-30 15:47:11 +02:00
parent 4fd0116a0d
commit f73273d71c
No known key found for this signature in database
GPG Key ID: 20CD8020AFA88D69
2 changed files with 11 additions and 15 deletions

View File

@ -14,18 +14,12 @@ use crate::{FieldId, InternalError, Result};
pub fn extract_geo_points<R: io::Read>(
mut obkv_documents: grenad::Reader<R>,
indexer: GrenadParameters,
geo_field_id: Option<FieldId>, // make an empty grenad
geo_field_id: FieldId,
) -> Result<grenad::Reader<File>> {
let mut writer = tempfile::tempfile().and_then(|file| {
create_writer(indexer.chunk_compression_type, indexer.chunk_compression_level, file)
})?;
// We never encountered any document with a `_geo` field, so we can skip this step entirely.
let geo_field_id = match geo_field_id {
Some(geo) => geo,
None => return Ok(writer_into_reader(writer)?),
};
while let Some((docid_bytes, value)) = obkv_documents.next()? {
let obkv = obkv::KvReader::new(value);
let point = match obkv.get(geo_field_id) {

View File

@ -182,14 +182,16 @@ fn extract_documents_data(
let _ = lmdb_writer_sx.send(Ok(TypedChunk::Documents(documents_chunk.clone())));
let documents_chunk_cloned = documents_chunk.clone();
let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
rayon::spawn(move || {
let _ = match extract_geo_points(documents_chunk_cloned, indexer, geo_field_id) {
Ok(geo_points) => lmdb_writer_sx_cloned.send(Ok(TypedChunk::GeoPoints(geo_points))),
Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
};
});
if let Some(geo_field_id) = geo_field_id {
let documents_chunk_cloned = documents_chunk.clone();
let lmdb_writer_sx_cloned = lmdb_writer_sx.clone();
rayon::spawn(move || {
let _ = match extract_geo_points(documents_chunk_cloned, indexer, geo_field_id) {
Ok(geo_points) => lmdb_writer_sx_cloned.send(Ok(TypedChunk::GeoPoints(geo_points))),
Err(error) => lmdb_writer_sx_cloned.send(Err(error)),
};
});
}
let (docid_word_positions_chunk, docid_fid_facet_values_chunks): (Result<_>, Result<_>) =
rayon::join(