mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-12-23 13:10:06 +01:00
Make the original sorter optional
This commit is contained in:
parent
abe29772db
commit
1aa8ed9ef7
@ -6,6 +6,7 @@ mod typed_chunk;
|
||||
|
||||
use std::collections::{HashMap, HashSet};
|
||||
use std::io::{Read, Seek};
|
||||
use std::iter;
|
||||
use std::num::NonZeroU32;
|
||||
use std::result::Result as StdResult;
|
||||
use std::sync::Arc;
|
||||
@ -373,8 +374,11 @@ where
|
||||
}
|
||||
};
|
||||
|
||||
let original_documents = grenad::Reader::new(original_documents)?;
|
||||
let flattened_documents = grenad::Reader::new(flattened_documents)?;
|
||||
let original_documents = match original_documents {
|
||||
Some(original_documents) => Some(grenad::Reader::new(original_documents)?),
|
||||
None => None,
|
||||
};
|
||||
|
||||
let max_positions_per_attributes = self.indexer_config.max_positions_per_attributes;
|
||||
|
||||
@ -393,11 +397,21 @@ where
|
||||
pool.install(|| {
|
||||
rayon::spawn(move || {
|
||||
let child_span = tracing::trace_span!(target: "indexing::details", parent: &current_span, "extract_and_send_grenad_chunks");
|
||||
let _enter = child_span.enter();
|
||||
puffin::profile_scope!("extract_and_send_grenad_chunks");
|
||||
let _enter = child_span.enter();
|
||||
puffin::profile_scope!("extract_and_send_grenad_chunks");
|
||||
// split obkv file into several chunks
|
||||
let original_chunk_iter =
|
||||
grenad_obkv_into_chunks(original_documents, pool_params, documents_chunk_size);
|
||||
match original_documents {
|
||||
Some(original_documents) => {
|
||||
grenad_obkv_into_chunks(
|
||||
original_documents,
|
||||
pool_params,
|
||||
documents_chunk_size
|
||||
)
|
||||
.map(either::Either::Left)
|
||||
},
|
||||
None => Ok(either::Right(iter::empty())),
|
||||
};
|
||||
|
||||
// split obkv file into several chunks
|
||||
let flattened_chunk_iter =
|
||||
|
@ -33,7 +33,7 @@ pub struct TransformOutput {
|
||||
pub settings_diff: InnerIndexSettingsDiff,
|
||||
pub field_distribution: FieldDistribution,
|
||||
pub documents_count: usize,
|
||||
pub original_documents: File,
|
||||
pub original_documents: Option<File>,
|
||||
pub flattened_documents: File,
|
||||
}
|
||||
|
||||
@ -822,7 +822,9 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
settings_diff,
|
||||
field_distribution,
|
||||
documents_count: self.documents_count,
|
||||
original_documents: original_documents.into_inner().map_err(|err| err.into_error())?,
|
||||
original_documents: Some(
|
||||
original_documents.into_inner().map_err(|err| err.into_error())?,
|
||||
),
|
||||
flattened_documents: flattened_documents
|
||||
.into_inner()
|
||||
.map_err(|err| err.into_error())?,
|
||||
@ -891,14 +893,18 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
let documents_count = documents_ids.len() as usize;
|
||||
|
||||
// We initialize the sorter with the user indexing settings.
|
||||
let mut original_sorter = create_sorter(
|
||||
grenad::SortAlgorithm::Stable,
|
||||
keep_first,
|
||||
self.indexer_settings.chunk_compression_type,
|
||||
self.indexer_settings.chunk_compression_level,
|
||||
self.indexer_settings.max_nb_chunks,
|
||||
self.indexer_settings.max_memory.map(|mem| mem / 2),
|
||||
);
|
||||
let mut original_sorter = if settings_diff.reindex_vectors() {
|
||||
Some(create_sorter(
|
||||
grenad::SortAlgorithm::Stable,
|
||||
keep_first,
|
||||
self.indexer_settings.chunk_compression_type,
|
||||
self.indexer_settings.chunk_compression_level,
|
||||
self.indexer_settings.max_nb_chunks,
|
||||
self.indexer_settings.max_memory.map(|mem| mem / 2),
|
||||
))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// We initialize the sorter with the user indexing settings.
|
||||
let mut flattened_sorter = create_sorter(
|
||||
@ -929,7 +935,9 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
document_sorter_key_buffer.clear();
|
||||
document_sorter_key_buffer.extend_from_slice(&docid.to_be_bytes());
|
||||
document_sorter_key_buffer.extend_from_slice(external_id.as_bytes());
|
||||
original_sorter.insert(&document_sorter_key_buffer, &original_obkv_buffer)?;
|
||||
if let Some(original_sorter) = original_sorter.as_mut() {
|
||||
original_sorter.insert(&document_sorter_key_buffer, &original_obkv_buffer)?;
|
||||
}
|
||||
flattened_sorter.insert(docid.to_be_bytes(), &flattened_obkv_buffer)?;
|
||||
}
|
||||
|
||||
@ -941,16 +949,18 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
};
|
||||
|
||||
// Once we have written all the documents, we merge everything into a Reader.
|
||||
let original_documents = sorter_into_reader(original_sorter, grenad_params)?;
|
||||
|
||||
let flattened_documents = sorter_into_reader(flattened_sorter, grenad_params)?;
|
||||
let original_documents = match original_sorter {
|
||||
Some(original_sorter) => Some(sorter_into_reader(original_sorter, grenad_params)?),
|
||||
None => None,
|
||||
};
|
||||
|
||||
Ok(TransformOutput {
|
||||
primary_key,
|
||||
field_distribution,
|
||||
settings_diff,
|
||||
documents_count,
|
||||
original_documents: original_documents.into_inner().into_inner(),
|
||||
original_documents: original_documents.map(|od| od.into_inner().into_inner()),
|
||||
flattened_documents: flattened_documents.into_inner().into_inner(),
|
||||
})
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user