mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-29 16:24:26 +01:00
Make the flattened sorter optional
This commit is contained in:
parent
943f8dba0c
commit
500ddc76b5
@ -360,7 +360,10 @@ where
|
||||
let min_chunk_size = 1024 * 512; // 512KiB
|
||||
|
||||
// compute the chunk size from the number of available threads and the inputed data size.
|
||||
let total_size = flattened_documents.metadata().map(|m| m.len());
|
||||
let total_size = match flattened_documents.as_ref() {
|
||||
Some(flattened_documents) => flattened_documents.metadata().map(|m| m.len()),
|
||||
None => Ok(default_chunk_size as u64),
|
||||
};
|
||||
let current_num_threads = pool.current_num_threads();
|
||||
// if we have more than 2 thread, create a number of chunk equal to 3/4 threads count
|
||||
let chunk_count = if current_num_threads > 2 {
|
||||
@ -374,11 +377,14 @@ where
|
||||
}
|
||||
};
|
||||
|
||||
let flattened_documents = grenad::Reader::new(flattened_documents)?;
|
||||
let original_documents = match original_documents {
|
||||
Some(original_documents) => Some(grenad::Reader::new(original_documents)?),
|
||||
None => None,
|
||||
};
|
||||
let flattened_documents = match flattened_documents {
|
||||
Some(flattened_documents) => Some(grenad::Reader::new(flattened_documents)?),
|
||||
None => None,
|
||||
};
|
||||
|
||||
let max_positions_per_attributes = self.indexer_config.max_positions_per_attributes;
|
||||
|
||||
@ -400,22 +406,20 @@ where
|
||||
let _enter = child_span.enter();
|
||||
puffin::profile_scope!("extract_and_send_grenad_chunks");
|
||||
// split obkv file into several chunks
|
||||
let original_chunk_iter =
|
||||
match original_documents {
|
||||
let original_chunk_iter = match original_documents {
|
||||
Some(original_documents) => {
|
||||
grenad_obkv_into_chunks(
|
||||
original_documents,
|
||||
pool_params,
|
||||
documents_chunk_size
|
||||
)
|
||||
.map(either::Either::Left)
|
||||
grenad_obkv_into_chunks(original_documents,pool_params,documents_chunk_size).map(either::Left)
|
||||
},
|
||||
None => Ok(either::Right(iter::empty())),
|
||||
};
|
||||
|
||||
// split obkv file into several chunks
|
||||
let flattened_chunk_iter =
|
||||
grenad_obkv_into_chunks(flattened_documents, pool_params, documents_chunk_size);
|
||||
let flattened_chunk_iter = match flattened_documents {
|
||||
Some(flattened_documents) => {
|
||||
grenad_obkv_into_chunks(flattened_documents, pool_params, documents_chunk_size).map(either::Left)
|
||||
},
|
||||
None => Ok(either::Right(iter::empty())),
|
||||
};
|
||||
|
||||
let result = original_chunk_iter.and_then(|original_chunk| {
|
||||
let flattened_chunk = flattened_chunk_iter?;
|
||||
|
@ -34,7 +34,7 @@ pub struct TransformOutput {
|
||||
pub field_distribution: FieldDistribution,
|
||||
pub documents_count: usize,
|
||||
pub original_documents: Option<File>,
|
||||
pub flattened_documents: File,
|
||||
pub flattened_documents: Option<File>,
|
||||
}
|
||||
|
||||
/// Extract the external ids, deduplicate and compute the new internal documents ids
|
||||
@ -825,9 +825,9 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
original_documents: Some(
|
||||
original_documents.into_inner().map_err(|err| err.into_error())?,
|
||||
),
|
||||
flattened_documents: flattened_documents
|
||||
.into_inner()
|
||||
.map_err(|err| err.into_error())?,
|
||||
flattened_documents: Some(
|
||||
flattened_documents.into_inner().map_err(|err| err.into_error())?,
|
||||
),
|
||||
})
|
||||
}
|
||||
|
||||
@ -840,6 +840,9 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
original_obkv_buffer: &mut Vec<u8>,
|
||||
flattened_obkv_buffer: &mut Vec<u8>,
|
||||
) -> Result<()> {
|
||||
/// TODO do a XOR of the faceted fields
|
||||
/// TODO if reindex_searchable returns true store all searchables else none
|
||||
/// TODO no longer useful after Tamo's PR
|
||||
let mut old_fields_ids_map = settings_diff.old.fields_ids_map.clone();
|
||||
let mut new_fields_ids_map = settings_diff.new.fields_ids_map.clone();
|
||||
let mut obkv_writer = KvWriter::<_, FieldId>::memory();
|
||||
@ -907,14 +910,19 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
};
|
||||
|
||||
// We initialize the sorter with the user indexing settings.
|
||||
let mut flattened_sorter = create_sorter(
|
||||
let mut flattened_sorter =
|
||||
if settings_diff.reindex_searchable() || settings_diff.reindex_facets() {
|
||||
Some(create_sorter(
|
||||
grenad::SortAlgorithm::Stable,
|
||||
keep_first,
|
||||
self.indexer_settings.chunk_compression_type,
|
||||
self.indexer_settings.chunk_compression_level,
|
||||
self.indexer_settings.max_nb_chunks,
|
||||
self.indexer_settings.max_memory.map(|mem| mem / 2),
|
||||
);
|
||||
))
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
let mut original_obkv_buffer = Vec::new();
|
||||
let mut flattened_obkv_buffer = Vec::new();
|
||||
@ -938,8 +946,10 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
if let Some(original_sorter) = original_sorter.as_mut() {
|
||||
original_sorter.insert(&document_sorter_key_buffer, &original_obkv_buffer)?;
|
||||
}
|
||||
if let Some(flattened_sorter) = flattened_sorter.as_mut() {
|
||||
flattened_sorter.insert(docid.to_be_bytes(), &flattened_obkv_buffer)?;
|
||||
}
|
||||
}
|
||||
|
||||
let grenad_params = GrenadParameters {
|
||||
chunk_compression_type: self.indexer_settings.chunk_compression_type,
|
||||
@ -949,7 +959,10 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
};
|
||||
|
||||
// Once we have written all the documents, we merge everything into a Reader.
|
||||
let flattened_documents = sorter_into_reader(flattened_sorter, grenad_params)?;
|
||||
let flattened_documents = match flattened_sorter {
|
||||
Some(flattened_sorter) => Some(sorter_into_reader(flattened_sorter, grenad_params)?),
|
||||
None => None,
|
||||
};
|
||||
let original_documents = match original_sorter {
|
||||
Some(original_sorter) => Some(sorter_into_reader(original_sorter, grenad_params)?),
|
||||
None => None,
|
||||
@ -961,7 +974,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
||||
settings_diff,
|
||||
documents_count,
|
||||
original_documents: original_documents.map(|od| od.into_inner().into_inner()),
|
||||
flattened_documents: flattened_documents.into_inner().into_inner(),
|
||||
flattened_documents: flattened_documents.map(|fd| fd.into_inner().into_inner()),
|
||||
})
|
||||
}
|
||||
}
|
||||
|
@ -1099,6 +1099,7 @@ impl InnerIndexSettingsDiff {
|
||||
}
|
||||
|
||||
pub fn reindex_searchable(&self) -> bool {
|
||||
// TODO no longer useful after Tamo's PR
|
||||
self.old
|
||||
.fields_ids_map
|
||||
.iter()
|
||||
|
Loading…
Reference in New Issue
Block a user