mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-25 12:47:28 +01:00
Don't sort in parallel in sorters of the new indexer
This commit is contained in:
parent
0647f75e6b
commit
0749633618
@ -40,6 +40,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
|
|||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
indexer.max_nb_chunks,
|
indexer.max_nb_chunks,
|
||||||
max_memory,
|
max_memory,
|
||||||
|
true,
|
||||||
);
|
);
|
||||||
|
|
||||||
// initialize buffers.
|
// initialize buffers.
|
||||||
|
@ -32,6 +32,7 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
|
|||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
indexer.max_nb_chunks,
|
indexer.max_nb_chunks,
|
||||||
max_memory,
|
max_memory,
|
||||||
|
true,
|
||||||
);
|
);
|
||||||
|
|
||||||
let mut buffer = Vec::new();
|
let mut buffer = Vec::new();
|
||||||
|
@ -61,6 +61,7 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
|
|||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
indexer.max_nb_chunks,
|
indexer.max_nb_chunks,
|
||||||
max_memory.map(|m| m / 2),
|
max_memory.map(|m| m / 2),
|
||||||
|
true,
|
||||||
);
|
);
|
||||||
|
|
||||||
let mut normalized_facet_string_docids_sorter = create_sorter(
|
let mut normalized_facet_string_docids_sorter = create_sorter(
|
||||||
@ -70,6 +71,7 @@ fn extract_facet_string_docids_document_update<R: io::Read + io::Seek>(
|
|||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
indexer.max_nb_chunks,
|
indexer.max_nb_chunks,
|
||||||
max_memory.map(|m| m / 2),
|
max_memory.map(|m| m / 2),
|
||||||
|
true,
|
||||||
);
|
);
|
||||||
|
|
||||||
let mut buffer = Vec::new();
|
let mut buffer = Vec::new();
|
||||||
@ -149,6 +151,7 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
|
|||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
indexer.max_nb_chunks,
|
indexer.max_nb_chunks,
|
||||||
max_memory.map(|m| m / 2),
|
max_memory.map(|m| m / 2),
|
||||||
|
true,
|
||||||
);
|
);
|
||||||
|
|
||||||
let mut normalized_facet_string_docids_sorter = create_sorter(
|
let mut normalized_facet_string_docids_sorter = create_sorter(
|
||||||
@ -158,6 +161,7 @@ fn extract_facet_string_docids_settings<R: io::Read + io::Seek>(
|
|||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
indexer.max_nb_chunks,
|
indexer.max_nb_chunks,
|
||||||
max_memory.map(|m| m / 2),
|
max_memory.map(|m| m / 2),
|
||||||
|
true,
|
||||||
);
|
);
|
||||||
|
|
||||||
let mut buffer = Vec::new();
|
let mut buffer = Vec::new();
|
||||||
|
@ -53,6 +53,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
|||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
indexer.max_nb_chunks,
|
indexer.max_nb_chunks,
|
||||||
max_memory.map(|m| m / 2),
|
max_memory.map(|m| m / 2),
|
||||||
|
true,
|
||||||
);
|
);
|
||||||
|
|
||||||
let mut fid_docid_facet_strings_sorter = create_sorter(
|
let mut fid_docid_facet_strings_sorter = create_sorter(
|
||||||
@ -62,6 +63,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
|||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
indexer.max_nb_chunks,
|
indexer.max_nb_chunks,
|
||||||
max_memory.map(|m| m / 2),
|
max_memory.map(|m| m / 2),
|
||||||
|
true,
|
||||||
);
|
);
|
||||||
|
|
||||||
// The tuples represents the Del and Add side for a bitmap
|
// The tuples represents the Del and Add side for a bitmap
|
||||||
|
@ -35,6 +35,7 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
|
|||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
indexer.max_nb_chunks,
|
indexer.max_nb_chunks,
|
||||||
max_memory,
|
max_memory,
|
||||||
|
true,
|
||||||
);
|
);
|
||||||
|
|
||||||
let mut key_buffer = Vec::new();
|
let mut key_buffer = Vec::new();
|
||||||
|
@ -44,6 +44,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
|
|||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
indexer.max_nb_chunks,
|
indexer.max_nb_chunks,
|
||||||
max_memory.map(|m| m / 3),
|
max_memory.map(|m| m / 3),
|
||||||
|
true,
|
||||||
);
|
);
|
||||||
let mut key_buffer = Vec::new();
|
let mut key_buffer = Vec::new();
|
||||||
let mut del_words = BTreeSet::new();
|
let mut del_words = BTreeSet::new();
|
||||||
@ -98,6 +99,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
|
|||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
indexer.max_nb_chunks,
|
indexer.max_nb_chunks,
|
||||||
max_memory.map(|m| m / 3),
|
max_memory.map(|m| m / 3),
|
||||||
|
true,
|
||||||
);
|
);
|
||||||
|
|
||||||
let mut exact_word_docids_sorter = create_sorter(
|
let mut exact_word_docids_sorter = create_sorter(
|
||||||
@ -107,6 +109,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
|
|||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
indexer.max_nb_chunks,
|
indexer.max_nb_chunks,
|
||||||
max_memory.map(|m| m / 3),
|
max_memory.map(|m| m / 3),
|
||||||
|
true,
|
||||||
);
|
);
|
||||||
|
|
||||||
let mut iter = word_fid_docids_sorter.into_stream_merger_iter()?;
|
let mut iter = word_fid_docids_sorter.into_stream_merger_iter()?;
|
||||||
|
@ -49,6 +49,7 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
|
|||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
indexer.max_nb_chunks,
|
indexer.max_nb_chunks,
|
||||||
max_memory.map(|m| m / MAX_DISTANCE as usize),
|
max_memory.map(|m| m / MAX_DISTANCE as usize),
|
||||||
|
true,
|
||||||
)
|
)
|
||||||
})
|
})
|
||||||
.collect();
|
.collect();
|
||||||
|
@ -33,6 +33,7 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
|
|||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
indexer.max_nb_chunks,
|
indexer.max_nb_chunks,
|
||||||
max_memory,
|
max_memory,
|
||||||
|
true,
|
||||||
);
|
);
|
||||||
|
|
||||||
let mut del_word_positions: BTreeSet<(u16, Vec<u8>)> = BTreeSet::new();
|
let mut del_word_positions: BTreeSet<(u16, Vec<u8>)> = BTreeSet::new();
|
||||||
|
@ -37,6 +37,7 @@ pub fn create_sorter<MF: MergeFunction>(
|
|||||||
chunk_compression_level: Option<u32>,
|
chunk_compression_level: Option<u32>,
|
||||||
max_nb_chunks: Option<usize>,
|
max_nb_chunks: Option<usize>,
|
||||||
max_memory: Option<usize>,
|
max_memory: Option<usize>,
|
||||||
|
sort_in_parallel: bool,
|
||||||
) -> grenad::Sorter<MF> {
|
) -> grenad::Sorter<MF> {
|
||||||
let mut builder = grenad::Sorter::builder(merge);
|
let mut builder = grenad::Sorter::builder(merge);
|
||||||
builder.chunk_compression_type(chunk_compression_type);
|
builder.chunk_compression_type(chunk_compression_type);
|
||||||
@ -51,7 +52,7 @@ pub fn create_sorter<MF: MergeFunction>(
|
|||||||
builder.allow_realloc(false);
|
builder.allow_realloc(false);
|
||||||
}
|
}
|
||||||
builder.sort_algorithm(sort_algorithm);
|
builder.sort_algorithm(sort_algorithm);
|
||||||
builder.sort_in_parallel(true);
|
builder.sort_in_parallel(sort_in_parallel);
|
||||||
builder.build()
|
builder.build()
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -127,6 +127,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
indexer_settings.chunk_compression_level,
|
indexer_settings.chunk_compression_level,
|
||||||
indexer_settings.max_nb_chunks,
|
indexer_settings.max_nb_chunks,
|
||||||
indexer_settings.max_memory.map(|mem| mem / 2),
|
indexer_settings.max_memory.map(|mem| mem / 2),
|
||||||
|
true,
|
||||||
);
|
);
|
||||||
|
|
||||||
// We initialize the sorter with the user indexing settings.
|
// We initialize the sorter with the user indexing settings.
|
||||||
@ -137,6 +138,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
indexer_settings.chunk_compression_level,
|
indexer_settings.chunk_compression_level,
|
||||||
indexer_settings.max_nb_chunks,
|
indexer_settings.max_nb_chunks,
|
||||||
indexer_settings.max_memory.map(|mem| mem / 2),
|
indexer_settings.max_memory.map(|mem| mem / 2),
|
||||||
|
true,
|
||||||
);
|
);
|
||||||
let documents_ids = index.documents_ids(wtxn)?;
|
let documents_ids = index.documents_ids(wtxn)?;
|
||||||
|
|
||||||
@ -988,6 +990,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
self.indexer_settings.chunk_compression_level,
|
self.indexer_settings.chunk_compression_level,
|
||||||
self.indexer_settings.max_nb_chunks,
|
self.indexer_settings.max_nb_chunks,
|
||||||
self.indexer_settings.max_memory.map(|mem| mem / 2),
|
self.indexer_settings.max_memory.map(|mem| mem / 2),
|
||||||
|
true,
|
||||||
))
|
))
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
@ -1030,6 +1033,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||||||
self.indexer_settings.chunk_compression_level,
|
self.indexer_settings.chunk_compression_level,
|
||||||
self.indexer_settings.max_nb_chunks,
|
self.indexer_settings.max_nb_chunks,
|
||||||
self.indexer_settings.max_memory.map(|mem| mem / 2),
|
self.indexer_settings.max_memory.map(|mem| mem / 2),
|
||||||
|
true,
|
||||||
))
|
))
|
||||||
} else {
|
} else {
|
||||||
None
|
None
|
||||||
|
@ -46,6 +46,10 @@ impl<'extractor> Extractor<'extractor> for FacetedExtractorData<'extractor> {
|
|||||||
self.grenad_parameters.chunk_compression_level,
|
self.grenad_parameters.chunk_compression_level,
|
||||||
self.grenad_parameters.max_nb_chunks,
|
self.grenad_parameters.max_nb_chunks,
|
||||||
self.max_memory,
|
self.max_memory,
|
||||||
|
// *NOTE*: this must not be set to true:
|
||||||
|
// 1. we're already using max parallelism in the pool, so it wouldn't help
|
||||||
|
// 2. it creates correctness issues if it causes to yield a borrow-mut wielding task
|
||||||
|
false,
|
||||||
),
|
),
|
||||||
))))
|
))))
|
||||||
}
|
}
|
||||||
|
@ -48,6 +48,7 @@ impl WordDocidsCachedSorters {
|
|||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
indexer.max_nb_chunks,
|
indexer.max_nb_chunks,
|
||||||
max_memory,
|
max_memory,
|
||||||
|
false,
|
||||||
),
|
),
|
||||||
);
|
);
|
||||||
let word_docids = CboCachedSorter::new(
|
let word_docids = CboCachedSorter::new(
|
||||||
@ -59,6 +60,7 @@ impl WordDocidsCachedSorters {
|
|||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
indexer.max_nb_chunks,
|
indexer.max_nb_chunks,
|
||||||
max_memory,
|
max_memory,
|
||||||
|
false,
|
||||||
),
|
),
|
||||||
);
|
);
|
||||||
let exact_word_docids = CboCachedSorter::new(
|
let exact_word_docids = CboCachedSorter::new(
|
||||||
@ -70,6 +72,7 @@ impl WordDocidsCachedSorters {
|
|||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
indexer.max_nb_chunks,
|
indexer.max_nb_chunks,
|
||||||
max_memory,
|
max_memory,
|
||||||
|
false,
|
||||||
),
|
),
|
||||||
);
|
);
|
||||||
let word_position_docids = CboCachedSorter::new(
|
let word_position_docids = CboCachedSorter::new(
|
||||||
@ -81,6 +84,7 @@ impl WordDocidsCachedSorters {
|
|||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
indexer.max_nb_chunks,
|
indexer.max_nb_chunks,
|
||||||
max_memory,
|
max_memory,
|
||||||
|
false,
|
||||||
),
|
),
|
||||||
);
|
);
|
||||||
let fid_word_count_docids = CboCachedSorter::new(
|
let fid_word_count_docids = CboCachedSorter::new(
|
||||||
@ -92,6 +96,7 @@ impl WordDocidsCachedSorters {
|
|||||||
indexer.chunk_compression_level,
|
indexer.chunk_compression_level,
|
||||||
indexer.max_nb_chunks,
|
indexer.max_nb_chunks,
|
||||||
max_memory,
|
max_memory,
|
||||||
|
false,
|
||||||
),
|
),
|
||||||
);
|
);
|
||||||
|
|
||||||
|
@ -50,6 +50,7 @@ impl<'extractor, EX: SearchableExtractor + Sync> Extractor<'extractor>
|
|||||||
self.grenad_parameters.chunk_compression_level,
|
self.grenad_parameters.chunk_compression_level,
|
||||||
self.grenad_parameters.max_nb_chunks,
|
self.grenad_parameters.max_nb_chunks,
|
||||||
self.max_memory,
|
self.max_memory,
|
||||||
|
false,
|
||||||
),
|
),
|
||||||
))))
|
))))
|
||||||
}
|
}
|
||||||
|
@ -60,6 +60,7 @@ impl<'t, 'i> WordPrefixDocids<'t, 'i> {
|
|||||||
self.chunk_compression_level,
|
self.chunk_compression_level,
|
||||||
self.max_nb_chunks,
|
self.max_nb_chunks,
|
||||||
self.max_memory,
|
self.max_memory,
|
||||||
|
true,
|
||||||
);
|
);
|
||||||
|
|
||||||
if !common_prefix_fst_words.is_empty() {
|
if !common_prefix_fst_words.is_empty() {
|
||||||
|
@ -65,6 +65,7 @@ impl<'t, 'i> WordPrefixIntegerDocids<'t, 'i> {
|
|||||||
self.chunk_compression_level,
|
self.chunk_compression_level,
|
||||||
self.max_nb_chunks,
|
self.max_nb_chunks,
|
||||||
self.max_memory,
|
self.max_memory,
|
||||||
|
true,
|
||||||
);
|
);
|
||||||
|
|
||||||
if !common_prefix_fst_words.is_empty() {
|
if !common_prefix_fst_words.is_empty() {
|
||||||
|
Loading…
x
Reference in New Issue
Block a user