mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-07-03 11:57:07 +02:00
Merge #635
635: Use an unstable algorithm for `grenad::Sorter` when possible r=Kerollmops a=loiclec # Pull Request ## What does this PR do? Use an unstable algorithm to sort the internal vector used by `grenad::Sorter` whenever possible to speed up indexing. In practice, every time the merge function creates a `RoaringBitmap`, we use an unstable sort. For every other merge function, such as `keep_first`, `keep_last`, etc., a stable sort is used. Co-authored-by: Loïc Lecrenier <loic@meilisearch.com>
This commit is contained in:
commit
15d478cf4d
14 changed files with 18 additions and 1 deletions
|
@ -32,6 +32,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
|
|||
|
||||
let mut documents_ids = RoaringBitmap::new();
|
||||
let mut docid_word_positions_sorter = create_sorter(
|
||||
grenad::SortAlgorithm::Stable,
|
||||
concat_u32s_array,
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
|
|
|
@ -21,6 +21,7 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
|
|||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut facet_number_docids_sorter = create_sorter(
|
||||
grenad::SortAlgorithm::Unstable,
|
||||
merge_cbo_roaring_bitmaps,
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
|
|
|
@ -23,6 +23,7 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
|
|||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut facet_string_docids_sorter = create_sorter(
|
||||
grenad::SortAlgorithm::Stable,
|
||||
keep_first_prefix_value_merge_roaring_bitmaps,
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
|
|
|
@ -28,6 +28,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
|||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut fid_docid_facet_numbers_sorter = create_sorter(
|
||||
grenad::SortAlgorithm::Stable,
|
||||
keep_first,
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
|
@ -36,6 +37,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
|
|||
);
|
||||
|
||||
let mut fid_docid_facet_strings_sorter = create_sorter(
|
||||
grenad::SortAlgorithm::Stable,
|
||||
keep_first,
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
|
|
|
@ -25,6 +25,7 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
|
|||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut fid_word_count_docids_sorter = create_sorter(
|
||||
grenad::SortAlgorithm::Unstable,
|
||||
merge_cbo_roaring_bitmaps,
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
|
|
|
@ -30,6 +30,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
|
|||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut word_docids_sorter = create_sorter(
|
||||
grenad::SortAlgorithm::Unstable,
|
||||
merge_roaring_bitmaps,
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
|
@ -38,6 +39,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
|
|||
);
|
||||
|
||||
let mut exact_word_docids_sorter = create_sorter(
|
||||
grenad::SortAlgorithm::Unstable,
|
||||
merge_roaring_bitmaps,
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
|
|
|
@ -24,6 +24,7 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
|
|||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut word_pair_proximity_docids_sorter = create_sorter(
|
||||
grenad::SortAlgorithm::Unstable,
|
||||
merge_cbo_roaring_bitmaps,
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
|
|
|
@ -21,6 +21,7 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
|
|||
let max_memory = indexer.max_memory_by_thread();
|
||||
|
||||
let mut word_position_docids_sorter = create_sorter(
|
||||
grenad::SortAlgorithm::Unstable,
|
||||
merge_cbo_roaring_bitmaps,
|
||||
indexer.chunk_compression_type,
|
||||
indexer.chunk_compression_level,
|
||||
|
|
|
@ -27,6 +27,7 @@ pub fn create_writer<R: io::Write>(
|
|||
}
|
||||
|
||||
pub fn create_sorter(
|
||||
sort_algorithm: grenad::SortAlgorithm,
|
||||
merge: MergeFn,
|
||||
chunk_compression_type: grenad::CompressionType,
|
||||
chunk_compression_level: Option<u32>,
|
||||
|
@ -45,6 +46,7 @@ pub fn create_sorter(
|
|||
builder.dump_threshold(memory);
|
||||
builder.allow_realloc(false);
|
||||
}
|
||||
builder.sort_algorithm(sort_algorithm);
|
||||
builder.build()
|
||||
}
|
||||
|
||||
|
|
|
@ -1488,6 +1488,7 @@ mod tests {
|
|||
assert_eq!(count, 4);
|
||||
}
|
||||
|
||||
#[cfg(feature = "default")]
|
||||
#[test]
|
||||
fn test_meilisearch_1714() {
|
||||
let index = TempIndex::new();
|
||||
|
|
|
@ -99,6 +99,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||
|
||||
// We initialize the sorter with the user indexing settings.
|
||||
let original_sorter = create_sorter(
|
||||
grenad::SortAlgorithm::Stable,
|
||||
merge_function,
|
||||
indexer_settings.chunk_compression_type,
|
||||
indexer_settings.chunk_compression_level,
|
||||
|
@ -108,6 +109,7 @@ impl<'a, 'i> Transform<'a, 'i> {
|
|||
|
||||
// We initialize the sorter with the user indexing settings.
|
||||
let flattened_sorter = create_sorter(
|
||||
grenad::SortAlgorithm::Stable,
|
||||
merge_function,
|
||||
indexer_settings.chunk_compression_type,
|
||||
indexer_settings.chunk_compression_level,
|
||||
|
|
|
@ -48,6 +48,7 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
|
|||
// It is forbidden to keep a mutable reference into the database
|
||||
// and write into it at the same time, therefore we write into another file.
|
||||
let mut prefix_docids_sorter = create_sorter(
|
||||
grenad::SortAlgorithm::Unstable,
|
||||
merge_roaring_bitmaps,
|
||||
self.chunk_compression_type,
|
||||
self.chunk_compression_level,
|
||||
|
|
|
@ -65,6 +65,7 @@ impl<'t, 'u, 'i> WordPrefixPositionDocids<'t, 'u, 'i> {
|
|||
debug!("Computing and writing the word levels positions docids into LMDB on disk...");
|
||||
|
||||
let mut prefix_position_docids_sorter = create_sorter(
|
||||
grenad::SortAlgorithm::Unstable,
|
||||
merge_cbo_roaring_bitmaps,
|
||||
self.chunk_compression_type,
|
||||
self.chunk_compression_level,
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue