From c71b1d33ae5de96ae013e4695b13bc16263b4c3a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Wed, 1 Nov 2023 10:39:16 +0100 Subject: [PATCH] Sort entries using rayon in the transform sorters --- Cargo.lock | 5 +- milli/Cargo.toml | 3 +- milli/src/update/index_documents/transform.rs | 51 +++++++++++++------ 3 files changed, 40 insertions(+), 19 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 2ab2f706a..957dffbe4 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1664,11 +1664,12 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b" [[package]] name = "grenad" version = "0.4.4" -source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5232b2d157b7bf63d7abe1b12177039e58db2f29e377517c0cdee1578cca4c93" +source = "git+https://github.com/meilisearch/grenad?branch=parallel-sorter#eafb6ae795af6078e087edf77e7cd31a26238707" dependencies = [ "bytemuck", "byteorder", + "crossbeam-channel", + "rayon", "tempfile", ] diff --git a/milli/Cargo.toml b/milli/Cargo.toml index 68bc2d2b5..da259c65d 100644 --- a/milli/Cargo.toml +++ b/milli/Cargo.toml @@ -26,7 +26,8 @@ flatten-serde-json = { path = "../flatten-serde-json" } fst = "0.4.7" fxhash = "0.2.1" geoutils = "0.5.1" -grenad = { version = "0.4.4", default-features = false, features = [ +grenad = { git = "https://github.com/meilisearch/grenad", branch = "parallel-sorter", default-features = false, features = [ + "rayon", "tempfile", ] } heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.7", default-features = false, features = [ diff --git a/milli/src/update/index_documents/transform.rs b/milli/src/update/index_documents/transform.rs index 23b5c78c1..8d1750c49 100644 --- a/milli/src/update/index_documents/transform.rs +++ b/milli/src/update/index_documents/transform.rs @@ -114,24 +114,43 @@ impl<'a, 'i> Transform<'a, 'i> { }; // We initialize the sorter with the user indexing settings. - let original_sorter = create_sorter( - grenad::SortAlgorithm::Stable, - merge_function, - indexer_settings.chunk_compression_type, - indexer_settings.chunk_compression_level, - indexer_settings.max_nb_chunks, - indexer_settings.max_memory.map(|mem| mem / 2), - ); + let original_sorter = { + let mut builder = grenad::Sorter::builder(merge_function); + builder.chunk_compression_type(indexer_settings.chunk_compression_type); + if let Some(level) = indexer_settings.chunk_compression_level { + builder.chunk_compression_level(level); + } + if let Some(nb_chunks) = indexer_settings.max_nb_chunks { + builder.max_nb_chunks(nb_chunks); + } + if let Some(memory) = indexer_settings.max_memory.map(|mem| mem / 2) { + builder.dump_threshold(memory); + builder.allow_realloc(false); + } + builder.sort_algorithm(grenad::SortAlgorithm::Stable); + builder.sort_in_parallel(true); + builder.build() + }; // We initialize the sorter with the user indexing settings. - let flattened_sorter = create_sorter( - grenad::SortAlgorithm::Stable, - merge_function, - indexer_settings.chunk_compression_type, - indexer_settings.chunk_compression_level, - indexer_settings.max_nb_chunks, - indexer_settings.max_memory.map(|mem| mem / 2), - ); + let flattened_sorter = { + let mut builder = grenad::Sorter::builder(merge_function); + builder.chunk_compression_type(indexer_settings.chunk_compression_type); + if let Some(level) = indexer_settings.chunk_compression_level { + builder.chunk_compression_level(level); + } + if let Some(nb_chunks) = indexer_settings.max_nb_chunks { + builder.max_nb_chunks(nb_chunks); + } + if let Some(memory) = indexer_settings.max_memory.map(|mem| mem / 2) { + builder.dump_threshold(memory); + builder.allow_realloc(false); + } + builder.sort_algorithm(grenad::SortAlgorithm::Stable); + builder.sort_in_parallel(true); + builder.build() + }; + let documents_ids = index.documents_ids(wtxn)?; Ok(Transform {