Sort entries using rayon in the transform sorters

This commit is contained in:
Clément Renault 2023-11-01 10:39:16 +01:00
parent 0fc446c62f
commit c71b1d33ae
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F
3 changed files with 40 additions and 19 deletions

5
Cargo.lock generated
View File

@ -1664,11 +1664,12 @@ checksum = "d2fabcfbdc87f4758337ca535fb41a6d701b65693ce38287d856d1674551ec9b"
[[package]] [[package]]
name = "grenad" name = "grenad"
version = "0.4.4" version = "0.4.4"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "git+https://github.com/meilisearch/grenad?branch=parallel-sorter#eafb6ae795af6078e087edf77e7cd31a26238707"
checksum = "5232b2d157b7bf63d7abe1b12177039e58db2f29e377517c0cdee1578cca4c93"
dependencies = [ dependencies = [
"bytemuck", "bytemuck",
"byteorder", "byteorder",
"crossbeam-channel",
"rayon",
"tempfile", "tempfile",
] ]

View File

@ -26,7 +26,8 @@ flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7" fst = "0.4.7"
fxhash = "0.2.1" fxhash = "0.2.1"
geoutils = "0.5.1" geoutils = "0.5.1"
grenad = { version = "0.4.4", default-features = false, features = [ grenad = { git = "https://github.com/meilisearch/grenad", branch = "parallel-sorter", default-features = false, features = [
"rayon",
"tempfile", "tempfile",
] } ] }
heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.7", default-features = false, features = [ heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.7", default-features = false, features = [

View File

@ -114,24 +114,43 @@ impl<'a, 'i> Transform<'a, 'i> {
}; };
// We initialize the sorter with the user indexing settings. // We initialize the sorter with the user indexing settings.
let original_sorter = create_sorter( let original_sorter = {
grenad::SortAlgorithm::Stable, let mut builder = grenad::Sorter::builder(merge_function);
merge_function, builder.chunk_compression_type(indexer_settings.chunk_compression_type);
indexer_settings.chunk_compression_type, if let Some(level) = indexer_settings.chunk_compression_level {
indexer_settings.chunk_compression_level, builder.chunk_compression_level(level);
indexer_settings.max_nb_chunks, }
indexer_settings.max_memory.map(|mem| mem / 2), if let Some(nb_chunks) = indexer_settings.max_nb_chunks {
); builder.max_nb_chunks(nb_chunks);
}
if let Some(memory) = indexer_settings.max_memory.map(|mem| mem / 2) {
builder.dump_threshold(memory);
builder.allow_realloc(false);
}
builder.sort_algorithm(grenad::SortAlgorithm::Stable);
builder.sort_in_parallel(true);
builder.build()
};
// We initialize the sorter with the user indexing settings. // We initialize the sorter with the user indexing settings.
let flattened_sorter = create_sorter( let flattened_sorter = {
grenad::SortAlgorithm::Stable, let mut builder = grenad::Sorter::builder(merge_function);
merge_function, builder.chunk_compression_type(indexer_settings.chunk_compression_type);
indexer_settings.chunk_compression_type, if let Some(level) = indexer_settings.chunk_compression_level {
indexer_settings.chunk_compression_level, builder.chunk_compression_level(level);
indexer_settings.max_nb_chunks, }
indexer_settings.max_memory.map(|mem| mem / 2), if let Some(nb_chunks) = indexer_settings.max_nb_chunks {
); builder.max_nb_chunks(nb_chunks);
}
if let Some(memory) = indexer_settings.max_memory.map(|mem| mem / 2) {
builder.dump_threshold(memory);
builder.allow_realloc(false);
}
builder.sort_algorithm(grenad::SortAlgorithm::Stable);
builder.sort_in_parallel(true);
builder.build()
};
let documents_ids = index.documents_ids(wtxn)?; let documents_ids = index.documents_ids(wtxn)?;
Ok(Transform { Ok(Transform {