Use an unstable algorithm for grenad::Sorter when possible

This commit is contained in:
Loïc Lecrenier 2022-09-13 10:40:37 +02:00
parent f8697075ea
commit 3794962330
14 changed files with 18 additions and 1 deletion

View File

@ -17,7 +17,7 @@ flatten-serde-json = { path = "../flatten-serde-json" }
fst = "0.4.7"
fxhash = "0.2.1"
geoutils = "0.4.1"
grenad = { version = "0.4.2", default-features = false, features = ["tempfile"] }
grenad = { version = "0.4.3", default-features = false, features = ["tempfile"] }
heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.3", default-features = false, features = ["lmdb", "sync-read-txn"] }
json-depth-checker = { path = "../json-depth-checker" }
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }

View File

@ -32,6 +32,7 @@ pub fn extract_docid_word_positions<R: io::Read + io::Seek>(
let mut documents_ids = RoaringBitmap::new();
let mut docid_word_positions_sorter = create_sorter(
grenad::SortAlgorithm::Stable,
concat_u32s_array,
indexer.chunk_compression_type,
indexer.chunk_compression_level,

View File

@ -21,6 +21,7 @@ pub fn extract_facet_number_docids<R: io::Read + io::Seek>(
let max_memory = indexer.max_memory_by_thread();
let mut facet_number_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
merge_cbo_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,

View File

@ -23,6 +23,7 @@ pub fn extract_facet_string_docids<R: io::Read + io::Seek>(
let max_memory = indexer.max_memory_by_thread();
let mut facet_string_docids_sorter = create_sorter(
grenad::SortAlgorithm::Stable,
keep_first_prefix_value_merge_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,

View File

@ -28,6 +28,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
let max_memory = indexer.max_memory_by_thread();
let mut fid_docid_facet_numbers_sorter = create_sorter(
grenad::SortAlgorithm::Stable,
keep_first,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
@ -36,6 +37,7 @@ pub fn extract_fid_docid_facet_values<R: io::Read + io::Seek>(
);
let mut fid_docid_facet_strings_sorter = create_sorter(
grenad::SortAlgorithm::Stable,
keep_first,
indexer.chunk_compression_type,
indexer.chunk_compression_level,

View File

@ -25,6 +25,7 @@ pub fn extract_fid_word_count_docids<R: io::Read + io::Seek>(
let max_memory = indexer.max_memory_by_thread();
let mut fid_word_count_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
merge_cbo_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,

View File

@ -30,6 +30,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
let max_memory = indexer.max_memory_by_thread();
let mut word_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
merge_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,
@ -38,6 +39,7 @@ pub fn extract_word_docids<R: io::Read + io::Seek>(
);
let mut exact_word_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
merge_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,

View File

@ -24,6 +24,7 @@ pub fn extract_word_pair_proximity_docids<R: io::Read + io::Seek>(
let max_memory = indexer.max_memory_by_thread();
let mut word_pair_proximity_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
merge_cbo_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,

View File

@ -21,6 +21,7 @@ pub fn extract_word_position_docids<R: io::Read + io::Seek>(
let max_memory = indexer.max_memory_by_thread();
let mut word_position_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
merge_cbo_roaring_bitmaps,
indexer.chunk_compression_type,
indexer.chunk_compression_level,

View File

@ -27,6 +27,7 @@ pub fn create_writer<R: io::Write>(
}
pub fn create_sorter(
sort_algorithm: grenad::SortAlgorithm,
merge: MergeFn,
chunk_compression_type: grenad::CompressionType,
chunk_compression_level: Option<u32>,
@ -45,6 +46,7 @@ pub fn create_sorter(
builder.dump_threshold(memory);
builder.allow_realloc(false);
}
builder.sort_algorithm(sort_algorithm);
builder.build()
}

View File

@ -1489,6 +1489,7 @@ mod tests {
assert_eq!(count, 4);
}
#[cfg(feature = "default")]
#[test]
fn test_meilisearch_1714() {
let index = TempIndex::new();

View File

@ -99,6 +99,7 @@ impl<'a, 'i> Transform<'a, 'i> {
// We initialize the sorter with the user indexing settings.
let original_sorter = create_sorter(
grenad::SortAlgorithm::Stable,
merge_function,
indexer_settings.chunk_compression_type,
indexer_settings.chunk_compression_level,
@ -108,6 +109,7 @@ impl<'a, 'i> Transform<'a, 'i> {
// We initialize the sorter with the user indexing settings.
let flattened_sorter = create_sorter(
grenad::SortAlgorithm::Stable,
merge_function,
indexer_settings.chunk_compression_type,
indexer_settings.chunk_compression_level,

View File

@ -48,6 +48,7 @@ impl<'t, 'u, 'i> WordPrefixDocids<'t, 'u, 'i> {
// It is forbidden to keep a mutable reference into the database
// and write into it at the same time, therefore we write into another file.
let mut prefix_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
merge_roaring_bitmaps,
self.chunk_compression_type,
self.chunk_compression_level,

View File

@ -65,6 +65,7 @@ impl<'t, 'u, 'i> WordPrefixPositionDocids<'t, 'u, 'i> {
debug!("Computing and writing the word levels positions docids into LMDB on disk...");
let mut prefix_position_docids_sorter = create_sorter(
grenad::SortAlgorithm::Unstable,
merge_cbo_roaring_bitmaps,
self.chunk_compression_type,
self.chunk_compression_level,