mirror of
https://github.com/meilisearch/MeiliSearch
synced 2025-01-09 13:04:30 +01:00
Generate the dictionary only when necessary
This commit is contained in:
parent
bfaebb50b2
commit
4232e522ea
@ -31,6 +31,8 @@ const DICTIONARY_MAX_SIZE: usize = 64_000;
|
||||
/// have not already been compressed in the database. If this threshold
|
||||
/// is reached, we do not generate a dictionary and continue as is.
|
||||
const COMPRESS_LIMIT: usize = 5_000_000;
|
||||
/// 10 KiB: the minimum sample data size below which no dictionary is generated.
|
||||
const TEN_KIB: usize = 10 * 1024;
|
||||
|
||||
/// A function dedicated to use the existing or generate an appropriate
|
||||
/// document compression dictionary based on the documents available in
|
||||
@ -115,6 +117,16 @@ where
|
||||
);
|
||||
}
|
||||
|
||||
// We avoid generating a dictionary if more than a third of the samples are
|
||||
// smaller than 8 bytes, or if the sample data size is smaller than 10KiB.
|
||||
//
|
||||
// <https://github.com/facebook/zstd/blob/0218c8de0fa77bbd87e75f2ea70ba00b93460e15/lib/zdict.h#L190-L209>
|
||||
if sample_sizes.iter().filter(|s| **s < 8).count() > sample_sizes.len() / 3
|
||||
|| sample_data.len() < TEN_KIB
|
||||
{
|
||||
return Ok(None);
|
||||
}
|
||||
|
||||
let dictionary = from_continuous(&sample_data, &sample_sizes, DICTIONARY_MAX_SIZE)?;
|
||||
index.put_document_compression_dictionary(wtxn, &dictionary)?;
|
||||
let encoder_dictionary = EncoderDictionary::copy(&dictionary, COMPRESSION_LEVEL);
|
||||
|
Loading…
x
Reference in New Issue
Block a user