From 0d63d02ab2956a5ce8edc2d600fe32b448d14cea Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?=
Date: Tue, 2 Jul 2024 13:12:56 +0200
Subject: [PATCH] Prefer encoding the output size when compressing documents

---
 milli/src/heed_codec/compressed_obkv_codec.rs | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/milli/src/heed_codec/compressed_obkv_codec.rs b/milli/src/heed_codec/compressed_obkv_codec.rs
index f9a1d0966..d6ec8e717 100644
--- a/milli/src/heed_codec/compressed_obkv_codec.rs
+++ b/milli/src/heed_codec/compressed_obkv_codec.rs
@@ -30,15 +30,11 @@ impl<'a> CompressedKvReaderU16<'a> {
         buffer: &'b mut Vec<u8>,
         dictionnary: &[u8],
     ) -> Result<KvReaderU16<'b>, lz4_flex::block::DecompressError> {
-        // TODO WHAT THE HECK!!! WHY DO I NEED TO INCREASE THE SIZE PROVIDED
-        let max_size = lz4_flex::block::get_maximum_output_size(self.0.len()) * 2;
-        buffer.resize(max_size, 0);
+        let (size, input) = lz4_flex::block::uncompressed_size(self.0)?;
+        buffer.resize(size, 0);
         // TODO loop to increase the buffer size of need be
-        let size = lz4_flex::block::decompress_into_with_dict(
-            self.0,
-            &mut buffer[..max_size],
-            dictionnary,
-        )?;
+        let size =
+            lz4_flex::block::decompress_into_with_dict(input, &mut buffer[..size], dictionnary)?;
         Ok(KvReaderU16::new(&buffer[..size]))
     }
 
@@ -53,7 +49,7 @@ pub struct CompressedKvWriterU16(Vec<u8>);
 impl CompressedKvWriterU16 {
     // TODO ask for a KvReaderU16 here
     pub fn new_with_dictionary(writer: &[u8], dictionary: &[u8]) -> Self {
-        CompressedKvWriterU16(lz4_flex::block::compress_with_dict(writer, dictionary))
+        CompressedKvWriterU16(lz4_flex::block::compress_prepend_size_with_dict(writer, dictionary))
     }
 
     pub fn as_bytes(&self) -> &[u8] {