Use the zstd library directly to be able to define the compression level

This commit is contained in:
Clément Renault 2024-07-02 17:34:02 +02:00
parent b15e8aacb6
commit e18b06ddda
No known key found for this signature in database
GPG key ID: F250A4C4E3AE5F5F
13 changed files with 503 additions and 430 deletions

View file

@ -20,6 +20,7 @@ use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
use meilisearch_types::milli::DocumentId;
use meilisearch_types::star_or::OptionStarOrList;
use meilisearch_types::tasks::KindWithContent;
use meilisearch_types::zstd::dict::DecoderDictionary;
use meilisearch_types::{milli, Document, Index};
use mime::Mime;
use once_cell::sync::Lazy;
@ -603,7 +604,7 @@ fn some_documents<'a, 't: 'a>(
retrieve_vectors: RetrieveVectors,
) -> Result<impl Iterator<Item = Result<Document, ResponseError>> + 'a, ResponseError> {
let fields_ids_map = index.fields_ids_map(rtxn)?;
let dictionary = index.document_compression_dictionary(rtxn)?;
let dictionary = index.document_compression_dictionary(rtxn)?.map(DecoderDictionary::copy);
let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect();
let embedding_configs = index.embedding_configs(rtxn)?;
let mut buffer = Vec::new();
@ -611,7 +612,7 @@ fn some_documents<'a, 't: 'a>(
Ok(index.iter_compressed_documents(rtxn, doc_ids)?.map(move |ret| {
ret.map_err(ResponseError::from).and_then(
|(key, compressed_document)| -> Result<_, ResponseError> {
let document = match dictionary {
let document = match dictionary.as_ref() {
// TODO: return an error to the caller when decompression fails instead of panicking via `unwrap`
Some(dict) => compressed_document.decompress_with(&mut buffer, dict).unwrap(),
None => compressed_document.as_non_compressed(),

View file

@ -19,6 +19,7 @@ use meilisearch_types::milli::vector::parsed_vectors::ExplicitVectors;
use meilisearch_types::milli::vector::Embedder;
use meilisearch_types::milli::{FacetValueHit, OrderBy, SearchForFacetValues, TimeBudget};
use meilisearch_types::settings::DEFAULT_PAGINATION_MAX_TOTAL_HITS;
use meilisearch_types::zstd::dict::DecoderDictionary;
use meilisearch_types::{milli, Document};
use milli::tokenizer::TokenizerBuilder;
use milli::{
@ -1123,13 +1124,14 @@ fn make_hits(
formatter_builder.crop_marker(format.crop_marker);
formatter_builder.highlight_prefix(format.highlight_pre_tag);
formatter_builder.highlight_suffix(format.highlight_post_tag);
let compression_dictionary = index.document_compression_dictionary(rtxn)?;
let compression_dictionary =
index.document_compression_dictionary(rtxn)?.map(DecoderDictionary::copy);
let mut buffer = Vec::new();
let mut documents = Vec::new();
let embedding_configs = index.embedding_configs(rtxn)?;
let documents_iter = index.compressed_documents(rtxn, documents_ids)?;
for ((id, compressed), score) in documents_iter.into_iter().zip(document_scores.into_iter()) {
let obkv = match compression_dictionary {
let obkv = match compression_dictionary.as_ref() {
// TODO: return an error to the caller when decompression fails instead of panicking via `unwrap`
Some(dict) => compressed.decompress_with(&mut buffer, dict).unwrap(),
None => compressed.as_non_compressed(),