From c1dd489adc4a6ef24e0c13ebb7530dc59fff5603 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Cl=C3=A9ment=20Renault?=
Date: Tue, 17 Dec 2024 16:25:53 +0100
Subject: [PATCH] Fix the usage of compressed documents

---
 crates/index-scheduler/src/batch.rs  |  4 ++--
 crates/index-scheduler/src/lib.rs    | 12 ++++++++++--
 crates/meilisearch/src/search/mod.rs | 23 +++++------------------
 3 files changed, 17 insertions(+), 22 deletions(-)

diff --git a/crates/index-scheduler/src/batch.rs b/crates/index-scheduler/src/batch.rs
index 3cdd2eba9..a0b984f71 100644
--- a/crates/index-scheduler/src/batch.rs
+++ b/crates/index-scheduler/src/batch.rs
@@ -891,10 +891,10 @@ impl IndexScheduler {
                 let (atomic, update_document_progress) = AtomicDocumentStep::new(nb_documents);
                 progress.update_progress(update_document_progress);
                 let documents = index
-                    .all_documents(&rtxn)
+                    .all_compressed_documents(&rtxn)
                     .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
                 // 3.1. Dump the documents
-                for ret in index.all_compressed_documents(&rtxn)? {
+                for ret in documents {
                     if self.must_stop_processing.get() {
                         return Err(Error::AbortedTask);
                     }
diff --git a/crates/index-scheduler/src/lib.rs b/crates/index-scheduler/src/lib.rs
index 7e20667dc..e7d085aad 100644
--- a/crates/index-scheduler/src/lib.rs
+++ b/crates/index-scheduler/src/lib.rs
@@ -3129,10 +3129,18 @@ mod tests {
         let rtxn = index.read_txn().unwrap();
         let field_ids_map = index.fields_ids_map(&rtxn).unwrap();
         let field_ids = field_ids_map.ids().collect::<Vec<_>>();
+        let dictionary = index.document_decompression_dictionary(&rtxn).unwrap();
+        let mut buffer = Vec::new();
         let documents = index
-            .all_documents(&rtxn)
+            .all_compressed_documents(&rtxn)
             .unwrap()
-            .map(|ret| obkv_to_json(&field_ids, &field_ids_map, ret.unwrap().1).unwrap())
+            .map(|ret| {
+                let (_docid, compressed_doc) = ret.unwrap();
+                let doc = compressed_doc
+                    .decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())
+                    .unwrap();
+                obkv_to_json(&field_ids, &field_ids_map, doc).unwrap()
+            })
             .collect::<Vec<_>>();
         snapshot!(serde_json::to_string_pretty(&documents).unwrap(), name: "documents_remaining_should_only_be_bork");
     }
diff --git a/crates/meilisearch/src/search/mod.rs b/crates/meilisearch/src/search/mod.rs
index 9ee28a22a..165611b6c 100644
--- a/crates/meilisearch/src/search/mod.rs
+++ b/crates/meilisearch/src/search/mod.rs
@@ -1293,26 +1293,13 @@ impl<'a> HitMaker<'a> {
     }
 
     pub fn make_hit(&self, id: u32, score: &[ScoreDetails]) -> milli::Result<SearchHit> {
-        let (_, obkv) =
-            self.index.iter_documents(self.rtxn, std::iter::once(id))?.next().unwrap()?;
+        let mut buffer = Vec::new();
+        let dict = self.index.document_decompression_dictionary(self.rtxn)?;
+        let compressed = self.index.compressed_document(self.rtxn, id)?.unwrap();
+        let doc = compressed.decompress_with_optional_dictionary(&mut buffer, dict.as_ref())?;
 
-        // let mut formatter_builder = MatcherBuilder::new(matching_words, tokenizer_builder.build());
-        // formatter_builder.crop_marker(format.crop_marker);
-        // formatter_builder.highlight_prefix(format.highlight_pre_tag);
-        // formatter_builder.highlight_suffix(format.highlight_post_tag);
-        // let decompression_dictionary = index.document_decompression_dictionary(rtxn)?;
-        // let mut buffer = Vec::new();
-        // let mut documents = Vec::new();
-        // let embedding_configs = index.embedding_configs(rtxn)?;
-        // let documents_iter = index.compressed_documents(rtxn, documents_ids)?;
-        // for ((id, compressed), score) in documents_iter.into_iter().zip(document_scores.into_iter()) {
-        //     let obkv = compressed
-        //         .decompress_with_optional_dictionary(&mut buffer, decompression_dictionary.as_ref())
-        //         // TODO use a better error?
-        //         .map_err(|e| MeilisearchHttpError::HeedError(e.into()))?;
         // First generate a document with all the displayed fields
-        let displayed_document = make_document(&self.displayed_ids, &self.fields_ids_map, obkv)?;
-
+        let displayed_document = make_document(&self.displayed_ids, &self.fields_ids_map, doc)?;
         let add_vectors_fid =
             self.vectors_fid.filter(|_fid| self.retrieve_vectors == RetrieveVectors::Retrieve);
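
Note: all three call sites now read documents through the compressed-documents
API and decode them explicitly. The sketch below restates that shared pattern
outside the diff, assuming only the API names visible in the hunks above
(`document_decompression_dictionary`, `compressed_document`, and
`decompress_with_optional_dictionary`); `index`, `rtxn`, and `docid` are
hypothetical stand-ins for whatever is in scope at the call site, and error
handling is simplified.

    // Fetch the optional decompression dictionary once per read transaction;
    // `as_ref()` lends it out without giving up ownership.
    let dictionary = index.document_decompression_dictionary(&rtxn)?;
    // Reuse a single scratch buffer across documents to avoid reallocating.
    let mut buffer = Vec::new();
    let compressed = index
        .compressed_document(&rtxn, docid)?
        .expect("document should exist");
    // The decompressed obkv borrows `buffer`, so it must be consumed before
    // the buffer is reused for the next document.
    let doc = compressed.decompress_with_optional_dictionary(&mut buffer, dictionary.as_ref())?;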