From e96b852107221f58a91ec1f023d606eeefce99af Mon Sep 17 00:00:00 2001
From: Irevoire <tamo@meilisearch.com>
Date: Wed, 10 Aug 2022 16:25:24 +0200
Subject: [PATCH] bump heed to the compute_size branch and handle BadOpenOptions

---
 milli/Cargo.toml                        |  3 ++-
 milli/src/error.rs                      |  3 +++
 milli/src/update/index_documents/mod.rs | 29 ++++++++++++++-----------
 3 files changed, 21 insertions(+), 14 deletions(-)
diff --git a/milli/Cargo.toml b/milli/Cargo.toml
index 2bb6a50a1..fbe756ac6 100644
--- a/milli/Cargo.toml
+++ b/milli/Cargo.toml
@@ -18,7 +18,8 @@ fst = "0.4.7"
 fxhash = "0.2.1"
 geoutils = "0.4.1"
 grenad = { version = "0.4.2", default-features = false, features = ["tempfile"] }
-heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.1", default-features = false, features = ["lmdb", "sync-read-txn"] }
+# heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.1", default-features = false, features = ["lmdb", "sync-read-txn"] }
+heed = { git = "https://github.com/meilisearch/heed", branch = "compute_size", default-features = false, features = ["lmdb", "sync-read-txn"] }
 json-depth-checker = { path = "../json-depth-checker" }
 levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
 memmap2 = "0.5.3"
diff --git a/milli/src/error.rs b/milli/src/error.rs
index c817f64fa..d3f0a179f 100644
--- a/milli/src/error.rs
+++ b/milli/src/error.rs
@@ -116,6 +116,8 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
         }
     )]
     InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String> },
+    #[error("{}", HeedError::BadOpenOptions)]
+    InvalidLmdbOpenOptions,
     #[error("The sort ranking rule must be specified in the ranking rules settings to use the sort parameter at search time.")]
     SortRankingRuleMissing,
     #[error("The database file is in an invalid state.")]
@@ -244,6 +246,7 @@ impl From<HeedError> for Error {
             HeedError::Decoding => InternalError(Serialization(Decoding { db_name: None })),
             HeedError::InvalidDatabaseTyping => InternalError(InvalidDatabaseTyping),
             HeedError::DatabaseClosing => InternalError(DatabaseClosing),
+            HeedError::BadOpenOptions => UserError(InvalidLmdbOpenOptions),
         }
     }
 }
diff --git a/milli/src/update/index_documents/mod.rs b/milli/src/update/index_documents/mod.rs
index d1f030fdd..f5e04435d 100644
--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@@ -278,27 +278,30 @@ where
         let stop_words = self.index.stop_words(self.wtxn)?;
         let exact_attributes = self.index.exact_attributes_ids(self.wtxn)?;
 
+        let pool_params = GrenadParameters {
+            chunk_compression_type: self.indexer_config.chunk_compression_type,
+            chunk_compression_level: self.indexer_config.chunk_compression_level,
+            max_memory: self.indexer_config.max_memory,
+            max_nb_chunks: self.indexer_config.max_nb_chunks, // default value, may be chosen.
+        };
+        let documents_chunk_size =
+            self.indexer_config.documents_chunk_size.unwrap_or(1024 * 1024 * 4); // 4MiB
+        let max_positions_per_attributes = self.indexer_config.max_positions_per_attributes;
+
         // Run extraction pipeline in parallel.
         pool.install(|| {
-            let params = GrenadParameters {
-                chunk_compression_type: self.indexer_config.chunk_compression_type,
-                chunk_compression_level: self.indexer_config.chunk_compression_level,
-                max_memory: self.indexer_config.max_memory,
-                max_nb_chunks: self.indexer_config.max_nb_chunks, // default value, may be chosen.
-            };
-
             // split obkv file into several chunks
             let original_chunk_iter = grenad_obkv_into_chunks(
                 original_documents,
-                params.clone(),
-                self.indexer_config.documents_chunk_size.unwrap_or(1024 * 1024 * 4), // 4MiB
+                pool_params.clone(),
+                documents_chunk_size,
             );
 
             // split obkv file into several chunks
             let flattened_chunk_iter = grenad_obkv_into_chunks(
                 flattened_documents,
-                params.clone(),
-                self.indexer_config.documents_chunk_size.unwrap_or(1024 * 1024 * 4), // 4MiB
+                pool_params.clone(),
+                documents_chunk_size,
             );
 
             let result = original_chunk_iter
@@ -308,14 +311,14 @@ where
                     extract::data_from_obkv_documents(
                         original_chunk,
                         flattened_chunk,
-                        params,
+                        pool_params,
                         lmdb_writer_sx.clone(),
                         searchable_fields,
                         faceted_fields,
                         primary_key_id,
                         geo_fields_ids,
                         stop_words,
-                        self.indexer_config.max_positions_per_attributes,
+                        max_positions_per_attributes,
                         exact_attributes,
                     )
                 });