bump heed

2025-05-25 09:03:59 +02:00 · 2022-08-10 16:25:24 +02:00 · 2022-08-10 16:25:24 +02:00 · e96b852107
commit e96b852107
parent 087da5621a
3 changed files with 21 additions and 14 deletions
--- a/milli/Cargo.toml
+++ b/milli/Cargo.toml
@@ -18,7 +18,8 @@ fst = "0.4.7"
 fxhash = "0.2.1"
 geoutils = "0.4.1"
 grenad = { version = "0.4.2", default-features = false, features = ["tempfile"] }
-heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.1", default-features = false, features = ["lmdb", "sync-read-txn"] }
+# heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.1", default-features = false, features = ["lmdb", "sync-read-txn"] }
+heed = { git = "https://github.com/meilisearch/heed", branch = "compute_size", default-features = false, features = ["lmdb", "sync-read-txn"] }
 json-depth-checker = { path = "../json-depth-checker" }
 levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
 memmap2 = "0.5.3"
--- a/milli/src/error.rs
+++ b/milli/src/error.rs
@@ -116,6 +116,8 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
        }
    )]
    InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String> },
+    #[error("{}", HeedError::BadOpenOptions)]
+    InvalidLmdbOpenOptions,
    #[error("The sort ranking rule must be specified in the ranking rules settings to use the sort parameter at search time.")]
    SortRankingRuleMissing,
    #[error("The database file is in an invalid state.")]
@@ -244,6 +246,7 @@ impl From<HeedError> for Error {
            HeedError::Decoding => InternalError(Serialization(Decoding { db_name: None })),
            HeedError::InvalidDatabaseTyping => InternalError(InvalidDatabaseTyping),
            HeedError::DatabaseClosing => InternalError(DatabaseClosing),
+            HeedError::BadOpenOptions => UserError(InvalidLmdbOpenOptions),
        }
    }
 }
--- a/milli/src/update/index_documents/mod.rs
+++ b/milli/src/update/index_documents/mod.rs
@@ -278,27 +278,30 @@ where
        let stop_words = self.index.stop_words(self.wtxn)?;
        let exact_attributes = self.index.exact_attributes_ids(self.wtxn)?;
+        let pool_params = GrenadParameters {
+            chunk_compression_type: self.indexer_config.chunk_compression_type,
+            chunk_compression_level: self.indexer_config.chunk_compression_level,
+            max_memory: self.indexer_config.max_memory,
+            max_nb_chunks: self.indexer_config.max_nb_chunks, // default value, may be chosen.
+        };
+        let documents_chunk_size =
+            self.indexer_config.documents_chunk_size.unwrap_or(1024 * 1024 * 4); // 4MiB
+        let max_positions_per_attributes = self.indexer_config.max_positions_per_attributes;
        // Run extraction pipeline in parallel.
        pool.install(|| {
-            let params = GrenadParameters {
-                chunk_compression_type: self.indexer_config.chunk_compression_type,
-                chunk_compression_level: self.indexer_config.chunk_compression_level,
-                max_memory: self.indexer_config.max_memory,
-                max_nb_chunks: self.indexer_config.max_nb_chunks, // default value, may be chosen.
-            };
            // split obkv file into several chunks
            let original_chunk_iter = grenad_obkv_into_chunks(
                original_documents,
-                params.clone(),
+                pool_params.clone(),
-                self.indexer_config.documents_chunk_size.unwrap_or(1024 * 1024 * 4), // 4MiB
+                documents_chunk_size,
            );
            // split obkv file into several chunks
            let flattened_chunk_iter = grenad_obkv_into_chunks(
                flattened_documents,
-                params.clone(),
+                pool_params.clone(),
-                self.indexer_config.documents_chunk_size.unwrap_or(1024 * 1024 * 4), // 4MiB
+                documents_chunk_size,
            );
            let result = original_chunk_iter
@@ -308,14 +311,14 @@ where
                    extract::data_from_obkv_documents(
                        original_chunk,
                        flattened_chunk,
-                        params,
+                        pool_params,
                        lmdb_writer_sx.clone(),
                        searchable_fields,
                        faceted_fields,
                        primary_key_id,
                        geo_fields_ids,
                        stop_words,
-                        self.indexer_config.max_positions_per_attributes,
+                        max_positions_per_attributes,
                        exact_attributes,
                    )
                });