mirror of
https://github.com/meilisearch/MeiliSearch
synced 2024-11-29 16:24:26 +01:00
bump heed
This commit is contained in:
parent
087da5621a
commit
e96b852107
@ -18,7 +18,8 @@ fst = "0.4.7"
|
|||||||
fxhash = "0.2.1"
|
fxhash = "0.2.1"
|
||||||
geoutils = "0.4.1"
|
geoutils = "0.4.1"
|
||||||
grenad = { version = "0.4.2", default-features = false, features = ["tempfile"] }
|
grenad = { version = "0.4.2", default-features = false, features = ["tempfile"] }
|
||||||
heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.1", default-features = false, features = ["lmdb", "sync-read-txn"] }
|
# heed = { git = "https://github.com/meilisearch/heed", tag = "v0.12.1", default-features = false, features = ["lmdb", "sync-read-txn"] }
|
||||||
|
heed = { git = "https://github.com/meilisearch/heed", branch = "compute_size", default-features = false, features = ["lmdb", "sync-read-txn"] }
|
||||||
json-depth-checker = { path = "../json-depth-checker" }
|
json-depth-checker = { path = "../json-depth-checker" }
|
||||||
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
|
levenshtein_automata = { version = "0.2.1", features = ["fst_automaton"] }
|
||||||
memmap2 = "0.5.3"
|
memmap2 = "0.5.3"
|
||||||
|
@ -116,6 +116,8 @@ only composed of alphanumeric characters (a-z A-Z 0-9), hyphens (-) and undersco
|
|||||||
}
|
}
|
||||||
)]
|
)]
|
||||||
InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String> },
|
InvalidSortableAttribute { field: String, valid_fields: BTreeSet<String> },
|
||||||
|
#[error("{}", HeedError::BadOpenOptions)]
|
||||||
|
InvalidLmdbOpenOptions,
|
||||||
#[error("The sort ranking rule must be specified in the ranking rules settings to use the sort parameter at search time.")]
|
#[error("The sort ranking rule must be specified in the ranking rules settings to use the sort parameter at search time.")]
|
||||||
SortRankingRuleMissing,
|
SortRankingRuleMissing,
|
||||||
#[error("The database file is in an invalid state.")]
|
#[error("The database file is in an invalid state.")]
|
||||||
@ -244,6 +246,7 @@ impl From<HeedError> for Error {
|
|||||||
HeedError::Decoding => InternalError(Serialization(Decoding { db_name: None })),
|
HeedError::Decoding => InternalError(Serialization(Decoding { db_name: None })),
|
||||||
HeedError::InvalidDatabaseTyping => InternalError(InvalidDatabaseTyping),
|
HeedError::InvalidDatabaseTyping => InternalError(InvalidDatabaseTyping),
|
||||||
HeedError::DatabaseClosing => InternalError(DatabaseClosing),
|
HeedError::DatabaseClosing => InternalError(DatabaseClosing),
|
||||||
|
HeedError::BadOpenOptions => UserError(InvalidLmdbOpenOptions),
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -278,27 +278,30 @@ where
|
|||||||
let stop_words = self.index.stop_words(self.wtxn)?;
|
let stop_words = self.index.stop_words(self.wtxn)?;
|
||||||
let exact_attributes = self.index.exact_attributes_ids(self.wtxn)?;
|
let exact_attributes = self.index.exact_attributes_ids(self.wtxn)?;
|
||||||
|
|
||||||
|
let pool_params = GrenadParameters {
|
||||||
|
chunk_compression_type: self.indexer_config.chunk_compression_type,
|
||||||
|
chunk_compression_level: self.indexer_config.chunk_compression_level,
|
||||||
|
max_memory: self.indexer_config.max_memory,
|
||||||
|
max_nb_chunks: self.indexer_config.max_nb_chunks, // default value, may be chosen.
|
||||||
|
};
|
||||||
|
let documents_chunk_size =
|
||||||
|
self.indexer_config.documents_chunk_size.unwrap_or(1024 * 1024 * 4); // 4MiB
|
||||||
|
let max_positions_per_attributes = self.indexer_config.max_positions_per_attributes;
|
||||||
|
|
||||||
// Run extraction pipeline in parallel.
|
// Run extraction pipeline in parallel.
|
||||||
pool.install(|| {
|
pool.install(|| {
|
||||||
let params = GrenadParameters {
|
|
||||||
chunk_compression_type: self.indexer_config.chunk_compression_type,
|
|
||||||
chunk_compression_level: self.indexer_config.chunk_compression_level,
|
|
||||||
max_memory: self.indexer_config.max_memory,
|
|
||||||
max_nb_chunks: self.indexer_config.max_nb_chunks, // default value, may be chosen.
|
|
||||||
};
|
|
||||||
|
|
||||||
// split obkv file into several chunks
|
// split obkv file into several chunks
|
||||||
let original_chunk_iter = grenad_obkv_into_chunks(
|
let original_chunk_iter = grenad_obkv_into_chunks(
|
||||||
original_documents,
|
original_documents,
|
||||||
params.clone(),
|
pool_params.clone(),
|
||||||
self.indexer_config.documents_chunk_size.unwrap_or(1024 * 1024 * 4), // 4MiB
|
documents_chunk_size,
|
||||||
);
|
);
|
||||||
|
|
||||||
// split obkv file into several chunks
|
// split obkv file into several chunks
|
||||||
let flattened_chunk_iter = grenad_obkv_into_chunks(
|
let flattened_chunk_iter = grenad_obkv_into_chunks(
|
||||||
flattened_documents,
|
flattened_documents,
|
||||||
params.clone(),
|
pool_params.clone(),
|
||||||
self.indexer_config.documents_chunk_size.unwrap_or(1024 * 1024 * 4), // 4MiB
|
documents_chunk_size,
|
||||||
);
|
);
|
||||||
|
|
||||||
let result = original_chunk_iter
|
let result = original_chunk_iter
|
||||||
@ -308,14 +311,14 @@ where
|
|||||||
extract::data_from_obkv_documents(
|
extract::data_from_obkv_documents(
|
||||||
original_chunk,
|
original_chunk,
|
||||||
flattened_chunk,
|
flattened_chunk,
|
||||||
params,
|
pool_params,
|
||||||
lmdb_writer_sx.clone(),
|
lmdb_writer_sx.clone(),
|
||||||
searchable_fields,
|
searchable_fields,
|
||||||
faceted_fields,
|
faceted_fields,
|
||||||
primary_key_id,
|
primary_key_id,
|
||||||
geo_fields_ids,
|
geo_fields_ids,
|
||||||
stop_words,
|
stop_words,
|
||||||
self.indexer_config.max_positions_per_attributes,
|
max_positions_per_attributes,
|
||||||
exact_attributes,
|
exact_attributes,
|
||||||
)
|
)
|
||||||
});
|
});
|
||||||
|
Loading…
Reference in New Issue
Block a user