bump milli version and fix a performance issue for large dumps

Author: tamo
Date:   2021-05-06 11:57:42 +02:00
Parent: 956012da95
Commit: 26dcb9e66d
3 changed files with 7 additions and 9 deletions

Cargo.lock (generated)

@@ -1,5 +1,7 @@
 # This file is automatically @generated by Cargo.
 # It is not intended for manual editing.
+version = 3
+
 [[package]]
 name = "actix-codec"
 version = "0.4.0"
@@ -1840,8 +1842,8 @@ dependencies = [
 
 [[package]]
 name = "milli"
-version = "0.2.0"
-source = "git+https://github.com/meilisearch/milli.git?tag=v0.2.0#792225eaffce6b3682f9b30b7370b6a547c4757e"
+version = "0.2.1"
+source = "git+https://github.com/meilisearch/milli.git?tag=v0.2.1#25f75d4d03732131e6edcf20f4d126210b159d43"
 dependencies = [
  "anyhow",
  "bstr",

Cargo.toml

@@ -51,7 +51,7 @@ main_error = "0.1.0"
 meilisearch-error = { path = "../meilisearch-error" }
 meilisearch-tokenizer = { git = "https://github.com/meilisearch/Tokenizer.git", tag = "v0.2.2" }
 memmap = "0.7.0"
-milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.2.0" }
+milli = { git = "https://github.com/meilisearch/milli.git", tag = "v0.2.1" }
 mime = "0.3.16"
 once_cell = "1.5.2"
 oxidized-json-checker = "0.3.2"


@@ -333,16 +333,12 @@ impl<S: IndexStore + Sync + Send> IndexActor<S> {
         // Get write txn to wait for ongoing write transaction before dump.
         let txn = index.write_txn()?;
-        let documents_ids = index.documents_ids(&txn)?;
-        // TODO: TAMO: calling this function here can consume **a lot** of RAM, we should
-        // use some kind of iterators -> waiting for a milli release
-        let documents = index.documents(&txn, documents_ids)?;
         let fields_ids_map = index.fields_ids_map(&txn)?;
         // we want to save **all** the fields in the dump.
         let fields_to_dump: Vec<u8> = fields_ids_map.iter().map(|(id, _)| id).collect();
-        for (_doc_id, document) in documents {
+        for document in index.all_documents(&txn)? {
+            let (_doc_id, document) = document?;
             let json = milli::obkv_to_json(&fields_to_dump, &fields_ids_map, document)?;
             file.write_all(serde_json::to_string(&json)?.as_bytes())?;
             file.write_all(b"\n")?;