move to our new S3 lib

This commit is contained in:
Tamo 2023-09-28 11:18:56 +02:00
parent 6325cda74f
commit 98b67f217a
9 changed files with 116 additions and 212 deletions

214
Cargo.lock generated
View File

@ -428,54 +428,12 @@ dependencies = [
"critical-section",
]
[[package]]
name = "attohttpc"
version = "0.22.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fcf00bc6d5abb29b5f97e3c61a90b6d3caa12f3faf897d4a3e3607c050a35a7"
dependencies = [
"http",
"log",
"rustls 0.20.8",
"serde",
"serde_json",
"url",
"webpki",
"webpki-roots 0.22.6",
]
[[package]]
name = "autocfg"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "aws-creds"
version = "0.34.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3776743bb68d4ad02ba30ba8f64373f1be4e082fe47651767171ce75bb2f6cf5"
dependencies = [
"attohttpc",
"dirs",
"log",
"quick-xml",
"rust-ini",
"serde",
"thiserror",
"time",
"url",
]
[[package]]
name = "aws-region"
version = "0.25.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "056557a61427d0e5ba29dd931031c8ffed4ee7a550e7cd55692a9d8deb0a9dba"
dependencies = [
"thiserror",
]
[[package]]
name = "backtrace"
version = "0.3.67"
@ -1230,15 +1188,6 @@ dependencies = [
"subtle",
]
[[package]]
name = "dirs"
version = "4.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca3aa72a6f96ea37bbc5aa912f6788242832f75369bdfdadcb0e38423f100059"
dependencies = [
"dirs-sys",
]
[[package]]
name = "dirs-next"
version = "1.0.2"
@ -1249,17 +1198,6 @@ dependencies = [
"dirs-sys-next",
]
[[package]]
name = "dirs-sys"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b1d1d91c932ef41c0f2663aa8b0ca0342d444d842c06914aa0a7e352d0bada6"
dependencies = [
"libc",
"redox_users",
"winapi 0.3.9",
]
[[package]]
name = "dirs-sys-next"
version = "0.1.2"
@ -1271,12 +1209,6 @@ dependencies = [
"winapi 0.3.9",
]
[[package]]
name = "dlv-list"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0688c2a7f92e427f44895cd63841bff7b29f8d7a1648b9e7e07a4a365b2e1257"
[[package]]
name = "dump"
version = "1.3.0"
@ -1548,9 +1480,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]]
name = "form_urlencoded"
version = "1.1.0"
version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a9c384f161156f5260c24a097c56119f9be8c798586aecc13afbcbe7b7e26bf8"
checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652"
dependencies = [
"percent-encoding",
]
@ -1980,9 +1912,9 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
[[package]]
name = "idna"
version = "0.3.0"
version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e14ddfc70884202db2244c223200c204c2bda1bc6e0998d11b5e024d657209e6"
checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c"
dependencies = [
"unicode-bidi",
"unicode-normalization",
@ -2011,9 +1943,9 @@ dependencies = [
"parking_lot",
"puffin",
"roaring",
"rust-s3",
"serde",
"serde_json",
"strois",
"synchronoise",
"tempfile",
"thiserror",
@ -2567,9 +2499,9 @@ dependencies = [
[[package]]
name = "log"
version = "0.4.19"
version = "0.4.20"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4"
checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
[[package]]
name = "logging_timer"
@ -2618,14 +2550,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
[[package]]
name = "maybe-async"
version = "0.2.7"
name = "md-5"
version = "0.10.6"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f1b8c13cb1f814b634a96b2c725449fe7ed464a7b8781de8688be5ffbd3f305"
checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf"
dependencies = [
"proc-macro2",
"quote",
"syn 1.0.109",
"cfg-if 1.0.0",
"digest",
]
[[package]]
@ -2705,7 +2636,6 @@ dependencies = [
"rayon",
"regex",
"reqwest",
"rust-s3",
"rustls 0.20.8",
"rustls-pemfile",
"segment",
@ -2717,6 +2647,7 @@ dependencies = [
"siphasher",
"slice-group-by",
"static-files",
"strois",
"sysinfo",
"tar",
"temp-env",
@ -3094,16 +3025,6 @@ dependencies = [
"num-traits",
]
[[package]]
name = "ordered-multimap"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ccd746e37177e1711c20dd619a1620f34f5c8b569c53590a72dedd5344d8924a"
dependencies = [
"dlv-list",
"hashbrown 0.12.3",
]
[[package]]
name = "page_size"
version = "0.4.2"
@ -3202,9 +3123,9 @@ dependencies = [
[[package]]
name = "percent-encoding"
version = "2.2.0"
version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e"
checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94"
[[package]]
name = "permissive-json-pointer"
@ -3463,9 +3384,9 @@ dependencies = [
[[package]]
name = "quick-xml"
version = "0.26.0"
version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f50b1c63b38611e7d4d7f68b82d3ad0cc71a2ad2e7f61fc10f1328d917c93cd"
checksum = "eff6510e86862b57b210fd8cbe8ed3f0d7d600b9c2863cd4549a2e033c66e956"
dependencies = [
"memchr",
"serde",
@ -3678,45 +3599,6 @@ dependencies = [
"smallvec",
]
[[package]]
name = "rust-ini"
version = "0.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6d5f2436026b4f6e79dc829837d467cc7e9a55ee40e750d716713540715a2df"
dependencies = [
"cfg-if 1.0.0",
"ordered-multimap",
]
[[package]]
name = "rust-s3"
version = "0.33.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b2ac5ff6acfbe74226fa701b5ef793aaa054055c13ebb7060ad36942956e027"
dependencies = [
"async-trait",
"attohttpc",
"aws-creds",
"aws-region",
"base64 0.13.1",
"bytes 1.4.0",
"cfg-if 1.0.0",
"hex",
"hmac",
"http",
"log",
"maybe-async",
"md5",
"percent-encoding",
"quick-xml",
"serde",
"serde_derive",
"sha2",
"thiserror",
"time",
"url",
]
[[package]]
name = "rustc-demangle"
version = "0.1.23"
@ -3822,6 +3704,24 @@ version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4f3208ce4d8448b3f3e7d168a73f5e0c43a61e32930de3bceeccedb388b6bf06"
[[package]]
name = "rusty-s3"
version = "0.4.1"
source = "git+https://github.com/paolobarbolini/rusty-s3?rev=0b60b9d#0b60b9d10f2e3db01bb772768ae0afa0d57a7239"
dependencies = [
"base64 0.21.2",
"hmac",
"md-5",
"percent-encoding",
"quick-xml",
"serde",
"serde_json",
"sha2",
"time",
"url",
"zeroize",
]
[[package]]
name = "ryu"
version = "1.0.13"
@ -3875,9 +3775,9 @@ checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed"
[[package]]
name = "serde"
version = "1.0.180"
version = "1.0.188"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ea67f183f058fe88a4e3ec6e2788e003840893b91bac4559cabedd00863b3ed"
checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e"
dependencies = [
"serde_derive",
]
@ -3902,9 +3802,9 @@ dependencies = [
[[package]]
name = "serde_derive"
version = "1.0.180"
version = "1.0.188"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24e744d7782b686ab3b73267ef05697159cc0e5abbed3f47f9933165e5219036"
checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2"
dependencies = [
"proc-macro2",
"quote",
@ -4106,6 +4006,21 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "strois"
version = "0.1.0"
source = "git+http://github.com/meilisearch/strois?branch=main#f977fd50e8e082d631a1dbf98f4e75e263112981"
dependencies = [
"http",
"log",
"quick-xml",
"rusty-s3",
"serde",
"thiserror",
"ureq",
"url",
]
[[package]]
name = "strsim"
version = "0.10.0"
@ -4220,18 +4135,18 @@ dependencies = [
[[package]]
name = "thiserror"
version = "1.0.44"
version = "1.0.49"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "611040a08a0439f8248d1990b111c95baa9c704c805fa1f62104b39655fd7f90"
checksum = "1177e8c6d7ede7afde3585fd2513e611227efd6481bd78d2e82ba1ce16557ed4"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "1.0.44"
version = "1.0.49"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "090198534930841fab3a5d1bb637cde49e339654e606195f8d9c76eeb081dc96"
checksum = "10712f02019e9288794769fba95cd6847df9874d49d871d062172f9dd41bc4cc"
dependencies = [
"proc-macro2",
"quote",
@ -4500,19 +4415,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b11c96ac7ee530603dcdf68ed1557050f374ce55a5a07193ebf8cbc9f8927e9"
dependencies = [
"base64 0.21.2",
"flate2",
"log",
"once_cell",
"rustls 0.21.1",
"rustls-webpki",
"serde",
"serde_json",
"url",
"webpki-roots 0.23.1",
]
[[package]]
name = "url"
version = "2.3.1"
version = "2.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d68c799ae75762b8c3fe375feb6600ef5602c883c5d21eb51c09f22b83c4643"
checksum = "143b538f18257fac9cad154828a57c6bf5157e1aa604d4816b5995bf6de87ae5"
dependencies = [
"form_urlencoded",
"idna",
@ -4993,6 +4911,12 @@ dependencies = [
"synstructure",
]
[[package]]
name = "zeroize"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a0956f1ba7c7909bfb66c2e9e4124ab6f6482560f6628b5aaeba39207c9aad9"
[[package]]
name = "zip"
version = "0.6.6"

View File

@ -34,7 +34,7 @@ uuid = { version = "1.3.1", features = ["serde", "v4"] }
tokio = { version = "1.27.0", features = ["full"] }
zookeeper = "0.8.0"
parking_lot = "0.12.1"
rust-s3 = { version = "0.33.0", default-features = false, features = ["sync-rustls-tls"] }
strois = { git = "http://github.com/meilisearch/strois", branch = "main" }
[dev-dependencies]
big_s = "1.0.2"

View File

@ -46,6 +46,7 @@ use dump::{KindDump, TaskDump, UpdateFile};
pub use error::Error;
pub use features::RoFeatures;
use file_store::FileStore;
use strois::Bucket;
use meilisearch_types::error::ResponseError;
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
@ -56,7 +57,6 @@ use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmap
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use parking_lot::{MappedRwLockReadGuard, RwLock, RwLockReadGuard};
use roaring::RoaringBitmap;
use s3::Bucket;
use synchronoise::SignalEvent;
use tempfile::TempDir;
use time::format_description::well_known::Rfc3339;
@ -74,6 +74,9 @@ use crate::utils::{check_index_swap_validity, clamp_to_page_size};
pub(crate) type BEI128 =
meilisearch_types::heed::zerocopy::I128<meilisearch_types::heed::byteorder::BE>;
/// Size, in bytes, of the buffer used to send each part of a multipart request to S3.
pub const S3_PART_SIZE: usize = {
    // Expressed through a named unit so the magnitude reads at a glance.
    const MIB: usize = 1024 * 1024;
    50 * MIB
};
/// Defines a subset of tasks to be retrieved from the [`IndexScheduler`].
///
/// An empty/default query (where each field is set to `None`) matches all tasks.
@ -467,14 +470,7 @@ impl IndexScheduler {
.unwrap();
let s3 = inner.options.s3.as_ref().unwrap();
let task =
s3.get_object(format!("/tasks/{id:0>10}")).unwrap();
assert_eq!(
task.status_code(),
200,
"could not reach the s3: {:?}",
task.as_str()
);
let task = serde_json::from_slice(task.as_slice()).unwrap();
s3.get_object_json(format!("tasks/{id:0>10}")).unwrap();
inner.register_raw_task(&mut wtxn, &task).unwrap();
// we received a new task, we must wake up
self.wake_up.signal();
@ -511,14 +507,8 @@ impl IndexScheduler {
.map(|(_, id)| id.parse::<u32>().unwrap())
.unwrap();
let s3 = inner.options.s3.as_ref().unwrap();
let task = s3.get_object(format!("tasks/{id:0>10}")).unwrap();
assert_eq!(
task.status_code(),
200,
"could not reach the s3: {:?}",
task.as_str()
);
let task = serde_json::from_slice(task.as_slice()).unwrap();
let task =
s3.get_object_json(format!("tasks/{id:0>10}")).unwrap();
inner.register_raw_task(&mut wtxn, &task).unwrap();
wtxn.commit().unwrap();
}
@ -589,11 +579,7 @@ impl IndexScheduler {
);
let mut version_file_path =
File::open(&inner.version_file_path).unwrap();
s3.put_object_stream(&mut version_file_path, dst)
.or_else(|e| match e {
s3::error::S3Error::Http(404, _) => Ok(200),
e => Err(e),
})
s3.put_object_multipart(dst, &mut version_file_path, S3_PART_SIZE)
.unwrap();
version_file_path.sync_data().unwrap();
drop(version_file_path);
@ -610,14 +596,11 @@ impl IndexScheduler {
heed::CompactionOption::Enabled,
)
.unwrap();
s3.put_object_stream(
&mut file,
s3.put_object_multipart(
format!("{snapshot_dir}/tasks.mdb"),
&mut file,
S3_PART_SIZE,
)
.or_else(|e| match e {
s3::error::S3Error::Http(404, _) => Ok(200),
e => Err(e),
})
.unwrap();
temp.close().unwrap();
@ -637,11 +620,11 @@ impl IndexScheduler {
heed::CompactionOption::Enabled,
)
.unwrap();
s3.put_object_stream(&mut file, format!("{dst}/{uuid}.mdb"))
.or_else(|e| match e {
s3::error::S3Error::Http(404, _) => Ok(200),
e => Err(e),
})
s3.put_object_multipart(
format!("{dst}/{uuid}.mdb"),
&mut file,
S3_PART_SIZE,
)
.unwrap();
temp.close().unwrap();
}
@ -925,26 +908,23 @@ fn load_snapshot(this: &IndexScheduler, path: &str) -> anyhow::Result<()> {
log::info!("Downloading the index scheduler database.");
let tasks_snapshot = format!("{path}/tasks.mdb");
let status = s3.get_object_to_writer(tasks_snapshot, &mut tasks_file)?;
assert!(matches!(status, 200 | 202));
s3.get_object_to_writer(tasks_snapshot, &mut tasks_file)?;
log::info!("Downloading the indexes databases");
let indexes_files = tempfile::TempDir::new_in(&base_path)?;
let src = format!("{path}/indexes");
let uuids = s3.list(src.clone(), None)?.into_iter().flat_map(|lbr| {
lbr.contents.into_iter().map(|o| {
let (_, name) = o.key.rsplit_once('/').unwrap();
let uuids = s3.list_objects(&src)?.into_iter().map(|lbr| {
let key = lbr.unwrap().key;
let (_, name) = key.rsplit_once('/').unwrap();
name.to_string()
})
});
for uuid in uuids {
log::info!("\tDownloading the index {}", uuid);
std::fs::create_dir_all(indexes_files.path().join(&uuid).with_extension(""))?;
let path = indexes_files.path().join(&uuid).with_extension("").join("data.mdb");
let mut file = File::create(path)?;
let status = s3.get_object_to_writer(format!("{src}/{uuid}"), &mut file)?;
assert!(matches!(status, 200 | 202));
s3.get_object_to_writer(format!("{src}/{uuid}"), &mut file)?;
}
// 3. Lock the index-mapper and close all the env
@ -1273,7 +1253,7 @@ impl IndexSchedulerInner {
for uuid in batch.content_uuids() {
// TODO use a real UUIDv4
let (_, file) = self.file_store.new_update_with_uuid(uuid.as_u128())?;
s3.get_object_to_writer(&format!("/update-files/{}", uuid), &mut &*file).unwrap();
s3.get_object_to_writer(&format!("update-files/{}", uuid), &mut &*file).unwrap();
file.persist()?;
}
}

View File

@ -1,4 +1,3 @@
use std::borrow::Borrow;
use std::fmt::{self, Debug, Display};
use std::fs::File;
use std::io::{self, Seek, Write};
@ -42,7 +41,7 @@ impl Display for DocumentFormatError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Self::Io(e) => write!(f, "{e}"),
Self::MalformedPayload(me, b) => match me.borrow() {
Self::MalformedPayload(me, b) => match me {
Error::Json(se) => {
let mut message = match se.classify() {
Category::Data => {

View File

@ -175,6 +175,7 @@ macro_rules! make_error_codes {
// An exhaustive list of all the error codes used by meilisearch.
make_error_codes! {
S3Error , System , INTERNAL_SERVER_ERROR;
ApiKeyAlreadyExists , InvalidRequest , CONFLICT ;
ApiKeyNotFound , InvalidRequest , NOT_FOUND ;
BadParameter , InvalidRequest , BAD_REQUEST;

View File

@ -80,7 +80,7 @@ reqwest = { version = "0.11.16", features = [
], default-features = false }
rustls = "0.20.8"
rustls-pemfile = "1.0.2"
rust-s3 = { version = "0.33.0", default-features = false, features = ["sync-rustls-tls"] }
strois = { git = "http://github.com/meilisearch/strois", branch = "main" }
segment = { version = "0.2.2", optional = true }
serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] }

View File

@ -33,6 +33,8 @@ pub enum MeilisearchHttpError {
.0.iter().map(|uid| format!("\"{uid}\"")).collect::<Vec<_>>().join(", "), .0.len()
)]
SwapIndexPayloadWrongLength(Vec<IndexUid>),
#[error("S3 Error: {0}")]
S3Error(#[from] strois::Error),
#[error(transparent)]
IndexUid(#[from] IndexUidFormatError),
#[error(transparent)]
@ -65,6 +67,7 @@ impl ErrorCode for MeilisearchHttpError {
MeilisearchHttpError::InvalidExpression(_, _) => Code::InvalidSearchFilter,
MeilisearchHttpError::PayloadTooLarge(_) => Code::PayloadTooLarge,
MeilisearchHttpError::SwapIndexPayloadWrongLength(_) => Code::InvalidSwapIndexes,
MeilisearchHttpError::S3Error(_) => Code::S3Error,
MeilisearchHttpError::IndexUid(e) => e.error_code(),
MeilisearchHttpError::SerdeJson(_) => Code::Internal,
MeilisearchHttpError::HeedError(_) => Code::Internal,

View File

@ -30,6 +30,7 @@ use extractors::payload::PayloadConfig;
use http::header::CONTENT_TYPE;
use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
use log::error;
use strois::Client;
use meilisearch_auth::AuthController;
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod};
@ -39,8 +40,6 @@ use meilisearch_types::versioning::{check_version_file, create_version_file};
use meilisearch_types::{compression, milli, VERSION_FILE_NAME};
pub use option::Opt;
use option::ScheduleSnapshot;
use s3::creds::Credentials;
use s3::{Bucket, Region};
use zookeeper::ZooKeeper;
use crate::error::MeilisearchHttpError;
@ -246,19 +245,16 @@ fn open_or_create_database_unchecked(
zookeeper: zookeeper.clone(),
s3: opt.s3_url.as_ref().map(|url| {
Arc::new(
Bucket::new(
opt.s3_bucket.as_deref().unwrap(),
Region::Custom { region: opt.s3_region.clone(), endpoint: url.clone() },
Credentials {
access_key: opt.s3_access_key.clone(),
secret_key: opt.s3_secret_key.clone(),
security_token: opt.s3_security_token.clone(),
session_token: None,
expiration: None,
},
)
Client::builder(url)
.unwrap()
.with_path_style(),
.key(opt.s3_access_key.as_ref().expect("Need s3 key to work").clone())
.secret(opt.s3_secret_key.as_ref().expect("Need s3 secret to work").clone())
.maybe_token(opt.s3_security_token.clone())
.build()
.bucket(opt.s3_bucket.as_ref().expect("Need an s3 bucket to work"))
.unwrap()
.get_or_create()
.unwrap(),
)
}),
}))

View File

@ -413,10 +413,11 @@ async fn document_addition(
if let Some(s3) = s3 {
update_file.seek(SeekFrom::Start(0)).unwrap();
let mut reader = BufReader::new(&*update_file);
match s3.put_object_stream(&mut reader, format!("/update-files/{}", uuid)) {
Ok(_) | Err(s3::error::S3Error::Http(_, _)) => (),
Err(e) => panic!("Error {}", e),
}
s3.put_object_multipart(
format!("update-files/{}", uuid),
&mut reader,
50 * 1024 * 1024,
)?;
}
// we NEED to persist the file here because we moved the `update_file` into another task.