move to our new S3 lib

This commit is contained in:
Tamo 2023-09-28 11:18:56 +02:00
parent 6325cda74f
commit 98b67f217a
9 changed files with 116 additions and 212 deletions

214
Cargo.lock generated
View File

@ -428,54 +428,12 @@ dependencies = [
"critical-section", "critical-section",
] ]
[[package]]
name = "attohttpc"
version = "0.22.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1fcf00bc6d5abb29b5f97e3c61a90b6d3caa12f3faf897d4a3e3607c050a35a7"
dependencies = [
"http",
"log",
"rustls 0.20.8",
"serde",
"serde_json",
"url",
"webpki",
"webpki-roots 0.22.6",
]
[[package]] [[package]]
name = "autocfg" name = "autocfg"
version = "1.1.0" version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa" checksum = "d468802bab17cbc0cc575e9b053f41e72aa36bfa6b7f55e3529ffa43161b97fa"
[[package]]
name = "aws-creds"
version = "0.34.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3776743bb68d4ad02ba30ba8f64373f1be4e082fe47651767171ce75bb2f6cf5"
dependencies = [
"attohttpc",
"dirs",
"log",
"quick-xml",
"rust-ini",
"serde",
"thiserror",
"time",
"url",
]
[[package]]
name = "aws-region"
version = "0.25.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "056557a61427d0e5ba29dd931031c8ffed4ee7a550e7cd55692a9d8deb0a9dba"
dependencies = [
"thiserror",
]
[[package]] [[package]]
name = "backtrace" name = "backtrace"
version = "0.3.67" version = "0.3.67"
@ -1230,15 +1188,6 @@ dependencies = [
"subtle", "subtle",
] ]
[[package]]
name = "dirs"
version = "4.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ca3aa72a6f96ea37bbc5aa912f6788242832f75369bdfdadcb0e38423f100059"
dependencies = [
"dirs-sys",
]
[[package]] [[package]]
name = "dirs-next" name = "dirs-next"
version = "1.0.2" version = "1.0.2"
@ -1249,17 +1198,6 @@ dependencies = [
"dirs-sys-next", "dirs-sys-next",
] ]
[[package]]
name = "dirs-sys"
version = "0.3.7"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b1d1d91c932ef41c0f2663aa8b0ca0342d444d842c06914aa0a7e352d0bada6"
dependencies = [
"libc",
"redox_users",
"winapi 0.3.9",
]
[[package]] [[package]]
name = "dirs-sys-next" name = "dirs-sys-next"
version = "0.1.2" version = "0.1.2"
@ -1271,12 +1209,6 @@ dependencies = [
"winapi 0.3.9", "winapi 0.3.9",
] ]
[[package]]
name = "dlv-list"
version = "0.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0688c2a7f92e427f44895cd63841bff7b29f8d7a1648b9e7e07a4a365b2e1257"
[[package]] [[package]]
name = "dump" name = "dump"
version = "1.3.0" version = "1.3.0"
@ -1548,9 +1480,9 @@ checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
[[package]] [[package]]
name = "form_urlencoded" name = "form_urlencoded"
version = "1.1.0" version = "1.2.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a9c384f161156f5260c24a097c56119f9be8c798586aecc13afbcbe7b7e26bf8" checksum = "a62bc1cf6f830c2ec14a513a9fb124d0a213a629668a4186f329db21fe045652"
dependencies = [ dependencies = [
"percent-encoding", "percent-encoding",
] ]
@ -1980,9 +1912,9 @@ checksum = "b9e0384b61958566e926dc50660321d12159025e767c18e043daf26b70104c39"
[[package]] [[package]]
name = "idna" name = "idna"
version = "0.3.0" version = "0.4.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e14ddfc70884202db2244c223200c204c2bda1bc6e0998d11b5e024d657209e6" checksum = "7d20d6b07bfbc108882d88ed8e37d39636dcc260e15e30c45e6ba089610b917c"
dependencies = [ dependencies = [
"unicode-bidi", "unicode-bidi",
"unicode-normalization", "unicode-normalization",
@ -2011,9 +1943,9 @@ dependencies = [
"parking_lot", "parking_lot",
"puffin", "puffin",
"roaring", "roaring",
"rust-s3",
"serde", "serde",
"serde_json", "serde_json",
"strois",
"synchronoise", "synchronoise",
"tempfile", "tempfile",
"thiserror", "thiserror",
@ -2567,9 +2499,9 @@ dependencies = [
[[package]] [[package]]
name = "log" name = "log"
version = "0.4.19" version = "0.4.20"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "b06a4cde4c0f271a446782e3eff8de789548ce57dbc8eca9292c27f4a42004b4" checksum = "b5e6163cb8c49088c2c36f57875e58ccd8c87c7427f7fbd50ea6710b2f3f2e8f"
[[package]] [[package]]
name = "logging_timer" name = "logging_timer"
@ -2618,14 +2550,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d" checksum = "3e2e65a1a2e43cfcb47a895c4c8b10d1f4a61097f9f254f183aee60cad9c651d"
[[package]] [[package]]
name = "maybe-async" name = "md-5"
version = "0.2.7" version = "0.10.6"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0f1b8c13cb1f814b634a96b2c725449fe7ed464a7b8781de8688be5ffbd3f305" checksum = "d89e7ee0cfbedfc4da3340218492196241d89eefb6dab27de5df917a6d2e78cf"
dependencies = [ dependencies = [
"proc-macro2", "cfg-if 1.0.0",
"quote", "digest",
"syn 1.0.109",
] ]
[[package]] [[package]]
@ -2705,7 +2636,6 @@ dependencies = [
"rayon", "rayon",
"regex", "regex",
"reqwest", "reqwest",
"rust-s3",
"rustls 0.20.8", "rustls 0.20.8",
"rustls-pemfile", "rustls-pemfile",
"segment", "segment",
@ -2717,6 +2647,7 @@ dependencies = [
"siphasher", "siphasher",
"slice-group-by", "slice-group-by",
"static-files", "static-files",
"strois",
"sysinfo", "sysinfo",
"tar", "tar",
"temp-env", "temp-env",
@ -3094,16 +3025,6 @@ dependencies = [
"num-traits", "num-traits",
] ]
[[package]]
name = "ordered-multimap"
version = "0.4.3"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ccd746e37177e1711c20dd619a1620f34f5c8b569c53590a72dedd5344d8924a"
dependencies = [
"dlv-list",
"hashbrown 0.12.3",
]
[[package]] [[package]]
name = "page_size" name = "page_size"
version = "0.4.2" version = "0.4.2"
@ -3202,9 +3123,9 @@ dependencies = [
[[package]] [[package]]
name = "percent-encoding" name = "percent-encoding"
version = "2.2.0" version = "2.3.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "478c572c3d73181ff3c2539045f6eb99e5491218eae919370993b890cdbdd98e" checksum = "9b2a4787296e9989611394c33f193f676704af1686e70b8f8033ab5ba9a35a94"
[[package]] [[package]]
name = "permissive-json-pointer" name = "permissive-json-pointer"
@ -3463,9 +3384,9 @@ dependencies = [
[[package]] [[package]]
name = "quick-xml" name = "quick-xml"
version = "0.26.0" version = "0.30.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7f50b1c63b38611e7d4d7f68b82d3ad0cc71a2ad2e7f61fc10f1328d917c93cd" checksum = "eff6510e86862b57b210fd8cbe8ed3f0d7d600b9c2863cd4549a2e033c66e956"
dependencies = [ dependencies = [
"memchr", "memchr",
"serde", "serde",
@ -3678,45 +3599,6 @@ dependencies = [
"smallvec", "smallvec",
] ]
[[package]]
name = "rust-ini"
version = "0.18.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f6d5f2436026b4f6e79dc829837d467cc7e9a55ee40e750d716713540715a2df"
dependencies = [
"cfg-if 1.0.0",
"ordered-multimap",
]
[[package]]
name = "rust-s3"
version = "0.33.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1b2ac5ff6acfbe74226fa701b5ef793aaa054055c13ebb7060ad36942956e027"
dependencies = [
"async-trait",
"attohttpc",
"aws-creds",
"aws-region",
"base64 0.13.1",
"bytes 1.4.0",
"cfg-if 1.0.0",
"hex",
"hmac",
"http",
"log",
"maybe-async",
"md5",
"percent-encoding",
"quick-xml",
"serde",
"serde_derive",
"sha2",
"thiserror",
"time",
"url",
]
[[package]] [[package]]
name = "rustc-demangle" name = "rustc-demangle"
version = "0.1.23" version = "0.1.23"
@ -3822,6 +3704,24 @@ version = "1.0.12"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4f3208ce4d8448b3f3e7d168a73f5e0c43a61e32930de3bceeccedb388b6bf06" checksum = "4f3208ce4d8448b3f3e7d168a73f5e0c43a61e32930de3bceeccedb388b6bf06"
[[package]]
name = "rusty-s3"
version = "0.4.1"
source = "git+https://github.com/paolobarbolini/rusty-s3?rev=0b60b9d#0b60b9d10f2e3db01bb772768ae0afa0d57a7239"
dependencies = [
"base64 0.21.2",
"hmac",
"md-5",
"percent-encoding",
"quick-xml",
"serde",
"serde_json",
"sha2",
"time",
"url",
"zeroize",
]
[[package]] [[package]]
name = "ryu" name = "ryu"
version = "1.0.13" version = "1.0.13"
@ -3875,9 +3775,9 @@ checksum = "bebd363326d05ec3e2f532ab7660680f3b02130d780c299bca73469d521bc0ed"
[[package]] [[package]]
name = "serde" name = "serde"
version = "1.0.180" version = "1.0.188"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0ea67f183f058fe88a4e3ec6e2788e003840893b91bac4559cabedd00863b3ed" checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e"
dependencies = [ dependencies = [
"serde_derive", "serde_derive",
] ]
@ -3902,9 +3802,9 @@ dependencies = [
[[package]] [[package]]
name = "serde_derive" name = "serde_derive"
version = "1.0.180" version = "1.0.188"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "24e744d7782b686ab3b73267ef05697159cc0e5abbed3f47f9933165e5219036" checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@ -4106,6 +4006,21 @@ version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f" checksum = "a2eb9349b6444b326872e140eb1cf5e7c522154d69e7a0ffb0fb81c06b37543f"
[[package]]
name = "strois"
version = "0.1.0"
source = "git+http://github.com/meilisearch/strois?branch=main#f977fd50e8e082d631a1dbf98f4e75e263112981"
dependencies = [
"http",
"log",
"quick-xml",
"rusty-s3",
"serde",
"thiserror",
"ureq",
"url",
]
[[package]] [[package]]
name = "strsim" name = "strsim"
version = "0.10.0" version = "0.10.0"
@ -4220,18 +4135,18 @@ dependencies = [
[[package]] [[package]]
name = "thiserror" name = "thiserror"
version = "1.0.44" version = "1.0.49"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "611040a08a0439f8248d1990b111c95baa9c704c805fa1f62104b39655fd7f90" checksum = "1177e8c6d7ede7afde3585fd2513e611227efd6481bd78d2e82ba1ce16557ed4"
dependencies = [ dependencies = [
"thiserror-impl", "thiserror-impl",
] ]
[[package]] [[package]]
name = "thiserror-impl" name = "thiserror-impl"
version = "1.0.44" version = "1.0.49"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "090198534930841fab3a5d1bb637cde49e339654e606195f8d9c76eeb081dc96" checksum = "10712f02019e9288794769fba95cd6847df9874d49d871d062172f9dd41bc4cc"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@ -4500,19 +4415,22 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0b11c96ac7ee530603dcdf68ed1557050f374ce55a5a07193ebf8cbc9f8927e9" checksum = "0b11c96ac7ee530603dcdf68ed1557050f374ce55a5a07193ebf8cbc9f8927e9"
dependencies = [ dependencies = [
"base64 0.21.2", "base64 0.21.2",
"flate2",
"log", "log",
"once_cell", "once_cell",
"rustls 0.21.1", "rustls 0.21.1",
"rustls-webpki", "rustls-webpki",
"serde",
"serde_json",
"url", "url",
"webpki-roots 0.23.1", "webpki-roots 0.23.1",
] ]
[[package]] [[package]]
name = "url" name = "url"
version = "2.3.1" version = "2.4.1"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "0d68c799ae75762b8c3fe375feb6600ef5602c883c5d21eb51c09f22b83c4643" checksum = "143b538f18257fac9cad154828a57c6bf5157e1aa604d4816b5995bf6de87ae5"
dependencies = [ dependencies = [
"form_urlencoded", "form_urlencoded",
"idna", "idna",
@ -4993,6 +4911,12 @@ dependencies = [
"synstructure", "synstructure",
] ]
[[package]]
name = "zeroize"
version = "1.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2a0956f1ba7c7909bfb66c2e9e4124ab6f6482560f6628b5aaeba39207c9aad9"
[[package]] [[package]]
name = "zip" name = "zip"
version = "0.6.6" version = "0.6.6"

View File

@ -34,7 +34,7 @@ uuid = { version = "1.3.1", features = ["serde", "v4"] }
tokio = { version = "1.27.0", features = ["full"] } tokio = { version = "1.27.0", features = ["full"] }
zookeeper = "0.8.0" zookeeper = "0.8.0"
parking_lot = "0.12.1" parking_lot = "0.12.1"
rust-s3 = { version = "0.33.0", default-features = false, features = ["sync-rustls-tls"] } strois = { git = "http://github.com/meilisearch/strois", branch = "main" }
[dev-dependencies] [dev-dependencies]
big_s = "1.0.2" big_s = "1.0.2"

View File

@ -46,6 +46,7 @@ use dump::{KindDump, TaskDump, UpdateFile};
pub use error::Error; pub use error::Error;
pub use features::RoFeatures; pub use features::RoFeatures;
use file_store::FileStore; use file_store::FileStore;
use strois::Bucket;
use meilisearch_types::error::ResponseError; use meilisearch_types::error::ResponseError;
use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures}; use meilisearch_types::features::{InstanceTogglableFeatures, RuntimeTogglableFeatures};
use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str}; use meilisearch_types::heed::types::{OwnedType, SerdeBincode, SerdeJson, Str};
@ -56,7 +57,6 @@ use meilisearch_types::milli::{self, CboRoaringBitmapCodec, Index, RoaringBitmap
use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task}; use meilisearch_types::tasks::{Kind, KindWithContent, Status, Task};
use parking_lot::{MappedRwLockReadGuard, RwLock, RwLockReadGuard}; use parking_lot::{MappedRwLockReadGuard, RwLock, RwLockReadGuard};
use roaring::RoaringBitmap; use roaring::RoaringBitmap;
use s3::Bucket;
use synchronoise::SignalEvent; use synchronoise::SignalEvent;
use tempfile::TempDir; use tempfile::TempDir;
use time::format_description::well_known::Rfc3339; use time::format_description::well_known::Rfc3339;
@ -74,6 +74,9 @@ use crate::utils::{check_index_swap_validity, clamp_to_page_size};
pub(crate) type BEI128 = pub(crate) type BEI128 =
meilisearch_types::heed::zerocopy::I128<meilisearch_types::heed::byteorder::BE>; meilisearch_types::heed::zerocopy::I128<meilisearch_types::heed::byteorder::BE>;
/// This is the size of the buffer we use to send each part of a multipart request to S3.
pub const S3_PART_SIZE: usize = 50 * 1024 * 1024; // 50MiB
/// Defines a subset of tasks to be retrieved from the [`IndexScheduler`]. /// Defines a subset of tasks to be retrieved from the [`IndexScheduler`].
/// ///
/// An empty/default query (where each field is set to `None`) matches all tasks. /// An empty/default query (where each field is set to `None`) matches all tasks.
@ -467,14 +470,7 @@ impl IndexScheduler {
.unwrap(); .unwrap();
let s3 = inner.options.s3.as_ref().unwrap(); let s3 = inner.options.s3.as_ref().unwrap();
let task = let task =
s3.get_object(format!("/tasks/{id:0>10}")).unwrap(); s3.get_object_json(format!("tasks/{id:0>10}")).unwrap();
assert_eq!(
task.status_code(),
200,
"could not reach the s3: {:?}",
task.as_str()
);
let task = serde_json::from_slice(task.as_slice()).unwrap();
inner.register_raw_task(&mut wtxn, &task).unwrap(); inner.register_raw_task(&mut wtxn, &task).unwrap();
// we received a new task, we must wake up // we received a new task, we must wake up
self.wake_up.signal(); self.wake_up.signal();
@ -511,14 +507,8 @@ impl IndexScheduler {
.map(|(_, id)| id.parse::<u32>().unwrap()) .map(|(_, id)| id.parse::<u32>().unwrap())
.unwrap(); .unwrap();
let s3 = inner.options.s3.as_ref().unwrap(); let s3 = inner.options.s3.as_ref().unwrap();
let task = s3.get_object(format!("tasks/{id:0>10}")).unwrap(); let task =
assert_eq!( s3.get_object_json(format!("tasks/{id:0>10}")).unwrap();
task.status_code(),
200,
"could not reach the s3: {:?}",
task.as_str()
);
let task = serde_json::from_slice(task.as_slice()).unwrap();
inner.register_raw_task(&mut wtxn, &task).unwrap(); inner.register_raw_task(&mut wtxn, &task).unwrap();
wtxn.commit().unwrap(); wtxn.commit().unwrap();
} }
@ -589,11 +579,7 @@ impl IndexScheduler {
); );
let mut version_file_path = let mut version_file_path =
File::open(&inner.version_file_path).unwrap(); File::open(&inner.version_file_path).unwrap();
s3.put_object_stream(&mut version_file_path, dst) s3.put_object_multipart(dst, &mut version_file_path, S3_PART_SIZE)
.or_else(|e| match e {
s3::error::S3Error::Http(404, _) => Ok(200),
e => Err(e),
})
.unwrap(); .unwrap();
version_file_path.sync_data().unwrap(); version_file_path.sync_data().unwrap();
drop(version_file_path); drop(version_file_path);
@ -610,14 +596,11 @@ impl IndexScheduler {
heed::CompactionOption::Enabled, heed::CompactionOption::Enabled,
) )
.unwrap(); .unwrap();
s3.put_object_stream( s3.put_object_multipart(
&mut file,
format!("{snapshot_dir}/tasks.mdb"), format!("{snapshot_dir}/tasks.mdb"),
&mut file,
S3_PART_SIZE,
) )
.or_else(|e| match e {
s3::error::S3Error::Http(404, _) => Ok(200),
e => Err(e),
})
.unwrap(); .unwrap();
temp.close().unwrap(); temp.close().unwrap();
@ -637,11 +620,11 @@ impl IndexScheduler {
heed::CompactionOption::Enabled, heed::CompactionOption::Enabled,
) )
.unwrap(); .unwrap();
s3.put_object_stream(&mut file, format!("{dst}/{uuid}.mdb")) s3.put_object_multipart(
.or_else(|e| match e { format!("{dst}/{uuid}.mdb"),
s3::error::S3Error::Http(404, _) => Ok(200), &mut file,
e => Err(e), S3_PART_SIZE,
}) )
.unwrap(); .unwrap();
temp.close().unwrap(); temp.close().unwrap();
} }
@ -925,26 +908,23 @@ fn load_snapshot(this: &IndexScheduler, path: &str) -> anyhow::Result<()> {
log::info!("Downloading the index scheduler database."); log::info!("Downloading the index scheduler database.");
let tasks_snapshot = format!("{path}/tasks.mdb"); let tasks_snapshot = format!("{path}/tasks.mdb");
let status = s3.get_object_to_writer(tasks_snapshot, &mut tasks_file)?; s3.get_object_to_writer(tasks_snapshot, &mut tasks_file)?;
assert!(matches!(status, 200 | 202));
log::info!("Downloading the indexes databases"); log::info!("Downloading the indexes databases");
let indexes_files = tempfile::TempDir::new_in(&base_path)?; let indexes_files = tempfile::TempDir::new_in(&base_path)?;
let src = format!("{path}/indexes"); let src = format!("{path}/indexes");
let uuids = s3.list(src.clone(), None)?.into_iter().flat_map(|lbr| { let uuids = s3.list_objects(&src)?.into_iter().map(|lbr| {
lbr.contents.into_iter().map(|o| { let key = lbr.unwrap().key;
let (_, name) = o.key.rsplit_once('/').unwrap(); let (_, name) = key.rsplit_once('/').unwrap();
name.to_string() name.to_string()
})
}); });
for uuid in uuids { for uuid in uuids {
log::info!("\tDownloading the index {}", uuid); log::info!("\tDownloading the index {}", uuid);
std::fs::create_dir_all(indexes_files.path().join(&uuid).with_extension(""))?; std::fs::create_dir_all(indexes_files.path().join(&uuid).with_extension(""))?;
let path = indexes_files.path().join(&uuid).with_extension("").join("data.mdb"); let path = indexes_files.path().join(&uuid).with_extension("").join("data.mdb");
let mut file = File::create(path)?; let mut file = File::create(path)?;
let status = s3.get_object_to_writer(format!("{src}/{uuid}"), &mut file)?; s3.get_object_to_writer(format!("{src}/{uuid}"), &mut file)?;
assert!(matches!(status, 200 | 202));
} }
// 3. Lock the index-mapper and close all the env // 3. Lock the index-mapper and close all the env
@ -1273,7 +1253,7 @@ impl IndexSchedulerInner {
for uuid in batch.content_uuids() { for uuid in batch.content_uuids() {
// TODO use a real UUIDv4 // TODO use a real UUIDv4
let (_, file) = self.file_store.new_update_with_uuid(uuid.as_u128())?; let (_, file) = self.file_store.new_update_with_uuid(uuid.as_u128())?;
s3.get_object_to_writer(&format!("/update-files/{}", uuid), &mut &*file).unwrap(); s3.get_object_to_writer(&format!("update-files/{}", uuid), &mut &*file).unwrap();
file.persist()?; file.persist()?;
} }
} }

View File

@ -1,4 +1,3 @@
use std::borrow::Borrow;
use std::fmt::{self, Debug, Display}; use std::fmt::{self, Debug, Display};
use std::fs::File; use std::fs::File;
use std::io::{self, Seek, Write}; use std::io::{self, Seek, Write};
@ -42,7 +41,7 @@ impl Display for DocumentFormatError {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self { match self {
Self::Io(e) => write!(f, "{e}"), Self::Io(e) => write!(f, "{e}"),
Self::MalformedPayload(me, b) => match me.borrow() { Self::MalformedPayload(me, b) => match me {
Error::Json(se) => { Error::Json(se) => {
let mut message = match se.classify() { let mut message = match se.classify() {
Category::Data => { Category::Data => {

View File

@ -175,6 +175,7 @@ macro_rules! make_error_codes {
// An exhaustive list of all the error codes used by meilisearch. // An exhaustive list of all the error codes used by meilisearch.
make_error_codes! { make_error_codes! {
S3Error , System , INTERNAL_SERVER_ERROR;
ApiKeyAlreadyExists , InvalidRequest , CONFLICT ; ApiKeyAlreadyExists , InvalidRequest , CONFLICT ;
ApiKeyNotFound , InvalidRequest , NOT_FOUND ; ApiKeyNotFound , InvalidRequest , NOT_FOUND ;
BadParameter , InvalidRequest , BAD_REQUEST; BadParameter , InvalidRequest , BAD_REQUEST;

View File

@ -80,7 +80,7 @@ reqwest = { version = "0.11.16", features = [
], default-features = false } ], default-features = false }
rustls = "0.20.8" rustls = "0.20.8"
rustls-pemfile = "1.0.2" rustls-pemfile = "1.0.2"
rust-s3 = { version = "0.33.0", default-features = false, features = ["sync-rustls-tls"] } strois = { git = "http://github.com/meilisearch/strois", branch = "main" }
segment = { version = "0.2.2", optional = true } segment = { version = "0.2.2", optional = true }
serde = { version = "1.0.160", features = ["derive"] } serde = { version = "1.0.160", features = ["derive"] }
serde_json = { version = "1.0.95", features = ["preserve_order"] } serde_json = { version = "1.0.95", features = ["preserve_order"] }

View File

@ -33,6 +33,8 @@ pub enum MeilisearchHttpError {
.0.iter().map(|uid| format!("\"{uid}\"")).collect::<Vec<_>>().join(", "), .0.len() .0.iter().map(|uid| format!("\"{uid}\"")).collect::<Vec<_>>().join(", "), .0.len()
)] )]
SwapIndexPayloadWrongLength(Vec<IndexUid>), SwapIndexPayloadWrongLength(Vec<IndexUid>),
#[error("S3 Error: {0}")]
S3Error(#[from] strois::Error),
#[error(transparent)] #[error(transparent)]
IndexUid(#[from] IndexUidFormatError), IndexUid(#[from] IndexUidFormatError),
#[error(transparent)] #[error(transparent)]
@ -65,6 +67,7 @@ impl ErrorCode for MeilisearchHttpError {
MeilisearchHttpError::InvalidExpression(_, _) => Code::InvalidSearchFilter, MeilisearchHttpError::InvalidExpression(_, _) => Code::InvalidSearchFilter,
MeilisearchHttpError::PayloadTooLarge(_) => Code::PayloadTooLarge, MeilisearchHttpError::PayloadTooLarge(_) => Code::PayloadTooLarge,
MeilisearchHttpError::SwapIndexPayloadWrongLength(_) => Code::InvalidSwapIndexes, MeilisearchHttpError::SwapIndexPayloadWrongLength(_) => Code::InvalidSwapIndexes,
MeilisearchHttpError::S3Error(_) => Code::S3Error,
MeilisearchHttpError::IndexUid(e) => e.error_code(), MeilisearchHttpError::IndexUid(e) => e.error_code(),
MeilisearchHttpError::SerdeJson(_) => Code::Internal, MeilisearchHttpError::SerdeJson(_) => Code::Internal,
MeilisearchHttpError::HeedError(_) => Code::Internal, MeilisearchHttpError::HeedError(_) => Code::Internal,

View File

@ -30,6 +30,7 @@ use extractors::payload::PayloadConfig;
use http::header::CONTENT_TYPE; use http::header::CONTENT_TYPE;
use index_scheduler::{IndexScheduler, IndexSchedulerOptions}; use index_scheduler::{IndexScheduler, IndexSchedulerOptions};
use log::error; use log::error;
use strois::Client;
use meilisearch_auth::AuthController; use meilisearch_auth::AuthController;
use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader}; use meilisearch_types::milli::documents::{DocumentsBatchBuilder, DocumentsBatchReader};
use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod}; use meilisearch_types::milli::update::{IndexDocumentsConfig, IndexDocumentsMethod};
@ -39,8 +40,6 @@ use meilisearch_types::versioning::{check_version_file, create_version_file};
use meilisearch_types::{compression, milli, VERSION_FILE_NAME}; use meilisearch_types::{compression, milli, VERSION_FILE_NAME};
pub use option::Opt; pub use option::Opt;
use option::ScheduleSnapshot; use option::ScheduleSnapshot;
use s3::creds::Credentials;
use s3::{Bucket, Region};
use zookeeper::ZooKeeper; use zookeeper::ZooKeeper;
use crate::error::MeilisearchHttpError; use crate::error::MeilisearchHttpError;
@ -246,19 +245,16 @@ fn open_or_create_database_unchecked(
zookeeper: zookeeper.clone(), zookeeper: zookeeper.clone(),
s3: opt.s3_url.as_ref().map(|url| { s3: opt.s3_url.as_ref().map(|url| {
Arc::new( Arc::new(
Bucket::new( Client::builder(url)
opt.s3_bucket.as_deref().unwrap(),
Region::Custom { region: opt.s3_region.clone(), endpoint: url.clone() },
Credentials {
access_key: opt.s3_access_key.clone(),
secret_key: opt.s3_secret_key.clone(),
security_token: opt.s3_security_token.clone(),
session_token: None,
expiration: None,
},
)
.unwrap() .unwrap()
.with_path_style(), .key(opt.s3_access_key.as_ref().expect("Need s3 key to work").clone())
.secret(opt.s3_secret_key.as_ref().expect("Need s3 secret to work").clone())
.maybe_token(opt.s3_security_token.clone())
.build()
.bucket(opt.s3_bucket.as_ref().expect("Need an s3 bucket to work"))
.unwrap()
.get_or_create()
.unwrap(),
) )
}), }),
})) }))

View File

@ -413,10 +413,11 @@ async fn document_addition(
if let Some(s3) = s3 { if let Some(s3) = s3 {
update_file.seek(SeekFrom::Start(0)).unwrap(); update_file.seek(SeekFrom::Start(0)).unwrap();
let mut reader = BufReader::new(&*update_file); let mut reader = BufReader::new(&*update_file);
match s3.put_object_stream(&mut reader, format!("/update-files/{}", uuid)) { s3.put_object_multipart(
Ok(_) | Err(s3::error::S3Error::Http(_, _)) => (), format!("update-files/{}", uuid),
Err(e) => panic!("Error {}", e), &mut reader,
} 50 * 1024 * 1024,
)?;
} }
// we NEED to persist the file here because we moved the `update_file` into another task.