Add the payload size to the parameters

This commit is contained in:
Kerollmops 2025-06-25 18:24:50 +02:00 committed by Clément Renault
parent a743da3061
commit 63031219c5
No known key found for this signature in database
GPG Key ID: F250A4C4E3AE5F5F
13 changed files with 130 additions and 41 deletions

1
Cargo.lock generated
View File

@ -3855,6 +3855,7 @@ dependencies = [
"anyhow",
"bumpalo",
"bumparaw-collections",
"byte-unit",
"convert_case 0.8.0",
"csv",
"deserr",

View File

@ -4,6 +4,7 @@
use std::collections::BTreeMap;
use meilisearch_types::batches::BatchId;
use meilisearch_types::byte_unit::Byte;
use meilisearch_types::error::ResponseError;
use meilisearch_types::keys::Key;
use meilisearch_types::milli::update::IndexDocumentsMethod;
@ -148,6 +149,7 @@ pub enum KindDump {
Export {
url: String,
api_key: Option<String>,
payload_size: Option<Byte>,
indexes: BTreeMap<String, ExportIndexSettings>,
},
UpgradeDatabase {
@ -222,9 +224,10 @@ impl From<KindWithContent> for KindDump {
KindDump::DumpCreation { keys, instance_uid }
}
KindWithContent::SnapshotCreation => KindDump::SnapshotCreation,
KindWithContent::Export { url, api_key, indexes } => KindDump::Export {
KindWithContent::Export { url, api_key, payload_size, indexes } => KindDump::Export {
url,
api_key,
payload_size,
indexes: indexes
.into_iter()
.map(|(pattern, settings)| (pattern.to_string(), settings))

View File

@ -212,20 +212,23 @@ impl<'a> Dump<'a> {
KindWithContent::DumpCreation { keys, instance_uid }
}
KindDump::SnapshotCreation => KindWithContent::SnapshotCreation,
KindDump::Export { url, indexes, api_key } => KindWithContent::Export {
url,
api_key,
indexes: indexes
.into_iter()
.map(|(pattern, settings)| {
Ok((
IndexUidPattern::try_from(pattern)
.map_err(|_| Error::CorruptedDump)?,
settings,
))
})
.collect::<Result<_, Error>>()?,
},
KindDump::Export { url, api_key, payload_size, indexes } => {
KindWithContent::Export {
url,
api_key,
payload_size,
indexes: indexes
.into_iter()
.map(|(pattern, settings)| {
Ok((
IndexUidPattern::try_from(pattern)
.map_err(|_| Error::CorruptedDump)?,
settings,
))
})
.collect::<Result<_, Error>>()?,
}
}
KindDump::UpgradeDatabase { from } => KindWithContent::UpgradeDatabase { from },
},
};

View File

@ -289,8 +289,8 @@ fn snapshot_details(d: &Details) -> String {
Details::IndexSwap { swaps } => {
format!("{{ swaps: {swaps:?} }}")
}
Details::Export { url, api_key, indexes } => {
format!("{{ url: {url:?}, api_key: {api_key:?}, indexes: {indexes:?} }}")
Details::Export { url, api_key, payload_size, indexes } => {
format!("{{ url: {url:?}, api_key: {api_key:?}, payload_size: {payload_size:?}, indexes: {indexes:?} }}")
}
Details::UpgradeDatabase { from, to } => {
format!("{{ from: {from:?}, to: {to:?} }}")

View File

@ -362,12 +362,19 @@ impl IndexScheduler {
Ok((vec![task], ProcessBatchInfo::default()))
}
Batch::Export { mut task } => {
let KindWithContent::Export { url, indexes, api_key } = &task.kind else {
let KindWithContent::Export { url, api_key, payload_size, indexes } = &task.kind
else {
unreachable!()
};
let ret = catch_unwind(AssertUnwindSafe(|| {
self.process_export(url, indexes, api_key.as_deref(), progress)
self.process_export(
url,
api_key.as_deref(),
payload_size.as_ref(),
indexes,
progress,
)
}));
match ret {

View File

@ -4,6 +4,7 @@ use std::sync::atomic;
use std::time::Duration;
use backoff::ExponentialBackoff;
use byte_unit::Byte;
use flate2::write::GzEncoder;
use flate2::Compression;
use meilisearch_types::index_uid_pattern::IndexUidPattern;
@ -25,8 +26,9 @@ impl IndexScheduler {
pub(super) fn process_export(
&self,
base_url: &str,
indexes: &BTreeMap<IndexUidPattern, ExportIndexSettings>,
api_key: Option<&str>,
payload_size: Option<&Byte>,
indexes: &BTreeMap<IndexUidPattern, ExportIndexSettings>,
progress: Progress,
) -> Result<()> {
#[cfg(test)]
@ -122,7 +124,7 @@ impl IndexScheduler {
let (step, progress_step) = AtomicDocumentStep::new(total_documents);
progress.update_progress(progress_step);
let limit = 50 * 1024 * 1024; // 50 MiB
let limit = payload_size.map(|ps| ps.as_u64() as usize).unwrap_or(50 * 1024 * 1024); // defaults to 50 MiB
let documents_url = format!("{base_url}/indexes/{uid}/documents");
request_threads()

View File

@ -601,7 +601,7 @@ impl crate::IndexScheduler {
Details::Dump { dump_uid: _ } => {
assert_eq!(kind.as_kind(), Kind::DumpCreation);
}
Details::Export { url: _, api_key: _, indexes: _ } => {
Details::Export { url: _, api_key: _, payload_size: _, indexes: _ } => {
assert_eq!(kind.as_kind(), Kind::Export);
}
Details::UpgradeDatabase { from: _, to: _ } => {

View File

@ -15,6 +15,7 @@ actix-web = { version = "4.11.0", default-features = false }
anyhow = "1.0.98"
bumpalo = "3.18.1"
bumparaw-collections = "0.1.4"
byte-unit = { version = "5.1.6", features = ["serde"] }
convert_case = "0.8.0"
csv = "1.3.1"
deserr = { version = "0.6.3", features = ["actix-web"] }

View File

@ -392,6 +392,7 @@ InvalidSettingsIndexChat , InvalidRequest , BAD_REQU
// Export
InvalidExportUrl , InvalidRequest , BAD_REQUEST ;
InvalidExportApiKey , InvalidRequest , BAD_REQUEST ;
InvalidExportPayloadSize , InvalidRequest , BAD_REQUEST ;
InvalidExportIndexesPatterns , InvalidRequest , BAD_REQUEST ;
InvalidExportIndexSkipEmbeddings , InvalidRequest , BAD_REQUEST ;
InvalidExportIndexFilter , InvalidRequest , BAD_REQUEST ;

View File

@ -18,7 +18,7 @@ pub mod versioning;
pub use milli::{heed, Index};
use uuid::Uuid;
pub use versioning::VERSION_FILE_NAME;
pub use {milli, serde_cs};
pub use {byte_unit, milli, serde_cs};
pub type Document = serde_json::Map<String, serde_json::Value>;
pub type InstanceUid = Uuid;

View File

@ -1,5 +1,6 @@
use std::collections::BTreeMap;
use byte_unit::UnitType;
use milli::Object;
use serde::{Deserialize, Serialize};
use time::{Duration, OffsetDateTime};
@ -128,6 +129,8 @@ pub struct DetailsView {
#[serde(skip_serializing_if = "Option::is_none")]
pub api_key: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub payload_size: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub indexes: Option<BTreeMap<String, DetailsExportIndexSettings>>,
}
@ -263,6 +266,13 @@ impl DetailsView {
// So we return the first one we encounter but that shouldn't be an issue anyway.
(Some(left), Some(_right)) => Some(left),
},
payload_size: match (self.payload_size.clone(), other.payload_size.clone()) {
(None, None) => None,
(None, Some(size)) | (Some(size), None) => Some(size),
// We should never be able to batch multiple exports at the same time.
// So we return the first one we encounter but that shouldn't be an issue anyway.
(Some(left), Some(_right)) => Some(left),
},
indexes: match (self.indexes.clone(), other.indexes.clone()) {
(None, None) => None,
(None, Some(indexes)) | (Some(indexes), None) => Some(indexes),
@ -359,9 +369,11 @@ impl From<Details> for DetailsView {
Details::IndexSwap { swaps } => {
DetailsView { swaps: Some(swaps), ..Default::default() }
}
Details::Export { url, api_key, indexes } => DetailsView {
Details::Export { url, api_key, payload_size, indexes } => DetailsView {
url: Some(url),
api_key,
payload_size: payload_size
.map(|ps| ps.get_appropriate_unit(UnitType::Both).to_string()),
indexes: Some(
indexes
.into_iter()

View File

@ -3,6 +3,7 @@ use std::collections::{BTreeMap, HashSet};
use std::fmt::{Display, Write};
use std::str::FromStr;
use byte_unit::Byte;
use enum_iterator::Sequence;
use milli::update::IndexDocumentsMethod;
use milli::Object;
@ -159,6 +160,7 @@ pub enum KindWithContent {
Export {
url: String,
api_key: Option<String>,
payload_size: Option<Byte>,
indexes: BTreeMap<IndexUidPattern, ExportIndexSettings>,
},
UpgradeDatabase {
@ -286,11 +288,14 @@ impl KindWithContent {
}),
KindWithContent::DumpCreation { .. } => Some(Details::Dump { dump_uid: None }),
KindWithContent::SnapshotCreation => None,
KindWithContent::Export { url, api_key, indexes } => Some(Details::Export {
url: url.clone(),
api_key: api_key.clone(),
indexes: indexes.iter().map(|(p, s)| (p.clone(), s.clone().into())).collect(),
}),
KindWithContent::Export { url, api_key, payload_size, indexes } => {
Some(Details::Export {
url: url.clone(),
api_key: api_key.clone(),
payload_size: payload_size.clone(),
indexes: indexes.iter().map(|(p, s)| (p.clone(), s.clone().into())).collect(),
})
}
KindWithContent::UpgradeDatabase { from } => Some(Details::UpgradeDatabase {
from: (from.0, from.1, from.2),
to: (
@ -357,11 +362,14 @@ impl KindWithContent {
}),
KindWithContent::DumpCreation { .. } => Some(Details::Dump { dump_uid: None }),
KindWithContent::SnapshotCreation => None,
KindWithContent::Export { url, api_key, indexes } => Some(Details::Export {
url: url.clone(),
api_key: api_key.clone(),
indexes: indexes.iter().map(|(p, s)| (p.clone(), s.clone().into())).collect(),
}),
KindWithContent::Export { url, api_key, payload_size, indexes } => {
Some(Details::Export {
url: url.clone(),
api_key: api_key.clone(),
payload_size: payload_size.clone(),
indexes: indexes.iter().map(|(p, s)| (p.clone(), s.clone().into())).collect(),
})
}
KindWithContent::UpgradeDatabase { from } => Some(Details::UpgradeDatabase {
from: *from,
to: (
@ -410,11 +418,14 @@ impl From<&KindWithContent> for Option<Details> {
}),
KindWithContent::DumpCreation { .. } => Some(Details::Dump { dump_uid: None }),
KindWithContent::SnapshotCreation => None,
KindWithContent::Export { url, api_key, indexes } => Some(Details::Export {
url: url.clone(),
api_key: api_key.clone(),
indexes: indexes.iter().map(|(p, s)| (p.clone(), s.clone().into())).collect(),
}),
KindWithContent::Export { url, api_key, payload_size, indexes } => {
Some(Details::Export {
url: url.clone(),
api_key: api_key.clone(),
payload_size: payload_size.clone(),
indexes: indexes.iter().map(|(p, s)| (p.clone(), s.clone().into())).collect(),
})
}
KindWithContent::UpgradeDatabase { from } => Some(Details::UpgradeDatabase {
from: *from,
to: (
@ -681,6 +692,7 @@ pub enum Details {
Export {
url: String,
api_key: Option<String>,
payload_size: Option<Byte>,
indexes: BTreeMap<IndexUidPattern, DetailsExportIndexSettings>,
},
UpgradeDatabase {

View File

@ -1,7 +1,10 @@
use std::collections::BTreeMap;
use std::convert::Infallible;
use std::str::FromStr as _;
use actix_web::web::{self, Data};
use actix_web::{HttpRequest, HttpResponse};
use byte_unit::Byte;
use deserr::actix_web::AwebJson;
use deserr::Deserr;
use index_scheduler::IndexScheduler;
@ -72,7 +75,7 @@ async fn export(
let export = export.into_inner();
debug!(returns = ?export, "Trigger export");
let Export { url, api_key, indexes } = export;
let Export { url, api_key, payload_size, indexes } = export;
let indexes = if indexes.is_empty() {
BTreeMap::from([(IndexUidPattern::new_unchecked("*"), DbExportIndexSettings::default())])
@ -85,7 +88,12 @@ async fn export(
.collect()
};
let task = KindWithContent::Export { url, api_key, indexes };
let task = KindWithContent::Export {
url,
api_key,
payload_size: payload_size.map(|ByteWithDeserr(bytes)| bytes),
indexes,
};
let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView =
@ -109,12 +117,51 @@ pub struct Export {
#[serde(default)]
#[deserr(default, error = DeserrJsonError<InvalidExportApiKey>)]
pub api_key: Option<String>,
#[schema(value_type = Option<String>, example = json!("24MiB"))]
#[serde(default)]
#[deserr(default, error = DeserrJsonError<InvalidExportPayloadSize>)]
pub payload_size: Option<ByteWithDeserr>,
#[schema(value_type = Option<BTreeSet<String>>, example = json!(["movies", "steam-*"]))]
#[deserr(default)]
#[serde(default)]
pub indexes: BTreeMap<IndexUidPattern, ExportIndexSettings>,
}
/// A wrapper around the `Byte` type that implements `Deserr`.
#[derive(Debug, Serialize)]
#[serde(transparent)]
pub struct ByteWithDeserr(pub Byte);
impl<E> deserr::Deserr<E> for ByteWithDeserr
where
E: deserr::DeserializeError,
{
fn deserialize_from_value<V: deserr::IntoValue>(
value: deserr::Value<V>,
location: deserr::ValuePointerRef,
) -> Result<Self, E> {
use deserr::{ErrorKind, Value, ValueKind};
match value {
Value::Integer(integer) => Ok(ByteWithDeserr(Byte::from_u64(integer))),
Value::String(string) => Byte::from_str(&string).map(ByteWithDeserr).map_err(|e| {
deserr::take_cf_content(E::error::<Infallible>(
None,
ErrorKind::Unexpected { msg: e.to_string() },
location,
))
}),
actual => Err(deserr::take_cf_content(E::error(
None,
ErrorKind::IncorrectValueKind {
actual,
accepted: &[ValueKind::Integer, ValueKind::String],
},
location,
))),
}
}
}
#[derive(Debug, Deserr, ToSchema, Serialize)]
#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)]
#[serde(rename_all = "camelCase")]