From 23e25a437cad8f5a0a77230e261f3e614092abda Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Cl=C3=A9ment=20Renault?= Date: Sat, 14 Jun 2025 11:39:53 +0200 Subject: [PATCH] Working first implementation --- crates/dump/src/lib.rs | 25 ++-- crates/index-scheduler/src/dump.rs | 27 ++-- crates/index-scheduler/src/error.rs | 4 + crates/index-scheduler/src/insta_snapshot.rs | 4 +- crates/index-scheduler/src/scheduler/mod.rs | 1 + .../src/scheduler/process_batch.rs | 45 ++++-- .../src/scheduler/process_export.rs | 141 ++++++++++++++++++ .../mod.rs => process_upgrade.rs} | 0 crates/index-scheduler/src/test_utils.rs | 1 + crates/index-scheduler/src/utils.rs | 7 +- crates/meilisearch-types/src/error.rs | 3 +- .../src/index_uid_pattern.rs | 2 +- crates/meilisearch-types/src/task_view.rs | 36 +++-- crates/meilisearch-types/src/tasks.rs | 71 +++++---- crates/meilisearch/src/routes/export.rs | 34 ++++- 15 files changed, 298 insertions(+), 103 deletions(-) create mode 100644 crates/index-scheduler/src/scheduler/process_export.rs rename crates/index-scheduler/src/scheduler/{process_upgrade/mod.rs => process_upgrade.rs} (100%) diff --git a/crates/dump/src/lib.rs b/crates/dump/src/lib.rs index 29007e9ce..5c67d7a94 100644 --- a/crates/dump/src/lib.rs +++ b/crates/dump/src/lib.rs @@ -1,12 +1,16 @@ #![allow(clippy::type_complexity)] #![allow(clippy::wrong_self_convention)] +use std::collections::BTreeMap; + use meilisearch_types::batches::BatchId; use meilisearch_types::error::ResponseError; use meilisearch_types::keys::Key; use meilisearch_types::milli::update::IndexDocumentsMethod; use meilisearch_types::settings::Unchecked; -use meilisearch_types::tasks::{Details, IndexSwap, KindWithContent, Status, Task, TaskId}; +use meilisearch_types::tasks::{ + Details, ExportIndexSettings, IndexSwap, KindWithContent, Status, Task, TaskId, +}; use meilisearch_types::InstanceUid; use roaring::RoaringBitmap; use serde::{Deserialize, Serialize}; @@ -143,9 +147,8 @@ pub enum KindDump { SnapshotCreation, Export { url: String, - indexes: Vec, - skip_embeddings: bool, api_key: Option, + indexes: BTreeMap, }, UpgradeDatabase { from: (u32, u32, u32), @@ -219,14 +222,14 @@ impl From for KindDump { KindDump::DumpCreation { keys, instance_uid } } KindWithContent::SnapshotCreation => KindDump::SnapshotCreation, - KindWithContent::Export { url, indexes, skip_embeddings, api_key } => { - KindDump::Export { - url, - indexes: indexes.into_iter().map(|pattern| pattern.to_string()).collect(), - skip_embeddings, - api_key, - } - } + KindWithContent::Export { url, api_key, indexes } => KindDump::Export { + url, + api_key, + indexes: indexes + .into_iter() + .map(|(pattern, settings)| (pattern.to_string(), settings)) + .collect(), + }, KindWithContent::UpgradeDatabase { from: version } => { KindDump::UpgradeDatabase { from: version } } diff --git a/crates/index-scheduler/src/dump.rs b/crates/index-scheduler/src/dump.rs index 457d80597..2a99a74aa 100644 --- a/crates/index-scheduler/src/dump.rs +++ b/crates/index-scheduler/src/dump.rs @@ -212,19 +212,20 @@ impl<'a> Dump<'a> { KindWithContent::DumpCreation { keys, instance_uid } } KindDump::SnapshotCreation => KindWithContent::SnapshotCreation, - KindDump::Export { url, indexes, skip_embeddings, api_key } => { - KindWithContent::Export { - url, - indexes: indexes - .into_iter() - .map(|index| { - IndexUidPattern::try_from(index).map_err(|_| Error::CorruptedDump) - }) - .collect::, Error>>()?, - skip_embeddings, - api_key, - } - } + KindDump::Export { url, indexes, api_key } => KindWithContent::Export { + url, + api_key, + indexes: indexes + .into_iter() + .map(|(pattern, settings)| { + Ok(( + IndexUidPattern::try_from(pattern) + .map_err(|_| Error::CorruptedDump)?, + settings, + )) + }) + .collect::>()?, + }, KindDump::UpgradeDatabase { from } => KindWithContent::UpgradeDatabase { from }, }, }; diff --git a/crates/index-scheduler/src/error.rs b/crates/index-scheduler/src/error.rs index cb798b385..2020ac597 100644 --- a/crates/index-scheduler/src/error.rs +++ b/crates/index-scheduler/src/error.rs @@ -151,6 +151,8 @@ pub enum Error { CorruptedTaskQueue, #[error(transparent)] DatabaseUpgrade(Box), + #[error(transparent)] + Export(Box), #[error("Failed to rollback for index `{index}`: {rollback_outcome} ")] RollbackFailed { index: String, rollback_outcome: RollbackOutcome }, #[error(transparent)] @@ -221,6 +223,7 @@ impl Error { | Error::IoError(_) | Error::Persist(_) | Error::FeatureNotEnabled(_) + | Error::Export(_) | Error::Anyhow(_) => true, Error::CreateBatch(_) | Error::CorruptedTaskQueue @@ -294,6 +297,7 @@ impl ErrorCode for Error { Error::CorruptedTaskQueue => Code::Internal, Error::CorruptedDump => Code::Internal, Error::DatabaseUpgrade(_) => Code::Internal, + Error::Export(_) => Code::Internal, Error::RollbackFailed { .. } => Code::Internal, Error::UnrecoverableError(_) => Code::Internal, Error::IndexSchedulerVersionMismatch { .. } => Code::Internal, diff --git a/crates/index-scheduler/src/insta_snapshot.rs b/crates/index-scheduler/src/insta_snapshot.rs index d1db77b2f..138b591ff 100644 --- a/crates/index-scheduler/src/insta_snapshot.rs +++ b/crates/index-scheduler/src/insta_snapshot.rs @@ -289,8 +289,8 @@ fn snapshot_details(d: &Details) -> String { Details::IndexSwap { swaps } => { format!("{{ swaps: {swaps:?} }}") } - Details::Export { url, api_key, exported_documents, skip_embeddings } => { - format!("{{ url: {url:?}, api_key: {api_key:?}, exported_documents: {exported_documents:?}, skip_embeddings: {skip_embeddings:?} }}") + Details::Export { url, api_key, indexes } => { + format!("{{ url: {url:?}, api_key: {api_key:?}, indexes: {indexes:?} }}") } Details::UpgradeDatabase { from, to } => { format!("{{ from: {from:?}, to: {to:?} }}") diff --git a/crates/index-scheduler/src/scheduler/mod.rs b/crates/index-scheduler/src/scheduler/mod.rs index 0e258e27b..5ac591143 100644 --- a/crates/index-scheduler/src/scheduler/mod.rs +++ b/crates/index-scheduler/src/scheduler/mod.rs @@ -4,6 +4,7 @@ mod autobatcher_test; mod create_batch; mod process_batch; mod process_dump_creation; +mod process_export; mod process_index_operation; mod process_snapshot_creation; mod process_upgrade; diff --git a/crates/index-scheduler/src/scheduler/process_batch.rs b/crates/index-scheduler/src/scheduler/process_batch.rs index 1f6c4eb2c..99278756d 100644 --- a/crates/index-scheduler/src/scheduler/process_batch.rs +++ b/crates/index-scheduler/src/scheduler/process_batch.rs @@ -1,7 +1,6 @@ use std::collections::{BTreeSet, HashMap, HashSet}; use std::panic::{catch_unwind, AssertUnwindSafe}; use std::sync::atomic::Ordering; -use std::time::Duration; use meilisearch_types::batches::{BatchEnqueuedAt, BatchId}; use meilisearch_types::heed::{RoTxn, RwTxn}; @@ -14,9 +13,9 @@ use roaring::RoaringBitmap; use super::create_batch::Batch; use crate::processing::{ - AtomicBatchStep, AtomicTaskStep, CreateIndexProgress, DeleteIndexProgress, Export, - FinalizingIndexStep, InnerSwappingTwoIndexes, SwappingTheIndexes, TaskCancelationProgress, - TaskDeletionProgress, UpdateIndexProgress, + AtomicBatchStep, AtomicTaskStep, CreateIndexProgress, DeleteIndexProgress, FinalizingIndexStep, + InnerSwappingTwoIndexes, SwappingTheIndexes, TaskCancelationProgress, TaskDeletionProgress, + UpdateIndexProgress, }; use crate::utils::{ self, remove_n_tasks_datetime_earlier_than, remove_task_datetime, swap_index_uid_in_task, @@ -363,18 +362,32 @@ impl IndexScheduler { Ok((vec![task], ProcessBatchInfo::default())) } Batch::Export { mut task } => { - progress.update_progress(Export::EnsuringCorrectnessOfTheTarget); - - // TODO send check requests with the API Key - - let mut wtxn = self.env.write_txn()?; - let KindWithContent::Export { url, indexes, skip_embeddings, api_key } = &task.kind - else { + let KindWithContent::Export { url, indexes, api_key } = &task.kind else { unreachable!() }; - eprintln!("Exporting data to {}...", url); - std::thread::sleep(Duration::from_secs(30)); + let ret = catch_unwind(AssertUnwindSafe(|| { + self.process_export(url, indexes, api_key.as_deref(), progress) + })); + + match ret { + // TODO return the matched and exported documents + Ok(Ok(())) => (), + Ok(Err(Error::AbortedTask)) => return Err(Error::AbortedTask), + Ok(Err(e)) => return Err(Error::Export(Box::new(e))), + Err(e) => { + let msg = match e.downcast_ref::<&'static str>() { + Some(s) => *s, + None => match e.downcast_ref::() { + Some(s) => &s[..], + None => "Box", + }, + }; + return Err(Error::Export(Box::new(Error::ProcessBatchPanicked( + msg.to_string(), + )))); + } + } task.status = Status::Succeeded; Ok((vec![task], ProcessBatchInfo::default())) @@ -726,9 +739,11 @@ impl IndexScheduler { from.1, from.2 ); - match std::panic::catch_unwind(std::panic::AssertUnwindSafe(|| { + let ret = catch_unwind(std::panic::AssertUnwindSafe(|| { self.process_rollback(from, progress) - })) { + })); + + match ret { Ok(Ok(())) => {} Ok(Err(err)) => return Err(Error::DatabaseUpgrade(Box::new(err))), Err(e) => { diff --git a/crates/index-scheduler/src/scheduler/process_export.rs b/crates/index-scheduler/src/scheduler/process_export.rs new file mode 100644 index 000000000..e01ddf2e4 --- /dev/null +++ b/crates/index-scheduler/src/scheduler/process_export.rs @@ -0,0 +1,141 @@ +use std::collections::BTreeMap; +use std::time::Duration; + +use meilisearch_types::index_uid_pattern::IndexUidPattern; +use meilisearch_types::milli::progress::{Progress, VariableNameStep}; +use meilisearch_types::milli::{obkv_to_json, Filter}; +use meilisearch_types::settings::{self, SecretPolicy}; +use meilisearch_types::tasks::ExportIndexSettings; +use ureq::{json, Agent}; + +use crate::{Error, IndexScheduler, Result}; + +impl IndexScheduler { + pub(super) fn process_export( + &self, + url: &str, + indexes: &BTreeMap, + api_key: Option<&str>, + progress: Progress, + ) -> Result<()> { + #[cfg(test)] + self.maybe_fail(crate::test_utils::FailureLocation::ProcessExport)?; + + let indexes: Vec<_> = self + .index_names()? + .into_iter() + .flat_map(|uid| { + indexes + .iter() + .find(|(pattern, _)| pattern.matches_str(&uid)) + .map(|(_pattern, settings)| (uid, settings)) + }) + .collect(); + + let agent: Agent = ureq::AgentBuilder::new().timeout(Duration::from_secs(5)).build(); + + for (i, (uid, settings)) in indexes.iter().enumerate() { + let must_stop_processing = self.scheduler.must_stop_processing.clone(); + if must_stop_processing.get() { + return Err(Error::AbortedTask); + } + + progress.update_progress(VariableNameStep::::new( + format!("Exporting index `{uid}`"), + i as u32, + indexes.len() as u32, + )); + + let ExportIndexSettings { skip_embeddings, filter } = settings; + let index = self.index(uid)?; + let index_rtxn = index.read_txn()?; + + // Send the primary key + let primary_key = index.primary_key(&index_rtxn).unwrap(); + // TODO implement retry logic + let mut request = agent.post(&format!("{url}/indexes")); + if let Some(api_key) = api_key { + request = request.set("Authorization", &format!("Bearer {api_key}")); + } + request.send_json(&json!({ "uid": uid, "primaryKey": primary_key })).unwrap(); + + // Send the index settings + let settings = settings::settings(&index, &index_rtxn, SecretPolicy::RevealSecrets) + .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; + // TODO implement retry logic + // improve error reporting (get error message) + let mut request = agent.patch(&format!("{url}/indexes/{uid}/settings")); + if let Some(api_key) = api_key { + request = request.set("Authorization", &format!("Bearer {api_key}")); + } + request.send_json(settings).unwrap(); + + let filter = filter + .as_deref() + .map(Filter::from_str) + .transpose() + .map_err(|e| Error::from_milli(e, Some(uid.to_string())))? + .flatten(); + + let filter_universe = filter + .map(|f| f.evaluate(&index_rtxn, &index)) + .transpose() + .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; + let whole_universe = index + .documents_ids(&index_rtxn) + .map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?; + let universe = filter_universe.unwrap_or(whole_universe); + + let fields_ids_map = index.fields_ids_map(&index_rtxn)?; + let all_fields: Vec<_> = fields_ids_map.iter().map(|(id, _)| id).collect(); + let embedding_configs = index + .embedding_configs(&index_rtxn) + .map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?; + + let limit = 50 * 1024 * 1024; // 50 MiB + let mut buffer = Vec::new(); + let mut tmp_buffer = Vec::new(); + for docid in universe { + let document = index + .document(&index_rtxn, docid) + .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; + + let value = obkv_to_json(&all_fields, &fields_ids_map, document) + .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; + + tmp_buffer.clear(); + serde_json::to_writer(&mut tmp_buffer, &value) + .map_err(meilisearch_types::milli::InternalError::from) + .map_err(|e| Error::from_milli(e.into(), Some(uid.to_string())))?; + + if buffer.len() + tmp_buffer.len() > limit { + // TODO implement retry logic + post_serialized_documents(&agent, url, uid, api_key, &buffer).unwrap(); + buffer.clear(); + } + buffer.extend_from_slice(&tmp_buffer); + } + + post_serialized_documents(&agent, url, uid, api_key, &buffer).unwrap(); + } + + Ok(()) + } +} + +fn post_serialized_documents( + agent: &Agent, + url: &str, + uid: &str, + api_key: Option<&str>, + buffer: &[u8], +) -> Result { + let mut request = agent.post(&format!("{url}/indexes/{uid}/documents")); + request = request.set("Content-Type", "application/x-ndjson"); + if let Some(api_key) = api_key { + request = request.set("Authorization", &format!("Bearer {api_key}")); + } + request.send_bytes(buffer) +} + +enum ExportIndex {} diff --git a/crates/index-scheduler/src/scheduler/process_upgrade/mod.rs b/crates/index-scheduler/src/scheduler/process_upgrade.rs similarity index 100% rename from crates/index-scheduler/src/scheduler/process_upgrade/mod.rs rename to crates/index-scheduler/src/scheduler/process_upgrade.rs diff --git a/crates/index-scheduler/src/test_utils.rs b/crates/index-scheduler/src/test_utils.rs index 5f206b55c..bfed7f53a 100644 --- a/crates/index-scheduler/src/test_utils.rs +++ b/crates/index-scheduler/src/test_utils.rs @@ -37,6 +37,7 @@ pub(crate) enum FailureLocation { InsideCreateBatch, InsideProcessBatch, PanicInsideProcessBatch, + ProcessExport, ProcessUpgrade, AcquiringWtxn, UpdatingTaskAfterProcessBatchSuccess { task_uid: u32 }, diff --git a/crates/index-scheduler/src/utils.rs b/crates/index-scheduler/src/utils.rs index 7fe44d1c1..79571745b 100644 --- a/crates/index-scheduler/src/utils.rs +++ b/crates/index-scheduler/src/utils.rs @@ -601,12 +601,7 @@ impl crate::IndexScheduler { Details::Dump { dump_uid: _ } => { assert_eq!(kind.as_kind(), Kind::DumpCreation); } - Details::Export { - url: _, - api_key: _, - exported_documents: _, - skip_embeddings: _, - } => { + Details::Export { url: _, api_key: _, indexes: _ } => { assert_eq!(kind.as_kind(), Kind::Export); } Details::UpgradeDatabase { from: _, to: _ } => { diff --git a/crates/meilisearch-types/src/error.rs b/crates/meilisearch-types/src/error.rs index 22c668d59..08ee803ef 100644 --- a/crates/meilisearch-types/src/error.rs +++ b/crates/meilisearch-types/src/error.rs @@ -393,7 +393,8 @@ InvalidSettingsIndexChat , InvalidRequest , BAD_REQU InvalidExportUrl , InvalidRequest , BAD_REQUEST ; InvalidExportApiKey , InvalidRequest , BAD_REQUEST ; InvalidExportIndexesPatterns , InvalidRequest , BAD_REQUEST ; -InvalidExportSkipEmbeddings , InvalidRequest , BAD_REQUEST ; +InvalidExportIndexSkipEmbeddings , InvalidRequest , BAD_REQUEST ; +InvalidExportIndexFilter , InvalidRequest , BAD_REQUEST ; // Experimental features - Chat Completions UnimplementedExternalFunctionCalling , InvalidRequest , NOT_IMPLEMENTED ; UnimplementedNonStreamingChatCompletions , InvalidRequest , NOT_IMPLEMENTED ; diff --git a/crates/meilisearch-types/src/index_uid_pattern.rs b/crates/meilisearch-types/src/index_uid_pattern.rs index baf0249e2..f90fc7aee 100644 --- a/crates/meilisearch-types/src/index_uid_pattern.rs +++ b/crates/meilisearch-types/src/index_uid_pattern.rs @@ -12,7 +12,7 @@ use crate::index_uid::{IndexUid, IndexUidFormatError}; /// An index uid pattern is composed of only ascii alphanumeric characters, - and _, between 1 and 400 /// bytes long and optionally ending with a *. -#[derive(Serialize, Deserialize, Deserr, Debug, Clone, PartialEq, Eq, Hash)] +#[derive(Serialize, Deserialize, Deserr, Debug, Clone, PartialEq, Eq, PartialOrd, Ord, Hash)] #[deserr(try_from(&String) = FromStr::from_str -> IndexUidPatternFormatError)] pub struct IndexUidPattern(String); diff --git a/crates/meilisearch-types/src/task_view.rs b/crates/meilisearch-types/src/task_view.rs index 06fda0835..0a8d7b8fe 100644 --- a/crates/meilisearch-types/src/task_view.rs +++ b/crates/meilisearch-types/src/task_view.rs @@ -8,7 +8,9 @@ use utoipa::ToSchema; use crate::batches::BatchId; use crate::error::ResponseError; use crate::settings::{Settings, Unchecked}; -use crate::tasks::{serialize_duration, Details, IndexSwap, Kind, Status, Task, TaskId}; +use crate::tasks::{ + serialize_duration, Details, DetailsExportIndexSettings, IndexSwap, Kind, Status, Task, TaskId, +}; #[derive(Debug, Clone, PartialEq, Serialize, ToSchema)] #[serde(rename_all = "camelCase")] @@ -126,9 +128,7 @@ pub struct DetailsView { #[serde(skip_serializing_if = "Option::is_none")] pub api_key: Option, #[serde(skip_serializing_if = "Option::is_none")] - pub exported_documents: Option>, - #[serde(skip_serializing_if = "Option::is_none")] - pub skip_embeddings: Option, + pub indexes: Option>, } impl DetailsView { @@ -263,19 +263,9 @@ impl DetailsView { // So we return the first one we encounter but that shouldn't be an issue anyway. (Some(left), Some(_right)) => Some(left), }, - exported_documents: match ( - self.exported_documents.clone(), - other.exported_documents.clone(), - ) { + indexes: match (self.indexes.clone(), other.indexes.clone()) { (None, None) => None, - (None, Some(exp)) | (Some(exp), None) => Some(exp), - // We should never be able to batch multiple exports at the same time. - // So we return the first one we encounter but that shouldn't be an issue anyway. - (Some(left), Some(_right)) => Some(left), - }, - skip_embeddings: match (self.skip_embeddings, other.skip_embeddings) { - (None, None) => None, - (None, Some(skip)) | (Some(skip), None) => Some(skip), + (None, Some(indexes)) | (Some(indexes), None) => Some(indexes), // We should never be able to batch multiple exports at the same time. // So we return the first one we encounter but that shouldn't be an issue anyway. (Some(left), Some(_right)) => Some(left), @@ -369,9 +359,17 @@ impl From
for DetailsView { Details::IndexSwap { swaps } => { DetailsView { swaps: Some(swaps), ..Default::default() } } - Details::Export { url, api_key, exported_documents, skip_embeddings } => { - DetailsView { exported_documents: Some(exported_documents), ..Default::default() } - } + Details::Export { url, api_key, indexes } => DetailsView { + url: Some(url), + api_key, + indexes: Some( + indexes + .into_iter() + .map(|(pattern, settings)| (pattern.to_string(), settings)) + .collect(), + ), + ..Default::default() + }, Details::UpgradeDatabase { from, to } => DetailsView { upgrade_from: Some(format!("v{}.{}.{}", from.0, from.1, from.2)), upgrade_to: Some(format!("v{}.{}.{}", to.0, to.1, to.2)), diff --git a/crates/meilisearch-types/src/tasks.rs b/crates/meilisearch-types/src/tasks.rs index e31e6062b..1f8f7e7cb 100644 --- a/crates/meilisearch-types/src/tasks.rs +++ b/crates/meilisearch-types/src/tasks.rs @@ -9,7 +9,7 @@ use milli::Object; use roaring::RoaringBitmap; use serde::{Deserialize, Serialize, Serializer}; use time::{Duration, OffsetDateTime}; -use utoipa::ToSchema; +use utoipa::{schema, ToSchema}; use uuid::Uuid; use crate::batches::BatchId; @@ -158,8 +158,7 @@ pub enum KindWithContent { Export { url: String, api_key: Option, - indexes: Vec, - skip_embeddings: bool, + indexes: BTreeMap, }, UpgradeDatabase { from: (u32, u32, u32), @@ -172,6 +171,13 @@ pub struct IndexSwap { pub indexes: (String, String), } +#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)] +#[serde(rename_all = "camelCase")] +pub struct ExportIndexSettings { + pub skip_embeddings: bool, + pub filter: Option, +} + impl KindWithContent { pub fn as_kind(&self) -> Kind { match self { @@ -280,14 +286,11 @@ impl KindWithContent { }), KindWithContent::DumpCreation { .. } => Some(Details::Dump { dump_uid: None }), KindWithContent::SnapshotCreation => None, - KindWithContent::Export { url, api_key, indexes: _, skip_embeddings } => { - Some(Details::Export { - url: url.clone(), - api_key: api_key.clone(), - exported_documents: Default::default(), - skip_embeddings: *skip_embeddings, - }) - } + KindWithContent::Export { url, api_key, indexes } => Some(Details::Export { + url: url.clone(), + api_key: api_key.clone(), + indexes: indexes.into_iter().map(|(p, s)| (p.clone(), s.clone().into())).collect(), + }), KindWithContent::UpgradeDatabase { from } => Some(Details::UpgradeDatabase { from: (from.0, from.1, from.2), to: ( @@ -354,14 +357,11 @@ impl KindWithContent { }), KindWithContent::DumpCreation { .. } => Some(Details::Dump { dump_uid: None }), KindWithContent::SnapshotCreation => None, - KindWithContent::Export { url, api_key, indexes: _, skip_embeddings } => { - Some(Details::Export { - url: url.clone(), - api_key: api_key.clone(), - exported_documents: Default::default(), - skip_embeddings: skip_embeddings.clone(), - }) - } + KindWithContent::Export { url, api_key, indexes } => Some(Details::Export { + url: url.clone(), + api_key: api_key.clone(), + indexes: indexes.into_iter().map(|(p, s)| (p.clone(), s.clone().into())).collect(), + }), KindWithContent::UpgradeDatabase { from } => Some(Details::UpgradeDatabase { from: *from, to: ( @@ -410,14 +410,11 @@ impl From<&KindWithContent> for Option
{ }), KindWithContent::DumpCreation { .. } => Some(Details::Dump { dump_uid: None }), KindWithContent::SnapshotCreation => None, - KindWithContent::Export { url, api_key, indexes: _, skip_embeddings } => { - Some(Details::Export { - url: url.clone(), - api_key: api_key.clone(), - exported_documents: BTreeMap::default(), - skip_embeddings: skip_embeddings.clone(), - }) - } + KindWithContent::Export { url, api_key, indexes } => Some(Details::Export { + url: url.clone(), + api_key: api_key.clone(), + indexes: indexes.into_iter().map(|(p, s)| (p.clone(), s.clone().into())).collect(), + }), KindWithContent::UpgradeDatabase { from } => Some(Details::UpgradeDatabase { from: *from, to: ( @@ -684,8 +681,7 @@ pub enum Details { Export { url: String, api_key: Option, - exported_documents: BTreeMap, - skip_embeddings: bool, + indexes: BTreeMap, }, UpgradeDatabase { from: (u32, u32, u32), @@ -693,6 +689,23 @@ pub enum Details { }, } +#[derive(Debug, PartialEq, Clone, Serialize, Deserialize, ToSchema)] +#[schema(rename_all = "camelCase")] +pub struct DetailsExportIndexSettings { + #[serde(flatten)] + settings: ExportIndexSettings, + #[serde(skip_serializing_if = "Option::is_none")] + matched_documents: Option, + #[serde(skip_serializing_if = "Option::is_none")] + exported_documents: Option, +} + +impl From for DetailsExportIndexSettings { + fn from(settings: ExportIndexSettings) -> Self { + DetailsExportIndexSettings { settings, matched_documents: None, exported_documents: None } + } +} + impl Details { pub fn to_failed(&self) -> Self { let mut details = self.clone(); diff --git a/crates/meilisearch/src/routes/export.rs b/crates/meilisearch/src/routes/export.rs index 666799273..7029f0ebf 100644 --- a/crates/meilisearch/src/routes/export.rs +++ b/crates/meilisearch/src/routes/export.rs @@ -1,3 +1,5 @@ +use std::collections::BTreeMap; + use actix_web::web::{self, Data}; use actix_web::{HttpRequest, HttpResponse}; use deserr::actix_web::AwebJson; @@ -8,7 +10,7 @@ use meilisearch_types::error::deserr_codes::*; use meilisearch_types::error::ResponseError; use meilisearch_types::index_uid_pattern::IndexUidPattern; use meilisearch_types::keys::actions; -use meilisearch_types::tasks::KindWithContent; +use meilisearch_types::tasks::{ExportIndexSettings as DbExportIndexSettings, KindWithContent}; use serde::Serialize; use tracing::debug; use utoipa::{OpenApi, ToSchema}; @@ -69,8 +71,17 @@ async fn export( let export = export.into_inner(); debug!(returns = ?export, "Trigger export"); - let Export { url, api_key, indexes, skip_embeddings } = export; - let task = KindWithContent::Export { url, api_key, indexes, skip_embeddings }; + let Export { url, api_key, indexes } = export; + let task = KindWithContent::Export { + url, + api_key, + indexes: indexes + .into_iter() + .map(|(pattern, ExportIndexSettings { skip_embeddings, filter })| { + (pattern, DbExportIndexSettings { skip_embeddings, filter }) + }) + .collect(), + }; let uid = get_task_id(&req, &opt)?; let dry_run = is_dry_run(&req, &opt)?; let task: SummarizedTaskView = @@ -95,11 +106,22 @@ pub struct Export { #[deserr(default, error = DeserrJsonError)] pub api_key: Option, #[schema(value_type = Option>, example = json!(["movies", "steam-*"]))] - #[deserr(default, error = DeserrJsonError)] + #[deserr(default)] #[serde(default)] - pub indexes: Vec, + pub indexes: BTreeMap, +} + +#[derive(Debug, Deserr, ToSchema, Serialize)] +#[deserr(error = DeserrJsonError, rename_all = camelCase, deny_unknown_fields)] +#[serde(rename_all = "camelCase")] +#[schema(rename_all = "camelCase")] +pub struct ExportIndexSettings { #[schema(value_type = Option, example = json!("true"))] #[serde(default)] - #[deserr(default, error = DeserrJsonError)] + #[deserr(default, error = DeserrJsonError)] pub skip_embeddings: bool, + #[schema(value_type = Option, example = json!("genres = action"))] + #[serde(default)] + #[deserr(default, error = DeserrJsonError)] + pub filter: Option, }