Introduce a new route to export documents and enqueue the export task

This commit is contained in:
Clément Renault 2025-06-12 16:23:48 +02:00 committed by Kerollmops
parent ae8c1461e1
commit e74c3b692a
No known key found for this signature in database
GPG key ID: F250A4C4E3AE5F5F
14 changed files with 303 additions and 10 deletions

View file

@ -389,6 +389,11 @@ InvalidDocumentEditionContext , InvalidRequest , BAD_REQU
InvalidDocumentEditionFunctionFilter , InvalidRequest , BAD_REQUEST ;
EditDocumentsByFunctionError , InvalidRequest , BAD_REQUEST ;
InvalidSettingsIndexChat , InvalidRequest , BAD_REQUEST ;
// Export
InvalidExportUrl , InvalidRequest , BAD_REQUEST ;
InvalidExportApiKey , InvalidRequest , BAD_REQUEST ;
InvalidExportIndexesPatterns , InvalidRequest , BAD_REQUEST ;
InvalidExportSkipEmbeddings , InvalidRequest , BAD_REQUEST ;
// Experimental features - Chat Completions
UnimplementedExternalFunctionCalling , InvalidRequest , NOT_IMPLEMENTED ;
UnimplementedNonStreamingChatCompletions , InvalidRequest , NOT_IMPLEMENTED ;

View file

@ -317,6 +317,9 @@ pub enum Action {
#[serde(rename = "experimental.update")]
#[deserr(rename = "experimental.update")]
ExperimentalFeaturesUpdate,
#[serde(rename = "export")]
#[deserr(rename = "export")]
Export,
#[serde(rename = "network.get")]
#[deserr(rename = "network.get")]
NetworkGet,
@ -438,6 +441,8 @@ pub mod actions {
pub const EXPERIMENTAL_FEATURES_GET: u8 = ExperimentalFeaturesGet.repr();
pub const EXPERIMENTAL_FEATURES_UPDATE: u8 = ExperimentalFeaturesUpdate.repr();
pub const EXPORT: u8 = Export.repr();
pub const NETWORK_GET: u8 = NetworkGet.repr();
pub const NETWORK_UPDATE: u8 = NetworkUpdate.repr();

View file

@ -1,3 +1,5 @@
use std::collections::BTreeMap;
use milli::Object;
use serde::{Deserialize, Serialize};
use time::{Duration, OffsetDateTime};
@ -118,6 +120,15 @@ pub struct DetailsView {
pub upgrade_from: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub upgrade_to: Option<String>,
// exporting
#[serde(skip_serializing_if = "Option::is_none")]
pub url: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub api_key: Option<String>,
#[serde(skip_serializing_if = "Option::is_none")]
pub exported_documents: Option<BTreeMap<String, u32>>,
#[serde(skip_serializing_if = "Option::is_none")]
pub skip_embeddings: Option<bool>,
}
impl DetailsView {
@ -238,6 +249,37 @@ impl DetailsView {
Some(left)
}
},
url: match (self.url.clone(), other.url.clone()) {
(None, None) => None,
(None, Some(url)) | (Some(url), None) => Some(url),
// We should never be able to batch multiple exports at the same time.
// So we return the first one we encounter but that shouldn't be an issue anyway.
(Some(left), Some(_right)) => Some(left),
},
api_key: match (self.api_key.clone(), other.api_key.clone()) {
(None, None) => None,
(None, Some(key)) | (Some(key), None) => Some(key),
// We should never be able to batch multiple exports at the same time.
// So we return the first one we encounter but that shouldn't be an issue anyway.
(Some(left), Some(_right)) => Some(left),
},
exported_documents: match (
self.exported_documents.clone(),
other.exported_documents.clone(),
) {
(None, None) => None,
(None, Some(exp)) | (Some(exp), None) => Some(exp),
// We should never be able to batch multiple exports at the same time.
// So we return the first one we encounter but that shouldn't be an issue anyway.
(Some(left), Some(_right)) => Some(left),
},
skip_embeddings: match (self.skip_embeddings, other.skip_embeddings) {
(None, None) => None,
(None, Some(skip)) | (Some(skip), None) => Some(skip),
// We should never be able to batch multiple exports at the same time.
// So we return the first one we encounter but that shouldn't be an issue anyway.
(Some(left), Some(_right)) => Some(left),
},
// We want the earliest version
upgrade_from: match (self.upgrade_from.clone(), other.upgrade_from.clone()) {
(None, None) => None,
@ -327,6 +369,9 @@ impl From<Details> for DetailsView {
Details::IndexSwap { swaps } => {
DetailsView { swaps: Some(swaps), ..Default::default() }
}
Details::Export { url, api_key, exported_documents, skip_embeddings } => {
DetailsView { exported_documents: Some(exported_documents), ..Default::default() }
}
Details::UpgradeDatabase { from, to } => DetailsView {
upgrade_from: Some(format!("v{}.{}.{}", from.0, from.1, from.2)),
upgrade_to: Some(format!("v{}.{}.{}", to.0, to.1, to.2)),

View file

@ -1,5 +1,5 @@
use core::fmt;
use std::collections::HashSet;
use std::collections::{BTreeMap, HashSet};
use std::fmt::{Display, Write};
use std::str::FromStr;
@ -14,6 +14,7 @@ use uuid::Uuid;
use crate::batches::BatchId;
use crate::error::ResponseError;
use crate::index_uid_pattern::IndexUidPattern;
use crate::keys::Key;
use crate::settings::{Settings, Unchecked};
use crate::{versioning, InstanceUid};
@ -50,6 +51,7 @@ impl Task {
| SnapshotCreation
| TaskCancelation { .. }
| TaskDeletion { .. }
| Export { .. }
| UpgradeDatabase { .. }
| IndexSwap { .. } => None,
DocumentAdditionOrUpdate { index_uid, .. }
@ -86,6 +88,7 @@ impl Task {
| KindWithContent::TaskDeletion { .. }
| KindWithContent::DumpCreation { .. }
| KindWithContent::SnapshotCreation
| KindWithContent::Export { .. }
| KindWithContent::UpgradeDatabase { .. } => None,
}
}
@ -152,6 +155,12 @@ pub enum KindWithContent {
instance_uid: Option<InstanceUid>,
},
SnapshotCreation,
Export {
url: String,
api_key: Option<String>,
indexes: Vec<IndexUidPattern>,
skip_embeddings: bool,
},
UpgradeDatabase {
from: (u32, u32, u32),
},
@ -180,6 +189,7 @@ impl KindWithContent {
KindWithContent::TaskDeletion { .. } => Kind::TaskDeletion,
KindWithContent::DumpCreation { .. } => Kind::DumpCreation,
KindWithContent::SnapshotCreation => Kind::SnapshotCreation,
KindWithContent::Export { .. } => Kind::Export,
KindWithContent::UpgradeDatabase { .. } => Kind::UpgradeDatabase,
}
}
@ -192,6 +202,7 @@ impl KindWithContent {
| SnapshotCreation
| TaskCancelation { .. }
| TaskDeletion { .. }
| Export { .. } // TODO Should I resolve the index names?
| UpgradeDatabase { .. } => vec![],
DocumentAdditionOrUpdate { index_uid, .. }
| DocumentEdition { index_uid, .. }
@ -269,6 +280,14 @@ impl KindWithContent {
}),
KindWithContent::DumpCreation { .. } => Some(Details::Dump { dump_uid: None }),
KindWithContent::SnapshotCreation => None,
KindWithContent::Export { url, api_key, indexes: _, skip_embeddings } => {
Some(Details::Export {
url: url.clone(),
api_key: api_key.clone(),
exported_documents: Default::default(),
skip_embeddings: *skip_embeddings,
})
}
KindWithContent::UpgradeDatabase { from } => Some(Details::UpgradeDatabase {
from: (from.0, from.1, from.2),
to: (
@ -335,6 +354,14 @@ impl KindWithContent {
}),
KindWithContent::DumpCreation { .. } => Some(Details::Dump { dump_uid: None }),
KindWithContent::SnapshotCreation => None,
KindWithContent::Export { url, api_key, indexes: _, skip_embeddings } => {
Some(Details::Export {
url: url.clone(),
api_key: api_key.clone(),
exported_documents: Default::default(),
skip_embeddings: skip_embeddings.clone(),
})
}
KindWithContent::UpgradeDatabase { from } => Some(Details::UpgradeDatabase {
from: *from,
to: (
@ -383,6 +410,14 @@ impl From<&KindWithContent> for Option<Details> {
}),
KindWithContent::DumpCreation { .. } => Some(Details::Dump { dump_uid: None }),
KindWithContent::SnapshotCreation => None,
KindWithContent::Export { url, api_key, indexes: _, skip_embeddings } => {
Some(Details::Export {
url: url.clone(),
api_key: api_key.clone(),
exported_documents: BTreeMap::default(),
skip_embeddings: skip_embeddings.clone(),
})
}
KindWithContent::UpgradeDatabase { from } => Some(Details::UpgradeDatabase {
from: *from,
to: (
@ -499,6 +534,7 @@ pub enum Kind {
TaskDeletion,
DumpCreation,
SnapshotCreation,
Export,
UpgradeDatabase,
}
@ -516,6 +552,7 @@ impl Kind {
| Kind::TaskCancelation
| Kind::TaskDeletion
| Kind::DumpCreation
| Kind::Export
| Kind::UpgradeDatabase
| Kind::SnapshotCreation => false,
}
@ -536,6 +573,7 @@ impl Display for Kind {
Kind::TaskDeletion => write!(f, "taskDeletion"),
Kind::DumpCreation => write!(f, "dumpCreation"),
Kind::SnapshotCreation => write!(f, "snapshotCreation"),
Kind::Export => write!(f, "export"),
Kind::UpgradeDatabase => write!(f, "upgradeDatabase"),
}
}
@ -643,6 +681,12 @@ pub enum Details {
IndexSwap {
swaps: Vec<IndexSwap>,
},
Export {
url: String,
api_key: Option<String>,
exported_documents: BTreeMap<String, u32>,
skip_embeddings: bool,
},
UpgradeDatabase {
from: (u32, u32, u32),
to: (u32, u32, u32),
@ -667,6 +711,7 @@ impl Details {
Self::SettingsUpdate { .. }
| Self::IndexInfo { .. }
| Self::Dump { .. }
| Self::Export { .. }
| Self::UpgradeDatabase { .. }
| Self::IndexSwap { .. } => (),
}