Support exporting when no index pattern is provided

This commit is contained in:
Clément Renault 2025-06-16 15:50:32 +02:00 committed by Kerollmops
parent bc08cd0deb
commit 3329248a84
No known key found for this signature in database
GPG key ID: F250A4C4E3AE5F5F
3 changed files with 54 additions and 59 deletions

View file

@ -54,7 +54,7 @@ impl IndexScheduler {
indexes.len() as u32, indexes.len() as u32,
)); ));
let ExportIndexSettings { skip_embeddings, filter } = settings; let ExportIndexSettings { filter } = settings;
let index = self.index(uid)?; let index = self.index(uid)?;
let index_rtxn = index.read_txn()?; let index_rtxn = index.read_txn()?;
@ -131,56 +131,53 @@ impl IndexScheduler {
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?; .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
// TODO definitely factorize this code // TODO definitely factorize this code
if !*skip_embeddings { 'inject_vectors: {
'inject_vectors: { let embeddings = index
let embeddings = index .embeddings(&index_rtxn, docid)
.embeddings(&index_rtxn, docid) .map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
.map_err(|e| Error::from_milli(e, Some(uid.to_string())))?;
if embeddings.is_empty() { if embeddings.is_empty() {
break 'inject_vectors; break 'inject_vectors;
} }
let vectors = document let vectors = document
.entry(RESERVED_VECTORS_FIELD_NAME) .entry(RESERVED_VECTORS_FIELD_NAME)
.or_insert(serde_json::Value::Object(Default::default())); .or_insert(serde_json::Value::Object(Default::default()));
let serde_json::Value::Object(vectors) = vectors else { let serde_json::Value::Object(vectors) = vectors else {
return Err(Error::from_milli( return Err(Error::from_milli(
meilisearch_types::milli::Error::UserError( meilisearch_types::milli::Error::UserError(
meilisearch_types::milli::UserError::InvalidVectorsMapType { meilisearch_types::milli::UserError::InvalidVectorsMapType {
document_id: { document_id: {
if let Ok(Some(Ok(index))) = index if let Ok(Some(Ok(index))) = index
.external_id_of(&index_rtxn, std::iter::once(docid)) .external_id_of(&index_rtxn, std::iter::once(docid))
.map(|it| it.into_iter().next()) .map(|it| it.into_iter().next())
{ {
index index
} else { } else {
format!("internal docid={docid}") format!("internal docid={docid}")
} }
},
value: vectors.clone(),
}, },
), value: vectors.clone(),
Some(uid.to_string()), },
)); ),
Some(uid.to_string()),
));
};
for (embedder_name, embeddings) in embeddings {
let user_provided = embedding_configs
.iter()
.find(|conf| conf.name == embedder_name)
.is_some_and(|conf| conf.user_provided.contains(docid));
let embeddings = ExplicitVectors {
embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors(
embeddings,
)),
regenerate: !user_provided,
}; };
vectors.insert(embedder_name, serde_json::to_value(embeddings).unwrap());
for (embedder_name, embeddings) in embeddings {
let user_provided = embedding_configs
.iter()
.find(|conf| conf.name == embedder_name)
.is_some_and(|conf| conf.user_provided.contains(docid));
let embeddings = ExplicitVectors {
embeddings: Some(VectorOrArrayOfVectors::from_array_of_vectors(
embeddings,
)),
regenerate: !user_provided,
};
vectors
.insert(embedder_name, serde_json::to_value(embeddings).unwrap());
}
} }
} }

View file

@ -171,10 +171,9 @@ pub struct IndexSwap {
pub indexes: (String, String), pub indexes: (String, String),
} }
#[derive(Debug, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)] #[derive(Debug, Default, Clone, PartialEq, Eq, Serialize, Deserialize, ToSchema)]
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
pub struct ExportIndexSettings { pub struct ExportIndexSettings {
pub skip_embeddings: bool,
pub filter: Option<String>, pub filter: Option<String>,
} }

View file

@ -72,16 +72,19 @@ async fn export(
debug!(returns = ?export, "Trigger export"); debug!(returns = ?export, "Trigger export");
let Export { url, api_key, indexes } = export; let Export { url, api_key, indexes } = export;
let task = KindWithContent::Export {
url, let indexes = if indexes.is_empty() {
api_key, BTreeMap::from([(IndexUidPattern::new_unchecked("*"), DbExportIndexSettings::default())])
indexes: indexes } else {
indexes
.into_iter() .into_iter()
.map(|(pattern, ExportIndexSettings { skip_embeddings, filter })| { .map(|(pattern, ExportIndexSettings { filter })| {
(pattern, DbExportIndexSettings { skip_embeddings, filter }) (pattern, DbExportIndexSettings { filter })
}) })
.collect(), .collect()
}; };
let task = KindWithContent::Export { url, api_key, indexes };
let uid = get_task_id(&req, &opt)?; let uid = get_task_id(&req, &opt)?;
let dry_run = is_dry_run(&req, &opt)?; let dry_run = is_dry_run(&req, &opt)?;
let task: SummarizedTaskView = let task: SummarizedTaskView =
@ -116,10 +119,6 @@ pub struct Export {
#[serde(rename_all = "camelCase")] #[serde(rename_all = "camelCase")]
#[schema(rename_all = "camelCase")] #[schema(rename_all = "camelCase")]
pub struct ExportIndexSettings { pub struct ExportIndexSettings {
#[schema(value_type = Option<bool>, example = json!("true"))]
#[serde(default)]
#[deserr(default, error = DeserrJsonError<InvalidExportIndexSkipEmbeddings>)]
pub skip_embeddings: bool,
#[schema(value_type = Option<String>, example = json!("genres = action"))] #[schema(value_type = Option<String>, example = json!("genres = action"))]
#[serde(default)] #[serde(default)]
#[deserr(default, error = DeserrJsonError<InvalidExportIndexFilter>)] #[deserr(default, error = DeserrJsonError<InvalidExportIndexFilter>)]